diff --git a/.gitignore b/.gitignore index d2c08e90..50b02725 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,8 @@ /regression.diffs /regression.out /results + +# Extra files +/datapagemap.c +/datapagemap.h +/xlogreader.c diff --git a/Makefile b/Makefile index c3233a47..5da6dc3e 100644 --- a/Makefile +++ b/Makefile @@ -14,11 +14,16 @@ OBJS = backup.o \ util.o \ validate.o \ xlog.o \ + datapagemap.o \ + parsexlog.o \ + xlogreader.o \ pgut/pgut.o \ pgut/pgut-port.o DOCS = doc/pg_arman.txt +EXTRA_CLEAN = datapagemap.c datapagemap.h xlogreader.c + # asciidoc and xmlto are present, so install the html documentation and man # pages as well. html is part of the vanilla documentation. Man pages need a # special handling at installation. @@ -35,6 +40,27 @@ PG_LIBS = $(libpq_pgport) REGRESS = init option show delete backup restore +all: checksrcdir docs datapagemap.h pg_arman + +# This rule's only purpose is to give the user instructions on how to pass +# the path to PostgreSQL source tree to the makefile. +.PHONY: checksrcdir +checksrcdir: +ifndef top_srcdir + @echo "You must have PostgreSQL source tree available to compile." + @echo "Pass the path to the PostgreSQL source tree to make, in the top_srcdir" + @echo "variable: \"make top_srcdir=\"" + @exit 1 +endif + +# Those files are symlinked from the PostgreSQL sources. +xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/% +	rm -f $@ && $(LN_S) $< . +datapagemap.c: % : $(top_srcdir)/src/bin/pg_rewind/% +	rm -f $@ && $(LN_S) $< . +datapagemap.h: % : $(top_srcdir)/src/bin/pg_rewind/% +	rm -f $@ && $(LN_S) $< . 
+ PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) @@ -43,7 +69,6 @@ include $(PGXS) # Compile documentation as well is ASCIIDOC and XMLTO are defined ifneq ($(ASCIIDOC),) ifneq ($(XMLTO),) -all: docs docs: $(MAKE) -C doc/ @@ -53,7 +78,13 @@ install: install-man install-man: $(MKDIR_P) '$(DESTDIR)$(mandir)/man1/' $(INSTALL_DATA) $(man_DOCS) '$(DESTDIR)$(mandir)/man1/' +else +docs: + @echo "No docs to build" endif # XMLTO +else +docs: + @echo "No docs to build" endif # ASCIIDOC # Clean up documentation as well diff --git a/backup.c b/backup.c index 4772ee40..d81ec646 100644 --- a/backup.c +++ b/backup.c @@ -28,6 +28,9 @@ static int server_version = 0; static bool in_backup = false; /* TODO: more robust logic */ +/* list of files contained in backup */ +parray *backup_files_list; + /* * Backup routines */ @@ -48,6 +51,7 @@ static void create_file_list(parray *files, const char *subdir, const char *prefix, bool is_append); +static void wait_for_archive(pgBackup *backup, const char *sql); /* * Take a backup of database and return the list of files backed up. 
@@ -56,7 +60,6 @@ static parray * do_backup_database(parray *backup_list, pgBackupOption bkupopt) { int i; - parray *files; /* backup file list from non-snapshot */ parray *prev_files = NULL; /* file list of previous database backup */ FILE *fp; char path[MAXPGPATH]; @@ -68,6 +71,7 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt) /* repack the options */ bool smooth_checkpoint = bkupopt.smooth_checkpoint; + pgBackup *prev_backup = NULL; /* Block backup operations on a standby */ if (pg_is_standby()) @@ -78,6 +82,9 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt) /* Initialize size summary */ current.data_bytes = 0; + /* do some checks on the node */ + sanityChecks(); + /* * Obtain current timeline by scanning control file, theh LSN * obtained at output of pg_start_backup or pg_stop_backup does @@ -123,8 +130,8 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt) * List directories and symbolic links with the physical path to make * mkdirs.sh, then sort them in order of path. Omit $PGDATA. 
*/ - files = parray_new(); - dir_list_file(files, pgdata, NULL, false, false); + backup_files_list = parray_new(); + dir_list_file(backup_files_list, pgdata, NULL, false, false); if (!check) { @@ -133,7 +140,7 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt) if (fp == NULL) elog(ERROR_SYSTEM, "can't open make directory script \"%s\": %s", path, strerror(errno)); - dir_print_mkdirs_sh(fp, files, pgdata); + dir_print_mkdirs_sh(fp, backup_files_list, pgdata); fclose(fp); if (chmod(path, DIR_PERMISSION) == -1) elog(ERROR_SYSTEM, "can't change mode of \"%s\": %s", path, @@ -141,9 +148,9 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt) } /* clear directory list */ - parray_walk(files, pgFileFree); - parray_free(files); - files = NULL; + parray_walk(backup_files_list, pgFileFree); + parray_free(backup_files_list); + backup_files_list = NULL; /* * To take differential backup, the file list of the last completed database @@ -151,8 +158,6 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt) */ if (current.backup_mode == BACKUP_MODE_DIFF_PAGE) { - pgBackup *prev_backup; - /* find last completed database backup */ prev_backup = catalog_get_last_data_backup(backup_list, current.tli); pgBackupGetPath(prev_backup, prev_file_txt, lengthof(prev_file_txt), @@ -167,26 +172,55 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt) (uint32) (*lsn >> 32), (uint32) *lsn); } - /* initialize backup list from non-snapshot */ - files = parray_new(); + /* initialize backup list */ + backup_files_list = parray_new(); /* list files with the logical path. omit $PGDATA */ - add_files(files, pgdata, false, true); + add_files(backup_files_list, pgdata, false, true); /* backup files */ pgBackupGetPath(¤t, path, lengthof(path), DATABASE_DIR); - backup_files(pgdata, path, files, prev_files, lsn, NULL); + + /* + * Build page mapping in differential mode. 
When using this mode, the + * list of blocks to be taken is known by scanning the WAL segments + * present in archives up to the point where start backup has begun. + * However, normally this segment is not yet available in the archives, + * leading to failures when building the page map. Hence before doing + * anything and in order to ensure that all the segments needed for the + * scan are here, force a switch of the last segment with pg_switch_xlog. + */ + if (current.backup_mode == BACKUP_MODE_DIFF_PAGE) + { + /* Enforce archiving of last segment and wait for it to be here */ + wait_for_archive(&current, "SELECT * FROM pg_switch_xlog()"); + + /* Now build the page map */ + parray_qsort(backup_files_list, pgFileComparePathDesc); + elog(LOG, "extractPageMap"); + elog(LOG, "current_tli:%X", current.tli); + elog(LOG, "prev_backup->start_lsn: %X/%X", + (uint32) (prev_backup->start_lsn >> 32), + (uint32) (prev_backup->start_lsn)); + elog(LOG, "current.start_lsn: %X/%X", + (uint32) (current.start_lsn >> 32), + (uint32) (current.start_lsn)); + extractPageMap(arclog_path, prev_backup->start_lsn, current.tli, + current.start_lsn); + } + + backup_files(pgdata, path, backup_files_list, prev_files, lsn, NULL); /* notify end of backup */ pg_stop_backup(&current); /* create file list */ - create_file_list(files, pgdata, DATABASE_FILE_LIST, NULL, false); + create_file_list(backup_files_list, pgdata, DATABASE_FILE_LIST, NULL, false); /* print summary of size of backup mode files */ - for (i = 0; i < parray_num(files); i++) + for (i = 0; i < parray_num(backup_files_list); i++) { - pgFile *file = (pgFile *) parray_get(files, i); + pgFile *file = (pgFile *) parray_get(backup_files_list, i); if (!S_ISREG(file->mode)) continue; /* @@ -204,7 +238,7 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt) current.data_bytes); elog(LOG, "========================================"); - return files; + return backup_files_list; } @@ -654,7 +688,6 @@ backup_files(const char *from_root, } else { 
- elog(LOG, "\n"); elog(ERROR_SYSTEM, "can't stat backup mode. \"%s\": %s", file->path, strerror(errno)); @@ -825,3 +858,72 @@ create_file_list(parray *files, fclose(fp); } } + +/* + * A helper function to create the path of a relation file and segment. + * + * The returned path is palloc'd + */ +static char * +datasegpath(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) +{ + char *path; + char *segpath; + + path = relpathperm(rnode, forknum); + if (segno > 0) + { + segpath = psprintf("%s.%u", path, segno); + pfree(path); + return segpath; + } + else + return path; +} + +/* + * This routine gets called while reading WAL segments from the WAL archive, + * for every block that have changed in the target system. It makes note of + * all the changed blocks in the pagemap of the file and adds them in the + * things to track for the backup. + */ +void +process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno) +{ + char *path; + char *rel_path; + BlockNumber blkno_inseg; + int segno; + pgFile *file_item = NULL; + int j; + + segno = blkno / RELSEG_SIZE; + blkno_inseg = blkno % RELSEG_SIZE; + + rel_path = datasegpath(rnode, forknum, segno); + path = pg_malloc(strlen(rel_path) + strlen(pgdata) + 2); + sprintf(path, "%s/%s", pgdata, rel_path); + + for (j = 0; j < parray_num(backup_files_list); j++) + { + pgFile *p = (pgFile *) parray_get(backup_files_list, j); + + if (strcmp(p->path, path) == 0) + { + file_item = p; + break; + } + } + + /* + * If we don't have any record of this file in the file map, it means + * that it's a relation that did not have much activity since the last + * backup. We can safely ignore it. If it is a new relation file, the + * backup would simply copy it as-is. 
+ */ + if (file_item) + datapagemap_add(&file_item->pagemap, blkno_inseg); + + pg_free(path); + pg_free(rel_path); +} diff --git a/data.c b/data.c index 09b65dec..d69fdd7e 100644 --- a/data.c +++ b/data.c @@ -74,10 +74,10 @@ backup_data_file(const char *from_root, const char *to_root, FILE *out; BackupPageHeader header; DataPage page; /* used as read buffer */ - BlockNumber blknum; - size_t read_len; - int errno_tmp; + BlockNumber blknum = 0; + size_t read_len = 0; pg_crc32 crc; + off_t offset; INIT_CRC32C(crc); @@ -117,117 +117,139 @@ backup_data_file(const char *from_root, const char *to_root, /* confirm server version */ check_server_version(); - /* read each page and write the page excluding hole */ - for (blknum = 0; - (read_len = fread(&page, 1, sizeof(page), in)) == sizeof(page); - ++blknum) - { - XLogRecPtr page_lsn; - int upper_offset; - int upper_length; - - header.block = blknum; - - /* - * If a invalid data page was found, fallback to simple copy to ensure - * all pages in the file don't have BackupPageHeader. 
- */ - if (!parse_page(&page, &page_lsn, - &header.hole_offset, &header.hole_length)) - { - elog(LOG, "%s fall back to simple copy", file->path); - fclose(in); - fclose(out); - file->is_datafile = false; - return copy_file(from_root, to_root, file); - } - - file->read_size += read_len; - - /* if the page has not been modified since last backup, skip it */ - if (lsn && !XLogRecPtrIsInvalid(page_lsn) && page_lsn < *lsn) - continue; - - upper_offset = header.hole_offset + header.hole_length; - upper_length = BLCKSZ - upper_offset; - - /* write data page excluding hole */ - if (fwrite(&header, 1, sizeof(header), out) != sizeof(header) || - fwrite(page.data, 1, header.hole_offset, out) != header.hole_offset || - fwrite(page.data + upper_offset, 1, upper_length, out) != upper_length) - { - int errno_tmp = errno; - /* oops */ - fclose(in); - fclose(out); - elog(ERROR_SYSTEM, "cannot write at block %u of \"%s\": %s", - blknum, to_path, strerror(errno_tmp)); - } - - /* update CRC */ - COMP_CRC32C(crc, &header, sizeof(header)); - COMP_CRC32C(crc, page.data, header.hole_offset); - COMP_CRC32C(crc, page.data + upper_offset, upper_length); - - file->write_size += sizeof(header) + read_len - header.hole_length; - } - errno_tmp = errno; - if (!feof(in)) - { - fclose(in); - fclose(out); - elog(ERROR_SYSTEM, "cannot read backup mode file \"%s\": %s", - file->path, strerror(errno_tmp)); - } - /* - * The odd size page at the tail is probably a page exactly written now, so - * write whole of it. + * Read each page and write the page excluding hole. If it has been + * determined that the page can be copied safely, but no page map + * has been built, it means that we are in presence of a relation + * file that needs to be completely scanned. If a page map is present + * only scan the blocks needed. In each case, pages are copied without + * their hole to ensure some basic level of compression. 
*/ - if (read_len > 0) + if (file->pagemap.bitmapsize == 0) { - /* - * If the odd size page is the 1st page, fallback to simple copy because - * the file is not a datafile. - * Otherwise treat the page as a datapage with no hole. - */ - if (blknum == 0) - file->is_datafile = false; - else + for (blknum = 0; + (read_len = fread(&page, 1, sizeof(page), in)) == sizeof(page); + ++blknum) { - header.block = blknum; - header.hole_offset = 0; - header.hole_length = 0; + XLogRecPtr page_lsn; + int upper_offset; + int upper_length; - if (fwrite(&header, 1, sizeof(header), out) != sizeof(header)) + header.block = blknum; + + /* + * If an invalid data page was found, fallback to simple copy to ensure + * all pages in the file don't have BackupPageHeader. + */ + if (!parse_page(&page, &page_lsn, + &header.hole_offset, &header.hole_length)) + { + elog(LOG, "%s fall back to simple copy", file->path); + fclose(in); + fclose(out); + file->is_datafile = false; + return copy_file(from_root, to_root, file); + } + + file->read_size += read_len; + + /* if the page has not been modified since last backup, skip it */ + if (lsn && !XLogRecPtrIsInvalid(page_lsn) && page_lsn < *lsn) + continue; + + upper_offset = header.hole_offset + header.hole_length; + upper_length = BLCKSZ - upper_offset; + + /* write data page excluding hole */ + if (fwrite(&header, 1, sizeof(header), out) != sizeof(header) || + fwrite(page.data, 1, header.hole_offset, out) != header.hole_offset || + fwrite(page.data + upper_offset, 1, upper_length, out) != upper_length) { int errno_tmp = errno; /* oops */ fclose(in); fclose(out); - elog(ERROR_SYSTEM, - "cannot write at block %u of \"%s\": %s", + elog(ERROR_SYSTEM, "cannot write at block %u of \"%s\": %s", blknum, to_path, strerror(errno_tmp)); } + + /* update CRC */ COMP_CRC32C(crc, &header, sizeof(header)); - file->write_size += sizeof(header); - } + COMP_CRC32C(crc, page.data, header.hole_offset); + COMP_CRC32C(crc, page.data + upper_offset, upper_length); - /* 
write odd size page image */ - if (fwrite(page.data, 1, read_len, out) != read_len) + file->write_size += sizeof(header) + read_len - header.hole_length; + } + } + else + { + datapagemap_iterator_t *iter; + + iter = datapagemap_iterate(&file->pagemap); + while (datapagemap_next(iter, &blknum)) { - int errno_tmp = errno; - /* oops */ - fclose(in); - fclose(out); - elog(ERROR_SYSTEM, "cannot write at block %u of \"%s\": %s", - blknum, to_path, strerror(errno_tmp)); + XLogRecPtr page_lsn; + int upper_offset; + int upper_length; + int ret; + + offset = blknum * BLCKSZ; + if (offset > 0) + { + ret = fseek(in, offset, SEEK_SET); + if (ret != 0) + elog(ERROR_PG_INCOMPATIBLE, + "Can't seek in file offset: %llu ret:%i\n", + (long long unsigned int) offset, ret); + } + read_len = fread(&page, 1, sizeof(page), in); + + header.block = blknum; + + /* + * If an invalid data page was found, fallback to simple copy to ensure + * all pages in the file don't have BackupPageHeader. + */ + if (!parse_page(&page, &page_lsn, + &header.hole_offset, &header.hole_length)) + { + elog(LOG, "%s fall back to simple copy", file->path); + fclose(in); + fclose(out); + file->is_datafile = false; + return copy_file(from_root, to_root, file); + } + + file->read_size += read_len; + + /* if the page has not been modified since last backup, skip it */ + if (lsn && !XLogRecPtrIsInvalid(page_lsn) && page_lsn < *lsn) + continue; + + upper_offset = header.hole_offset + header.hole_length; + upper_length = BLCKSZ - upper_offset; + + /* write data page excluding hole */ + if (fwrite(&header, 1, sizeof(header), out) != sizeof(header) || + fwrite(page.data, 1, header.hole_offset, out) != header.hole_offset || + fwrite(page.data + upper_offset, 1, upper_length, out) != upper_length) + { + int errno_tmp = errno; + /* oops */ + fclose(in); + fclose(out); + elog(ERROR_SYSTEM, "cannot write at block %u of \"%s\": %s", + blknum, to_path, strerror(errno_tmp)); + } + + /* update CRC */ + COMP_CRC32C(crc, &header, 
sizeof(header)); + COMP_CRC32C(crc, page.data, header.hole_offset); + COMP_CRC32C(crc, page.data + upper_offset, upper_length); + + file->write_size += sizeof(header) + read_len - header.hole_length; } - - COMP_CRC32C(crc, page.data, read_len); - - file->write_size += read_len; - file->read_size += read_len; + pg_free(iter); } /* @@ -344,6 +366,11 @@ restore_data_file(const char *from_root, } } + elog(LOG, "header block: %i, blknum: %i, hole_offset: %i, BLCKSZ:%i", + header.block, + blknum, + header.hole_offset, + BLCKSZ); if (header.block < blknum || header.hole_offset > BLCKSZ || (int) header.hole_offset + (int) header.hole_length > BLCKSZ) { @@ -381,6 +408,7 @@ restore_data_file(const char *from_root, if (chmod(to_path, file->mode) == -1) { int errno_tmp = errno; + fclose(in); fclose(out); elog(ERROR_SYSTEM, "cannot change mode of \"%s\": %s", to_path, diff --git a/delete.c b/delete.c index 374ec074..1f81b1c6 100644 --- a/delete.c +++ b/delete.c @@ -97,7 +97,7 @@ pgBackupDelete(int keep_generations, int keep_days) else snprintf(days_str, lengthof(days_str), "%d", keep_days); - elog(LOG, "deleted old backups (generations=%s, days=%s)\n", + elog(LOG, "deleted old backups (generations=%s, days=%s)", generations_str, days_str); } diff --git a/dir.c b/dir.c index faecad46..095d1ae4 100644 --- a/dir.c +++ b/dir.c @@ -17,6 +17,7 @@ #include #include "pgut/pgut-port.h" +#include "datapagemap.h" /* directory exclusion list for backup mode listing */ const char *pgdata_exclude[] = @@ -78,7 +79,7 @@ pgFileNew(const char *path, bool omit_symlink) strerror(errno)); } - file = (pgFile *) pgut_malloc(offsetof(pgFile, path) + strlen(path) + 1); + file = (pgFile *) pgut_malloc(sizeof(pgFile)); file->mtime = st.st_mtime; file->size = st.st_size; @@ -88,6 +89,9 @@ pgFileNew(const char *path, bool omit_symlink) file->crc = 0; file->is_datafile = false; file->linked = NULL; + file->pagemap.bitmap = NULL; + file->pagemap.bitmapsize = 0; + file->path = pgut_malloc(strlen(path) + 1); 
strcpy(file->path, path); /* enough buffer size guaranteed */ return file; @@ -167,6 +171,7 @@ pgFileFree(void *file) if (file == NULL) return; free(((pgFile *)file)->linked); + free(((pgFile *)file)->path); free(file); } @@ -540,8 +545,10 @@ dir_read_file_list(const char *root, const char *file_txt) } tm.tm_isdst = -1; - file = (pgFile *) pgut_malloc(offsetof(pgFile, path) + - (root ? strlen(root) + 1 : 0) + strlen(path) + 1); + file = (pgFile *) pgut_malloc(sizeof(pgFile)); + file->path = pgut_malloc((root ? strlen(root) + 1 : 0) + strlen(path) + 1); + file->pagemap.bitmap = NULL; + file->pagemap.bitmapsize = 0; tm.tm_year -= 1900; tm.tm_mon -= 1; diff --git a/doc/pg_arman.txt b/doc/pg_arman.txt index 16b1d9f7..6deb0310 100644 --- a/doc/pg_arman.txt +++ b/doc/pg_arman.txt @@ -72,7 +72,11 @@ Backup target can be one of the following types: - Full backup, backup a whole database cluster. - Differential backup, backup only files or pages modified after the last -verified backup. +verified backup. A scan of the WAL records since the last backup up to the +LSN position of pg_start_backup is done and all the blocks touched are +recorded and tracked as part of the backup. As the WAL segments scanned +need to be located in the WAL archive, the last segment after pg_start_backup +has been run needs to be forcibly switched. It is recommended to verify backup files as soon as possible after backup. Unverified backup cannot be used in restore and in differential backup. 
diff --git a/parsexlog.c b/parsexlog.c new file mode 100644 index 00000000..6f421519 --- /dev/null +++ b/parsexlog.c @@ -0,0 +1,244 @@ +/*------------------------------------------------------------------------- + * + * parsexlog.c + * Functions for reading Write-Ahead-Log + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include "pg_arman.h" + +#include + +#include "commands/dbcommands_xlog.h" +#include "catalog/storage_xlog.h" + +/* + * RmgrNames is an array of resource manager names, to make error messages + * a bit nicer. + */ +#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup) \ + name, + +static const char *RmgrNames[RM_MAX_ID + 1] = { +#include "access/rmgrlist.h" +}; + +static void extractPageInfo(XLogReaderState *record); + +static int xlogreadfd = -1; +static XLogSegNo xlogreadsegno = -1; +static char xlogfpath[MAXPGPATH]; + +typedef struct XLogPageReadPrivate +{ + const char *archivedir; + TimeLineID tli; +} XLogPageReadPrivate; + +static int SimpleXLogPageRead(XLogReaderState *xlogreader, + XLogRecPtr targetPagePtr, + int reqLen, XLogRecPtr targetRecPtr, char *readBuf, + TimeLineID *pageTLI); + +/* + * Read WAL from the archive directory, starting from 'startpoint' on the + * given timeline, until 'endpoint'. Make note of the data blocks touched + * by the WAL records, and return them in a page map. 
+ */ +void +extractPageMap(const char *archivedir, XLogRecPtr startpoint, TimeLineID tli, + XLogRecPtr endpoint) +{ + XLogRecord *record; + XLogReaderState *xlogreader; + char *errormsg; + XLogPageReadPrivate private; + + private.archivedir = archivedir; + private.tli = tli; + xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); + if (xlogreader == NULL) + elog(ERROR, "out of memory"); + + do + { + record = XLogReadRecord(xlogreader, startpoint, &errormsg); + if (record == NULL) + { + XLogRecPtr errptr; + + errptr = startpoint ? startpoint : xlogreader->EndRecPtr; + + if (errormsg) + elog(ERROR, "could not read WAL record at %X/%X: %s", + (uint32) (errptr >> 32), (uint32) (errptr), + errormsg); + else + elog(ERROR, "could not read WAL record at %X/%X", + (uint32) (startpoint >> 32), + (uint32) (startpoint)); + } + + extractPageInfo(xlogreader); + + startpoint = InvalidXLogRecPtr; /* continue reading at next record */ + + } while (xlogreader->ReadRecPtr != endpoint); + + XLogReaderFree(xlogreader); + if (xlogreadfd != -1) + { + close(xlogreadfd); + xlogreadfd = -1; + } +} + +/* XLogreader callback function, to read a WAL page */ +static int +SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, + int reqLen, XLogRecPtr targetRecPtr, char *readBuf, + TimeLineID *pageTLI) +{ + XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data; + uint32 targetPageOff; + XLogRecPtr targetSegEnd; + XLogSegNo targetSegNo; + + XLByteToSeg(targetPagePtr, targetSegNo); + XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd); + targetPageOff = targetPagePtr % XLogSegSize; + + /* + * See if we need to switch to a new segment because the requested record + * is not in the currently open one. 
+ */ + if (xlogreadfd >= 0 && !XLByteInSeg(targetPagePtr, xlogreadsegno)) + { + close(xlogreadfd); + xlogreadfd = -1; + } + + XLByteToSeg(targetPagePtr, xlogreadsegno); + + if (xlogreadfd < 0) + { + char xlogfname[MAXFNAMELEN]; + + XLogFileName(xlogfname, private->tli, xlogreadsegno); + snprintf(xlogfpath, MAXPGPATH, "%s/%s", private->archivedir, + xlogfname); + elog(LOG, "opening WAL segment \"%s\"", xlogfpath); + + xlogreadfd = open(xlogfpath, O_RDONLY | PG_BINARY, 0); + + if (xlogreadfd < 0) + { + elog(WARNING, "could not open WAL segment \"%s\": %s", + xlogfpath, strerror(errno)); + return -1; + } + } + + /* + * At this point, we have the right segment open. + */ + Assert(xlogreadfd != -1); + + /* Read the requested page */ + if (lseek(xlogreadfd, (off_t) targetPageOff, SEEK_SET) < 0) + { + elog(WARNING, "could not seek in file \"%s\": %s", xlogfpath, + strerror(errno)); + return -1; + } + + if (read(xlogreadfd, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + { + elog(WARNING, "could not read from file \"%s\": %s", + xlogfpath, strerror(errno)); + return -1; + } + + Assert(targetSegNo == xlogreadsegno); + + *pageTLI = private->tli; + return XLOG_BLCKSZ; +} + +/* + * Extract information on which blocks the current record modifies. + */ +static void +extractPageInfo(XLogReaderState *record) +{ + int block_id; + RmgrId rmid = XLogRecGetRmid(record); + uint8 info = XLogRecGetInfo(record); + uint8 rminfo = info & ~XLR_INFO_MASK; + + /* Is this a special record type that I recognize? */ + + if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE) + { + /* + * New databases can be safely ignored. They would be completely + * copied if found. + */ + } + else if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_DROP) + { + /* + * An existing database was dropped. It is fine to ignore that + * they will be removed appropriately. + */ + } + else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_CREATE) + { + /* + * We can safely ignore these. 
The file will be removed when + * combining the backups in the case of differential on. + */ + } + else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_TRUNCATE) + { + /* + * We can safely ignore these. When we compare the sizes later on, + * we'll notice that they differ, and copy the missing tail from + * source system. + */ + } + else if (info & XLR_SPECIAL_REL_UPDATE) + { + /* + * This record type modifies a relation file in some special way, but + * we don't recognize the type. That's bad - we don't know how to + * track that change. + */ + elog(ERROR, "WAL record modifies a relation, but record type is not recognized\n" + "lsn: %X/%X, rmgr: %s, info: %02X", + (uint32) (record->ReadRecPtr >> 32), (uint32) (record->ReadRecPtr), + RmgrNames[rmid], info); + } + + for (block_id = 0; block_id <= record->max_block_id; block_id++) + { + RelFileNode rnode; + ForkNumber forknum; + BlockNumber blkno; + + if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno)) + continue; + + /* We only care about the main fork; others are copied in toto */ + if (forknum != MAIN_FORKNUM) + continue; + + process_block_change(forknum, rnode, blkno); + } +} diff --git a/pg_arman.h b/pg_arman.h index dc72f684..17ab01f1 100644 --- a/pg_arman.h +++ b/pg_arman.h @@ -17,8 +17,10 @@ #include "pgut/pgut.h" #include "access/xlogdefs.h" #include "access/xlog_internal.h" +#include "catalog/pg_control.h" #include "utils/pg_crc.h" #include "parray.h" +#include "datapagemap.h" /* Query to fetch current transaction ID */ #define TXID_CURRENT_SQL "SELECT txid_current();" @@ -62,7 +64,8 @@ typedef struct pgFile pg_crc32 crc; /* CRC value of the file, regular file only */ char *linked; /* path of the linked file */ bool is_datafile; /* true if the file is PostgreSQL data file */ - char path[1]; /* path of the file */ + char *path; /* path of the file */ + datapagemap_t pagemap; } pgFile; typedef struct pgBackupRange @@ -196,11 +199,16 @@ extern pgBackup current; /* exclude directory list for 
$PGDATA file listing */ extern const char *pgdata_exclude[]; +/* backup file list from non-snapshot */ +extern parray *backup_files_list; + /* in backup.c */ extern int do_backup(pgBackupOption bkupopt); extern BackupMode parse_backup_mode(const char *value); extern void check_server_version(void); extern bool fileExists(const char *path); +extern void process_block_change(ForkNumber forknum, RelFileNode rnode, + BlockNumber blkno); /* in restore.c */ extern int do_restore(const char *target_time, @@ -279,8 +287,13 @@ extern void restore_data_file(const char *from_root, const char *to_root, extern bool copy_file(const char *from_root, const char *to_root, pgFile *file); +/* parsexlog.c */ +extern void extractPageMap(const char *datadir, XLogRecPtr startpoint, + TimeLineID tli, XLogRecPtr endpoint); + /* in util.c */ extern TimeLineID get_current_timeline(void); +extern void sanityChecks(void); extern void time2iso(char *buf, size_t len, time_t time); extern const char *status2str(BackupStatus status); extern void remove_trailing_space(char *buf, int comment_mark); diff --git a/restore.c b/restore.c index 04755ac2..5317f3ca 100644 --- a/restore.c +++ b/restore.c @@ -155,13 +155,14 @@ base_backup_found: /* restore following differential backup */ elog(LOG, "searching differential backup..."); + for (i = base_index - 1; i >= 0; i--) { pgBackup *backup = (pgBackup *) parray_get(backups, i); /* don't use incomplete nor different timeline backup */ if (backup->status != BACKUP_STATUS_OK || - backup->tli != base_backup->tli) + backup->tli != base_backup->tli) continue; /* use database backup only */ diff --git a/sql/common.sh b/sql/common.sh index fccfb7b0..16b83dc5 100644 --- a/sql/common.sh +++ b/sql/common.sh @@ -70,6 +70,7 @@ function init_backup() port = ${TEST_PGPORT} logging_collector = on wal_level = hot_standby +wal_log_hints = on archive_mode = on archive_command = 'cp %p ${ARCLOG_PATH}/%f' EOF diff --git a/util.c b/util.c index b7eda1a2..2f1351df 100644 --- 
a/util.c +++ b/util.c @@ -11,7 +11,7 @@ #include -#include "catalog/pg_control.h" +#include "storage/bufpage.h" static void checkControlFile(ControlFileData *ControlFile) @@ -27,13 +27,13 @@ checkControlFile(ControlFileData *ControlFile) if (!EQ_CRC32C(crc, ControlFile->crc)) elog(ERROR_CORRUPTED, "Calculated CRC checksum does not match value stored in file.\n" "Either the file is corrupt, or it has a different layout than this program\n" - "is expecting. The results below are untrustworthy.\n"); + "is expecting. The results below are untrustworthy."); if (ControlFile->pg_control_version % 65536 == 0 && ControlFile->pg_control_version / 65536 != 0) elog(ERROR_CORRUPTED, "possible byte ordering mismatch\n" "The byte ordering used to store the pg_control file might not match the one\n" "used by this program. In that case the results below would be incorrect, and\n" - "the PostgreSQL installation would be incompatible with this data directory.\n"); + "the PostgreSQL installation would be incompatible with this data directory."); } /* @@ -43,7 +43,7 @@ static void digestControlFile(ControlFileData *ControlFile, char *src, size_t size) { if (size != PG_CONTROL_SIZE) - elog(ERROR_PG_INCOMPATIBLE, "unexpected control file size %d, expected %d\n", + elog(ERROR_PG_INCOMPATIBLE, "unexpected control file size %d, expected %d", (int) size, PG_CONTROL_SIZE); memcpy(ControlFile, src, sizeof(ControlFileData)); @@ -52,6 +52,29 @@ digestControlFile(ControlFileData *ControlFile, char *src, size_t size) checkControlFile(ControlFile); } +void +sanityChecks(void) +{ + ControlFileData ControlFile; + char *buffer; + size_t size; + + /* First fetch file... */ + buffer = slurpFile(pgdata, "global/pg_control", &size); + digestControlFile(&ControlFile, buffer, size); + pg_free(buffer); + + /* + * The node needs to use either data checksums or hint bit wal-logging; + * this prevents data corruption that could occur because of + * hint bits. 
+ */ + if (ControlFile.data_checksum_version != PG_DATA_CHECKSUM_VERSION && + !ControlFile.wal_log_hints) + elog(ERROR_PG_INCOMPATIBLE, + "target master needs to use either data checksums or \"wal_log_hints = on\"."); +} + /* * Utility shared by backup and restore to fetch the current timeline * used by a node.