/*------------------------------------------------------------------------- * * parsexlog.c * Functions for reading Write-Ahead-Log * * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 2015-2017, Postgres Professional * *------------------------------------------------------------------------- */ #include "postgres_fe.h" #include "pg_probackup.h" #include #include "commands/dbcommands_xlog.h" #include "catalog/storage_xlog.h" #include "access/transam.h" /* * RmgrNames is an array of resource manager names, to make error messages * a bit nicer. */ #define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup) \ name, static const char *RmgrNames[RM_MAX_ID + 1] = { #include "access/rmgrlist.h" }; /* some from access/xact.h */ /* * XLOG allows to store some information in high 4 bits of log record xl_info * field. We use 3 for the opcode, and one about an optional flag variable. */ #define XLOG_XACT_COMMIT 0x00 #define XLOG_XACT_PREPARE 0x10 #define XLOG_XACT_ABORT 0x20 #define XLOG_XACT_COMMIT_PREPARED 0x30 #define XLOG_XACT_ABORT_PREPARED 0x40 #define XLOG_XACT_ASSIGNMENT 0x50 /* free opcode 0x60 */ /* free opcode 0x70 */ /* mask for filtering opcodes out of xl_info */ #define XLOG_XACT_OPMASK 0x70 typedef struct xl_xact_commit { TimestampTz xact_time; /* time of commit */ /* xl_xact_xinfo follows if XLOG_XACT_HAS_INFO */ /* xl_xact_dbinfo follows if XINFO_HAS_DBINFO */ /* xl_xact_subxacts follows if XINFO_HAS_SUBXACT */ /* xl_xact_relfilenodes follows if XINFO_HAS_RELFILENODES */ /* xl_xact_invals follows if XINFO_HAS_INVALS */ /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ /* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! */ } xl_xact_commit; typedef struct xl_xact_abort { TimestampTz xact_time; /* time of abort */ /* xl_xact_xinfo follows if XLOG_XACT_HAS_INFO */ /* No db_info required */ /* xl_xact_subxacts follows if HAS_SUBXACT */ /* xl_xact_relfilenodes follows if HAS_RELFILENODES */ /* No invalidation messages needed. */ /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ } xl_xact_abort; static void extractPageInfo(XLogReaderState *record); static bool getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime); static int xlogreadfd = -1; static XLogSegNo xlogreadsegno = -1; static char xlogfpath[MAXPGPATH]; static bool xlogexists = false; typedef struct XLogPageReadPrivate { const char *archivedir; TimeLineID tli; } XLogPageReadPrivate; static int SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *pageTLI); /* * Read WAL from the archive directory, from 'startpoint' to 'endpoint' on the * given timeline. Collect data blocks touched by the WAL records into a page map. * * If **prev_segno** is true then read all segments up to **endpoint** segment * minus one. Else read all segments up to **endpoint** segment. */ void extractPageMap(const char *archivedir, XLogRecPtr startpoint, TimeLineID tli, XLogRecPtr endpoint, bool prev_segno) { XLogRecord *record; XLogReaderState *xlogreader; char *errormsg; XLogPageReadPrivate private; XLogSegNo endSegNo, nextSegNo = 0; private.archivedir = archivedir; private.tli = tli; xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); if (xlogreader == NULL) elog(ERROR, "out of memory"); XLByteToSeg(endpoint, endSegNo); if (prev_segno) endSegNo--; do { record = XLogReadRecord(xlogreader, startpoint, &errormsg); if (record == NULL) { XLogRecPtr errptr; errptr = startpoint ? startpoint : xlogreader->EndRecPtr; if (errormsg) elog(ERROR, "could not read WAL record at %X/%X: %s", (uint32) (errptr >> 32), (uint32) (errptr), errormsg); else elog(ERROR, "could not read WAL record at %X/%X", (uint32) (errptr >> 32), (uint32) (errptr)); } extractPageInfo(xlogreader); startpoint = InvalidXLogRecPtr; /* continue reading at next record */ XLByteToSeg(xlogreader->EndRecPtr, nextSegNo); } while (nextSegNo <= endSegNo && xlogreader->EndRecPtr != endpoint); XLogReaderFree(xlogreader); if (xlogreadfd != -1) { close(xlogreadfd); xlogreadfd = -1; xlogexists = false; } } void validate_wal(pgBackup *backup, const char *archivedir, time_t target_time, TransactionId target_xid, TimeLineID tli) { XLogRecPtr startpoint = backup->start_lsn; XLogRecord *record; XLogReaderState *xlogreader; char *errormsg; XLogPageReadPrivate private; TransactionId last_xid = InvalidTransactionId; TimestampTz last_time = 0; char last_timestamp[100], target_timestamp[100]; bool all_wal = false, got_endpoint = false; private.archivedir = archivedir; private.tli = tli; xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); if (xlogreader == NULL) elog(ERROR, "out of memory"); /* We will check it in the end */ xlogfpath[0] = '\0'; while (true) { bool timestamp_record; record = XLogReadRecord(xlogreader, startpoint, &errormsg); if (record == NULL) { if (errormsg) elog(WARNING, "%s", errormsg); break; } /* Got WAL record at stop_lsn */ if (xlogreader->ReadRecPtr == backup->stop_lsn) got_endpoint = true; timestamp_record = getRecordTimestamp(xlogreader, &last_time); if (XLogRecGetXid(xlogreader) != InvalidTransactionId) last_xid = XLogRecGetXid(xlogreader); /* Check target xid */ if (TransactionIdIsValid(target_xid) && target_xid == last_xid) { all_wal = true; break; } /* Check target time */ else if (target_time != 0 && timestamp_record && timestamptz_to_time_t(last_time) >= target_time) { all_wal = true; break; } /* If there are no target xid and target time */ else if (!TransactionIdIsValid(target_xid) && target_time == 0 && xlogreader->ReadRecPtr == backup->stop_lsn) { all_wal = true; /* We don't stop here. We want to get last_xid and last_time */ } startpoint = InvalidXLogRecPtr; /* continue reading at next record */ } if (last_time > 0) time2iso(last_timestamp, lengthof(last_timestamp), timestamptz_to_time_t(last_time)); else time2iso(last_timestamp, lengthof(last_timestamp), backup->recovery_time); if (last_xid == InvalidTransactionId) last_xid = backup->recovery_xid; /* There are all need WAL records */ if (all_wal) elog(INFO, "backup validation completed successfully on time %s and xid " XID_FMT, last_timestamp, last_xid); /* There are not need WAL records */ else { if (xlogfpath[0] != 0) { /* XLOG reader couldnt read WAL segment */ if (!xlogexists) elog(WARNING, "WAL segment \"%s\" is absent", xlogfpath); else if (xlogreadfd != -1) elog(WARNING, "error was occured during reading WAL segment \"%s\"", xlogfpath); } if (!got_endpoint) elog(ERROR, "there are not enough WAL records to restore from %X/%X to %X/%X", (uint32) (backup->start_lsn >> 32), (uint32) (backup->start_lsn), (uint32) (backup->stop_lsn >> 32), (uint32) (backup->stop_lsn)); else { if (target_time > 0) time2iso(target_timestamp, lengthof(target_timestamp), target_time); elog(WARNING, "recovery can be done up to time %s and xid " XID_FMT, last_timestamp, last_xid); if (TransactionIdIsValid(target_xid) && target_time != 0) elog(ERROR, "not enough WAL records to time %s and xid " XID_FMT, target_timestamp, target_xid); else if (TransactionIdIsValid(target_xid)) elog(ERROR, "not enough WAL records to xid " XID_FMT, target_xid); else if (target_time != 0) elog(ERROR, "not enough WAL records to time %s", target_timestamp); } } /* clean */ XLogReaderFree(xlogreader); if (xlogreadfd != -1) { close(xlogreadfd); xlogreadfd = -1; xlogexists = false; } } /* * Read from archived WAL segments latest recovery time and xid. All necessary * segments present at archive folder. We waited **stop_lsn** in * pg_stop_backup(). */ bool read_recovery_info(const char *archivedir, TimeLineID tli, XLogRecPtr start_lsn, XLogRecPtr stop_lsn, time_t *recovery_time, TransactionId *recovery_xid) { XLogRecPtr startpoint = stop_lsn; XLogReaderState *xlogreader; XLogPageReadPrivate private; private.archivedir = archivedir; private.tli = tli; xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private); if (xlogreader == NULL) elog(ERROR, "out of memory"); /* Read records from stop_lsn down to start_lsn */ do { XLogRecord *record; TimestampTz last_time = 0; char *errormsg; record = XLogReadRecord(xlogreader, startpoint, &errormsg); if (record == NULL) { XLogRecPtr errptr; errptr = startpoint ? startpoint : xlogreader->EndRecPtr; if (errormsg) elog(ERROR, "could not read WAL record at %X/%X: %s", (uint32) (errptr >> 32), (uint32) (errptr), errormsg); else elog(ERROR, "could not read WAL record at %X/%X", (uint32) (errptr >> 32), (uint32) (errptr)); } /* Read previous record */ startpoint = record->xl_prev; if (getRecordTimestamp(xlogreader, &last_time)) { *recovery_time = timestamptz_to_time_t(last_time); *recovery_xid = XLogRecGetXid(xlogreader); return true; } } while (startpoint >= start_lsn); /* Didn't find timestamp from WAL records between start_lsn and stop_lsn */ return false; } /* XLogreader callback function, to read a WAL page */ static int SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *pageTLI) { XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data; uint32 targetPageOff; XLogSegNo targetSegNo; XLByteToSeg(targetPagePtr, targetSegNo); targetPageOff = targetPagePtr % XLogSegSize; /* * See if we need to switch to a new segment because the requested record * is not in the currently open one. */ if (xlogreadfd >= 0 && !XLByteInSeg(targetPagePtr, xlogreadsegno)) { close(xlogreadfd); xlogreadfd = -1; xlogexists = false; } XLByteToSeg(targetPagePtr, xlogreadsegno); if (xlogreadfd < 0) { char xlogfname[MAXFNAMELEN]; XLogFileName(xlogfname, private->tli, xlogreadsegno); snprintf(xlogfpath, MAXPGPATH, "%s/%s", private->archivedir, xlogfname); if (fileExists(xlogfpath)) { elog(LOG, "opening WAL segment \"%s\"", xlogfpath); xlogexists = true; xlogreadfd = open(xlogfpath, O_RDONLY | PG_BINARY, 0); if (xlogreadfd < 0) { elog(WARNING, "could not open WAL segment \"%s\": %s", xlogfpath, strerror(errno)); return -1; } } /* Exit without error if WAL segment doesn't exist */ else return -1; } /* * At this point, we have the right segment open. */ Assert(xlogreadfd != -1); /* Read the requested page */ if (lseek(xlogreadfd, (off_t) targetPageOff, SEEK_SET) < 0) { elog(WARNING, "could not seek in file \"%s\": %s", xlogfpath, strerror(errno)); return -1; } if (read(xlogreadfd, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) { elog(WARNING, "could not read from file \"%s\": %s", xlogfpath, strerror(errno)); return -1; } Assert(targetSegNo == xlogreadsegno); *pageTLI = private->tli; return XLOG_BLCKSZ; } /* * Extract information about blocks modified in this record. */ static void extractPageInfo(XLogReaderState *record) { uint8 block_id; RmgrId rmid = XLogRecGetRmid(record); uint8 info = XLogRecGetInfo(record); uint8 rminfo = info & ~XLR_INFO_MASK; /* Is this a special record type that I recognize? */ if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE) { /* * New databases can be safely ignored. They would be completely * copied if found. */ } else if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_DROP) { /* * An existing database was dropped. It is fine to ignore that * they will be removed appropriately. */ } else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_CREATE) { /* * We can safely ignore these. The file will be removed when * combining the backups in the case of differential on. */ } else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_TRUNCATE) { /* * We can safely ignore these. When we compare the sizes later on, * we'll notice that they differ, and copy the missing tail from * source system. */ } else if (info & XLR_SPECIAL_REL_UPDATE) { /* * This record type modifies a relation file in some special way, but * we don't recognize the type. That's bad - we don't know how to * track that change. */ elog(ERROR, "WAL record modifies a relation, but record type is not recognized\n" "lsn: %X/%X, rmgr: %s, info: %02X", (uint32) (record->ReadRecPtr >> 32), (uint32) (record->ReadRecPtr), RmgrNames[rmid], info); } for (block_id = 0; block_id <= record->max_block_id; block_id++) { RelFileNode rnode; ForkNumber forknum; BlockNumber blkno; if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno)) continue; /* We only care about the main fork; others are copied in toto */ if (forknum != MAIN_FORKNUM) continue; process_block_change(forknum, rnode, blkno); } } /* * Extract timestamp from WAL record. * * If the record contains a timestamp, returns true, and saves the timestamp * in *recordXtime. If the record type has no timestamp, returns false. * Currently, only transaction commit/abort records and restore points contain * timestamps. */ static bool getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime) { uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; uint8 xact_info = info & XLOG_XACT_OPMASK; uint8 rmid = XLogRecGetRmid(record); if (rmid == RM_XLOG_ID && info == XLOG_RESTORE_POINT) { *recordXtime = ((xl_restore_point *) XLogRecGetData(record))->rp_time; return true; } else if (rmid == RM_XACT_ID && (xact_info == XLOG_XACT_COMMIT || xact_info == XLOG_XACT_COMMIT_PREPARED)) { *recordXtime = ((xl_xact_commit *) XLogRecGetData(record))->xact_time; return true; } else if (rmid == RM_XACT_ID && (xact_info == XLOG_XACT_ABORT || xact_info == XLOG_XACT_ABORT_PREPARED)) { *recordXtime = ((xl_xact_abort *) XLogRecGetData(record))->xact_time; return true; } return false; }