From d29aa8b0b4ce8a5e3bd3c7b5dfbbf040d7029db5 Mon Sep 17 00:00:00 2001 From: Grigory Smolkin Date: Wed, 31 Oct 2018 09:47:53 +0300 Subject: [PATCH 01/26] PGPRO-2095: backup from replica without connection to master for PostgreSQL >= 9.6 --- src/backup.c | 81 +++++++++++++++++++++++++++++++++---------- src/pg_probackup.h | 1 + src/util.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 150 insertions(+), 18 deletions(-) diff --git a/src/backup.c b/src/backup.c index 0dd681fa..602ab823 100644 --- a/src/backup.c +++ b/src/backup.c @@ -475,6 +475,8 @@ do_backup_instance(void) pgBackup *prev_backup = NULL; parray *prev_backup_filelist = NULL; + pgFile *pg_control = NULL; + elog(LOG, "Database backup start"); /* Initialize size summary */ @@ -754,9 +756,37 @@ do_backup_instance(void) parray_free(prev_backup_filelist); } + /* Copy pg_control in case of backup from replica >= 9.6 */ + if (current.from_replica && !exclusive_backup) + { + for (i = 0; i < parray_num(backup_files_list); i++) + { + pgFile *tmp_file = (pgFile *) parray_get(backup_files_list, i); + + if (strcmp(tmp_file->name, "pg_control") == 0) + { + pg_control = tmp_file; + break; + } + } + + if (!pg_control) + elog(ERROR, "Failed to locate pg_control in copied files"); + + if (is_remote_backup) + remote_copy_file(NULL, pg_control); + else + if (!copy_file(pgdata, database_path, pg_control)) + elog(ERROR, "Failed to copy pg_control"); + } + + /* Notify end of backup */ pg_stop_backup(&current); + if (current.from_replica && !exclusive_backup) + set_min_recovery_point(pg_control, database_path, current.stop_lsn); + /* Add archived xlog files into the list of files of this backup */ if (stream_wal) { @@ -883,7 +913,7 @@ do_backup(time_t start_time) } } - if (current.from_replica) + if (current.from_replica && exclusive_backup) { /* Check master connection options */ if (master_host == NULL) @@ -1089,8 +1119,11 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup) params[0] = label; - /* For replica we call pg_start_backup() on master */ - conn = (backup->from_replica) ? master_conn : backup_conn; + /* For 9.5 replica we call pg_start_backup() on master */ + if (backup->from_replica && exclusive_backup) + conn = master_conn; + else + conn = backup_conn; /* 2nd argument is 'fast'*/ params[1] = smooth ? "false" : "true"; @@ -1118,16 +1151,21 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup) PQclear(res); - if (current.backup_mode == BACKUP_MODE_DIFF_PAGE) + if (current.backup_mode == BACKUP_MODE_DIFF_PAGE && + (!(backup->from_replica && !exclusive_backup))) /* * Switch to a new WAL segment. It is necessary to get archived WAL * segment, which includes start LSN of current backup. + * Don't do this for replica backups unless it's PG 9.5 */ pg_switch_wal(conn); + //elog(INFO, "START LSN: %X/%X", + // (uint32) (backup->start_lsn >> 32), (uint32) (backup->start_lsn)); + if (current.backup_mode == BACKUP_MODE_DIFF_PAGE) /* In PAGE mode wait for current segment... */ - wait_wal_lsn(backup->start_lsn, true, false); + wait_wal_lsn(backup->start_lsn, true, false); /* * Do not wait start_lsn for stream backup.
* Because WAL streaming will start after pg_start_backup() in stream * mode. */ @@ -1669,7 +1707,7 @@ pg_stop_backup(pgBackup *backup) PGresult *tablespace_map_content = NULL; uint32 lsn_hi; uint32 lsn_lo; - XLogRecPtr restore_lsn = InvalidXLogRecPtr; + //XLogRecPtr restore_lsn = InvalidXLogRecPtr; int pg_stop_backup_timeout = 0; char path[MAXPGPATH]; char backup_label[MAXPGPATH]; @@ -1689,16 +1727,21 @@ pg_stop_backup(pgBackup *backup) if (!backup_in_progress) elog(ERROR, "backup is not in progress"); - /* For replica we call pg_stop_backup() on master */ - conn = (current.from_replica) ? master_conn : backup_conn; + /* For 9.5 replica we call pg_stop_backup() on master */ + if (current.from_replica && exclusive_backup) + conn = master_conn; + else + conn = backup_conn; /* Remove annoying NOTICE messages generated by backend */ res = pgut_execute(conn, "SET client_min_messages = warning;", 0, NULL); PQclear(res); - /* Create restore point */ - if (backup != NULL) + /* Create restore point + * only if it's a backup from master, or from an exclusive replica (which connects to master) + */ + if (backup != NULL && (!current.from_replica || (current.from_replica && exclusive_backup))) { const char *params[1]; char name[1024]; @@ -1716,7 +1759,7 @@ pg_stop_backup(pgBackup *backup) /* Extract timeline and LSN from the result */ XLogDataFromLSN(PQgetvalue(res, 0, 0), &lsn_hi, &lsn_lo); /* Calculate LSN */ - restore_lsn = ((uint64) lsn_hi) << 32 | lsn_lo; + //restore_lsn = ((uint64) lsn_hi) << 32 | lsn_lo; PQclear(res); } @@ -1830,10 +1873,10 @@ pg_stop_backup(pgBackup *backup) /* Calculate LSN */ stop_backup_lsn = ((uint64) lsn_hi) << 32 | lsn_lo; - if (!XRecOffIsValid(stop_backup_lsn)) - { - stop_backup_lsn = restore_lsn; - } + //if (!XRecOffIsValid(stop_backup_lsn)) + //{ + // stop_backup_lsn = restore_lsn; + //} if (!XRecOffIsValid(stop_backup_lsn)) elog(ERROR, "Invalid stop_backup_lsn value %X/%X", (uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn)); @@ -1939,7 +1982,7 @@ pg_stop_backup(pgBackup *backup) stream_xlog_path[MAXPGPATH]; /* Wait for stop_lsn to be received by replica */ - if (backup->from_replica) + if (current.from_replica) wait_replica_wal_lsn(stop_backup_lsn, false); /* * Wait for stop_lsn to be archived or streamed. @@ -1962,10 +2005,12 @@ pg_stop_backup(pgBackup *backup) elog(LOG, "Getting the Recovery Time from WAL"); + /* iterate over WAL from stop_backup lsn to start_backup lsn */ if (!read_recovery_info(xlog_path, backup->tli, xlog_seg_size, backup->start_lsn, backup->stop_lsn, &backup->recovery_time, &backup->recovery_xid)) { + elog(LOG, "Failed to find Recovery Time in WAL. Forced to trust current_timestamp"); backup->recovery_time = recovery_time; backup->recovery_xid = recovery_xid; } @@ -2074,7 +2119,7 @@ backup_files(void *arg) elog(ERROR, "interrupted during backup"); if (progress) - elog(LOG, "Progress: (%d/%d). Process file \"%s\"", + elog(INFO, "Progress: (%d/%d).
Process file \"%s\"", i + 1, n_backup_files_list, file->path); /* stat file to check its current state */ @@ -2168,7 +2213,7 @@ backup_files(void *arg) file->path, file->write_size); } else - elog(LOG, "unexpected file type %d", buf.st_mode); + elog(WARNING, "unexpected file type %d", buf.st_mode); } /* Close connection */ diff --git a/src/pg_probackup.h b/src/pg_probackup.h index e337771e..b75bb581 100644 --- a/src/pg_probackup.h +++ b/src/pg_probackup.h @@ -555,6 +555,7 @@ extern uint64 get_system_identifier(char *pgdata); extern uint64 get_remote_system_identifier(PGconn *conn); extern uint32 get_data_checksum_version(bool safe); extern uint32 get_xlog_seg_size(char *pgdata_path); +extern void set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_backup_lsn); extern void sanityChecks(void); extern void time2iso(char *buf, size_t len, time_t time); diff --git a/src/util.c b/src/util.c index 4eefa788..5f059c37 100644 --- a/src/util.c +++ b/src/util.c @@ -14,6 +14,8 @@ #include +#include + const char * base36enc(long unsigned int value) { @@ -100,6 +102,44 @@ digestControlFile(ControlFileData *ControlFile, char *src, size_t size) checkControlFile(ControlFile); } +/* + * Write ControlFile to pg_control + */ +static void +writeControlFile(ControlFileData *ControlFile, char *path) +{ + int fd; + char *buffer = NULL; + +#if PG_VERSION_NUM >= 100000 + int ControlFileSize = PG_CONTROL_FILE_SIZE; +#else + int ControlFileSize = PG_CONTROL_SIZE; +#endif + + /* copy controlFileSize */ + buffer = pg_malloc(ControlFileSize); + memcpy(buffer, &ControlFile, sizeof(ControlFileData)); + + /* Write pg_control */ + unlink(path); + fd = open(path, + O_RDWR | O_CREAT | O_EXCL | PG_BINARY, + S_IRUSR | S_IWUSR); + + if (fd < 0) + elog(ERROR, "Failed to open file: %s", path); + + if (write(fd, buffer, ControlFileSize) != ControlFileSize) + elog(ERROR, "Failed to overwrite file: %s", path); + + if (fsync(fd) != 0) + elog(ERROR, "Failed to fsync file: %s", path); + + pg_free(buffer); + close(fd); +} + /* * Utility shared by backup and restore to fetch the current timeline * used by a node. @@ -250,6 +290,52 @@ get_data_checksum_version(bool safe) return ControlFile.data_checksum_version; } +/* MinRecoveryPoint 'as-is' is not to be trusted + * Use STOP LSN instead + */ +void +set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_backup_lsn) +{ + ControlFileData ControlFile; + char *buffer; + size_t size; + char fullpath[MAXPGPATH]; + + elog(LOG, "Setting minRecPoint to STOP LSN: %X/%X", + (uint32) (stop_backup_lsn >> 32), + (uint32) stop_backup_lsn); + + /* Path to pg_control in backup */ + snprintf(fullpath, sizeof(fullpath), "%s/%s", backup_path, XLOG_CONTROL_FILE); + + /* First fetch file... */ + buffer = slurpFile(backup_path, XLOG_CONTROL_FILE, &size, false); + if (buffer == NULL) + elog(ERROR, "ERROR"); + + digestControlFile(&ControlFile, buffer, size); + + ControlFile.minRecoveryPoint = stop_backup_lsn; + + /* Update checksum in pg_control header */ + INIT_CRC32C(ControlFile.crc); + COMP_CRC32C(ControlFile.crc, + (char *) &ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32C(ControlFile.crc); + + /* paranoia */ + checkControlFile(&ControlFile); + + /* update pg_control */ + writeControlFile(&ControlFile, fullpath); + + /* Update pg_control checksum in backup_list */ + file->crc = pgFileGetCRC(fullpath, false); + + pg_free(buffer); +} + /* * Convert time_t value to ISO-8601 format string. Always set timezone offset. 
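The crux of PATCH 01 above: a backup taken from a replica without a master connection cannot trust the minRecoveryPoint recorded in the copied pg_control, so set_min_recovery_point() rewrites it to the backup's STOP LSN and re-checksums the file. The sketch below isolates that pg_control-editing step; it is an illustration under assumptions (PostgreSQL backend headers catalog/pg_control.h and port/pg_crc32c.h, as in PG 10+), not code from the patch.

#include <stddef.h>                /* offsetof */
#include "catalog/pg_control.h"    /* ControlFileData */
#include "port/pg_crc32c.h"        /* INIT/COMP/FIN_CRC32C */

/* Hypothetical helper: point minRecoveryPoint at the backup's STOP LSN. */
static void
patch_min_recovery_point(ControlFileData *control, XLogRecPtr stop_lsn)
{
	control->minRecoveryPoint = stop_lsn;

	/*
	 * pg_control stores a CRC-32C over everything up to its crc field;
	 * without recomputing it, a server starting from the restored backup
	 * would reject the control file as corrupt.
	 */
	INIT_CRC32C(control->crc);
	COMP_CRC32C(control->crc, (char *) control,
				offsetof(ControlFileData, crc));
	FIN_CRC32C(control->crc);
}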
From 3769efdcb2c2cf1d88261d2d5e382d6b766e48a6 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 2 Nov 2018 15:08:33 +0300 Subject: [PATCH 02/26] Update copyrights and rename COPYRIGHT to LICENSE --- COPYRIGHT | 29 ----------------------------- LICENSE | 22 ++++++++++++++++++++++ README.md | 6 +++--- src/archive.c | 2 +- src/backup.c | 2 +- src/catalog.c | 2 +- src/data.c | 2 +- src/delete.c | 2 +- src/dir.c | 2 +- src/fetch.c | 2 +- src/help.c | 2 +- src/init.c | 2 +- src/pg_probackup.c | 2 +- src/pg_probackup.h | 2 +- src/restore.c | 2 +- src/util.c | 2 +- src/utils/logger.c | 2 +- src/utils/logger.h | 2 +- src/utils/pgut.c | 6 +++--- src/utils/pgut.h | 4 ++-- src/validate.c | 2 +- 21 files changed, 46 insertions(+), 53 deletions(-) delete mode 100644 COPYRIGHT create mode 100644 LICENSE diff --git a/COPYRIGHT b/COPYRIGHT deleted file mode 100644 index 49d70472..00000000 --- a/COPYRIGHT +++ /dev/null @@ -1,29 +0,0 @@ -Copyright (c) 2015-2017, Postgres Professional -Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - -Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group -Portions Copyright (c) 1994, The Regents of the University of California - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the NIPPON TELEGRAPH AND TELEPHONE CORPORATION - (NTT) nor the names of its contributors may be used to endorse or - promote products derived from this software without specific prior - written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..3969eaa8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2015-2018, Postgres Professional +Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + +Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group +Portions Copyright (c) 1994, The Regents of the University of California + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose, without fee, and without a written agreement +is hereby granted, provided that the above copyright notice and this +paragraph and the following two paragraphs appear in all copies. 
+ +IN NO EVENT SHALL POSTGRES PROFESSIONAL BE LIABLE TO ANY PARTY FOR +DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING +LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS +DOCUMENTATION, EVEN IF POSTGRES PROFESSIONAL HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +POSTGRES PROFESSIONAL SPECIFICALLY DISCLAIMS ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS +ON AN "AS IS" BASIS, AND POSTGRES PROFESSIONAL HAS NO OBLIGATIONS TO +PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. diff --git a/README.md b/README.md index 73936ae1..53945eb8 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ The utility is compatible with: `PTRACK` backup support provided via following options: * vanilla PostgreSQL compiled with ptrack patch. Currently there are patches for [PostgreSQL 9.6](https://gist.githubusercontent.com/gsmol/5b615c971dfd461c76ef41a118ff4d97/raw/e471251983f14e980041f43bea7709b8246f4178/ptrack_9.6.6_v1.5.patch) and [PostgreSQL 10](https://gist.githubusercontent.com/gsmol/be8ee2a132b88463821021fd910d960e/raw/de24f9499f4f314a4a3e5fae5ed4edb945964df8/ptrack_10.1_v1.5.patch) -* Postgres Pro Standard 9.5, 9.6, 10 +* Postgres Pro Standard 9.5, 9.6, 10, 11 * Postgres Pro Enterprise 9.5, 9.6, 10 As compared to other backup solutions, `pg_probackup` offers the following benefits that can help you implement different backup strategies and deal with large amounts of data: @@ -38,7 +38,7 @@ Regardless of the chosen backup type, all backups taken with `pg_probackup` supp `pg_probackup` currently has the following limitations: * Creating backups from a remote server is currently not supported. -* The server from which the backup was taken and the restored server must be compatible by the [block_size](https://postgrespro.com/docs/postgresql/current/runtime-config-preset#guc-block-size) and [wal_block_size](https://postgrespro.com/docs/postgresql/current/runtime-config-preset#guc-wal-block-size) parameters and have the same major release number. +* The server from which the backup was taken and the restored server must be compatible by the [block_size](https://postgrespro.com/docs/postgresql/current/runtime-config-preset#GUC-BLOCK-SIZE) and [wal_block_size](https://postgrespro.com/docs/postgresql/current/runtime-config-preset#GUC-WAL-BLOCK-SIZE) parameters and have the same major release number. * Microsoft Windows operating system is not supported. * Configuration files outside of PostgreSQL data directory are not included into the backup and should be backed up separately. @@ -85,7 +85,7 @@ Currently the latest documentation can be found at [Postgres Pro Enterprise docu ## Licence -This module available under the same license as [PostgreSQL](https://www.postgresql.org/about/licence/). +This module is available under a [license](LICENSE) similar to [PostgreSQL](https://www.postgresql.org/about/licence/). ## Feedback diff --git a/src/archive.c b/src/archive.c index e26d17b6..2953b89e 100644 --- a/src/archive.c +++ b/src/archive.c @@ -3,7 +3,7 @@ * archive.c: - pg_probackup specific archive commands for archive backups.
* * - * Portions Copyright (c) 2017, Postgres Professional + * Portions Copyright (c) 2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/backup.c b/src/backup.c index 0dd681fa..b2aed318 100644 --- a/src/backup.c +++ b/src/backup.c @@ -3,7 +3,7 @@ * backup.c: backup DB cluster, archived WAL * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/catalog.c b/src/catalog.c index 74d8ee90..41676f4c 100644 --- a/src/catalog.c +++ b/src/catalog.c @@ -3,7 +3,7 @@ * catalog.c: backup catalog operation * * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/data.c b/src/data.c index c3fcc254..a8a9e4de 100644 --- a/src/data.c +++ b/src/data.c @@ -3,7 +3,7 @@ * data.c: utils to parse and backup data pages * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/delete.c b/src/delete.c index 9d1c3867..c5f16af7 100644 --- a/src/delete.c +++ b/src/delete.c @@ -3,7 +3,7 @@ * delete.c: delete backup files. * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/dir.c b/src/dir.c index 1a93c1fe..9e55c821 100644 --- a/src/dir.c +++ b/src/dir.c @@ -3,7 +3,7 @@ * dir.c: directory operation utility. * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/fetch.c b/src/fetch.c index 17e77025..988ce283 100644 --- a/src/fetch.c +++ b/src/fetch.c @@ -3,7 +3,7 @@ * fetch.c * Functions for fetching files from PostgreSQL data directory * - * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/help.c b/src/help.c index f534f396..1cf6d404 100644 --- a/src/help.c +++ b/src/help.c @@ -2,7 +2,7 @@ * * help.c * - * Copyright (c) 2017-2017, Postgres Professional + * Copyright (c) 2017-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/init.c b/src/init.c index d8e238fd..75b50e4f 100644 --- a/src/init.c +++ b/src/init.c @@ -3,7 +3,7 @@ * init.c: - initialize backup catalog. 
* * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/pg_probackup.c b/src/pg_probackup.c index 113e8901..cf51fd10 100644 --- a/src/pg_probackup.c +++ b/src/pg_probackup.c @@ -3,7 +3,7 @@ * pg_probackup.c: Backup/Recovery manager for PostgreSQL. * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/pg_probackup.h b/src/pg_probackup.h index e337771e..7bd87e56 100644 --- a/src/pg_probackup.h +++ b/src/pg_probackup.h @@ -3,7 +3,7 @@ * pg_probackup.h: Backup/Recovery manager for PostgreSQL. * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/restore.c b/src/restore.c index 9c87cd39..c08e647c 100644 --- a/src/restore.c +++ b/src/restore.c @@ -3,7 +3,7 @@ * restore.c: restore DB cluster and archived WAL. * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/util.c b/src/util.c index 4eefa788..8d4974ca 100644 --- a/src/util.c +++ b/src/util.c @@ -3,7 +3,7 @@ * util.c: log messages to log file or stderr, and misc code. * * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/utils/logger.c b/src/utils/logger.c index 563d2027..4cdbf721 100644 --- a/src/utils/logger.c +++ b/src/utils/logger.c @@ -2,7 +2,7 @@ * * logger.c: - log events into log file or stderr. * - * Copyright (c) 2017-2017, Postgres Professional + * Copyright (c) 2017-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/utils/logger.h b/src/utils/logger.h index e1feb86c..15ec38f1 100644 --- a/src/utils/logger.h +++ b/src/utils/logger.h @@ -2,7 +2,7 @@ * * logger.h: - prototypes of logger functions. 
* - * Copyright (c) 2017-2017, Postgres Professional + * Copyright (c) 2017-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/utils/pgut.c b/src/utils/pgut.c index ec3fd8bb..b789a326 100644 --- a/src/utils/pgut.c +++ b/src/utils/pgut.c @@ -3,7 +3,7 @@ * pgut.c * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2017-2017, Postgres Professional + * Portions Copyright (c) 2017-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -1655,8 +1655,8 @@ pgut_disconnect(PGconn *conn) PGresult * -pgut_execute_parallel(PGconn* conn, - PGcancel* thread_cancel_conn, const char *query, +pgut_execute_parallel(PGconn* conn, + PGcancel* thread_cancel_conn, const char *query, int nParams, const char **params, bool text_result) { diff --git a/src/utils/pgut.h b/src/utils/pgut.h index 9aac75ca..6e9a50fd 100644 --- a/src/utils/pgut.h +++ b/src/utils/pgut.h @@ -3,7 +3,7 @@ * pgut.h * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2017-2017, Postgres Professional + * Portions Copyright (c) 2017-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -115,7 +115,7 @@ extern PGresult *pgut_execute(PGconn* conn, const char *query, int nParams, const char **params); extern PGresult *pgut_execute_extended(PGconn* conn, const char *query, int nParams, const char **params, bool text_result, bool ok_error); -extern PGresult *pgut_execute_parallel(PGconn* conn, PGcancel* thread_cancel_conn, +extern PGresult *pgut_execute_parallel(PGconn* conn, PGcancel* thread_cancel_conn, const char *query, int nParams, const char **params, bool text_result); extern bool pgut_send(PGconn* conn, const char *query, int nParams, const char **params, int elevel); diff --git a/src/validate.c b/src/validate.c index f61bcb79..4fa7d78b 100644 --- a/src/validate.c +++ b/src/validate.c @@ -3,7 +3,7 @@ * validate.c: validate backup files. * * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ From 6c9dfcfe8290df7745356ac9183d82dca61ade98 Mon Sep 17 00:00:00 2001 From: Grigory Smolkin Date: Wed, 7 Nov 2018 04:21:56 +0300 Subject: [PATCH 03/26] PGPRO-2095: use latest replayed lsn instead of STOP LSN --- src/backup.c | 80 +++++++++++++++++++++++++++------------------- src/pg_probackup.h | 4 +++ src/util.c | 30 ++++++++--------- 3 files changed, 67 insertions(+), 47 deletions(-) diff --git a/src/backup.c b/src/backup.c index 602ab823..e3e1d60a 100644 --- a/src/backup.c +++ b/src/backup.c @@ -756,28 +756,25 @@ do_backup_instance(void) parray_free(prev_backup_filelist); } - /* Copy pg_control in case of backup from replica >= 9.6 */ + /* In case of backup from replica >= 9.6 we must fix minRecPoint, + * First we must find pg_control in backup_files_list. 
+ */ if (current.from_replica && !exclusive_backup) { + char pg_control_path[MAXPGPATH]; + + snprintf(pg_control_path, sizeof(pg_control_path), "%s/%s", pgdata, "global/pg_control"); + for (i = 0; i < parray_num(backup_files_list); i++) { pgFile *tmp_file = (pgFile *) parray_get(backup_files_list, i); - if (strcmp(tmp_file->name, "pg_control") == 0) + if (strcmp(tmp_file->path, pg_control_path) == 0) { pg_control = tmp_file; break; } } - - if (!pg_control) - elog(ERROR, "Failed to locate pg_control in copied files"); - - if (is_remote_backup) - remote_copy_file(NULL, pg_control); - else - if (!copy_file(pgdata, database_path, pg_control)) - elog(ERROR, "Failed to copy pg_control"); } @@ -1160,9 +1157,6 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup) */ pg_switch_wal(conn); - //elog(INFO, "START LSN: %X/%X", - // (uint32) (backup->start_lsn >> 32), (uint32) (backup->start_lsn)); - if (current.backup_mode == BACKUP_MODE_DIFF_PAGE) /* In PAGE mode wait for current segment... */ wait_wal_lsn(backup->start_lsn, true, false); @@ -1175,8 +1169,10 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup) /* ...for others wait for previous segment */ wait_wal_lsn(backup->start_lsn, true, true); - /* Wait for start_lsn to be replayed by replica */ - if (backup->from_replica) + /* In case of backup from replica for PostgreSQL 9.5 + * wait for start_lsn to be replayed by replica + */ + if (backup->from_replica && exclusive_backup) wait_replica_wal_lsn(backup->start_lsn, true); } @@ -1526,7 +1522,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment) GetXLogFileName(wal_segment, tli, targetSegNo, xlog_seg_size); /* - * In pg_start_backup we wait for 'lsn' in 'pg_wal' directory iff it is + * In pg_start_backup we wait for 'lsn' in 'pg_wal' directory if it is * stream and non-page backup. Page backup needs archived WAL files, so we * wait for 'lsn' in archive 'wal' directory for page backups. * @@ -1547,7 +1543,12 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment) { join_path_components(wal_segment_path, arclog_path, wal_segment); wal_segment_dir = arclog_path; - timeout = archive_timeout; + + if (archive_timeout > 0) + timeout = archive_timeout; + else + timeout = ARCHIVE_TIMEOUT_DEFAULT; + } if (wait_prev_segment) @@ -1780,14 +1781,29 @@ pg_stop_backup(pgBackup *backup) * Stop the non-exclusive backup. Besides stop_lsn it returns from * pg_stop_backup(false) copy of the backup label and tablespace map * so they can be written to disk by the caller. + * In case of backup from replica >= 9.6 we do not trust minRecPoint + * and stop_backup LSN, so we use latest replayed LSN as STOP LSN. 
*/ - stop_backup_query = "SELECT" - " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot())," - " current_timestamp(0)::timestamptz," - " lsn," - " labelfile," - " spcmapfile" - " FROM pg_catalog.pg_stop_backup(false)"; + if (current.from_replica) + stop_backup_query = "SELECT" + " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot())," + " current_timestamp(0)::timestamptz," +#if PG_VERSION_NUM >= 100000 + " pg_catalog.pg_last_wal_replay_lsn()," +#else + " pg_catalog.pg_last_xlog_replay_location()," +#endif + " labelfile," + " spcmapfile" + " FROM pg_catalog.pg_stop_backup(false)"; + else + stop_backup_query = "SELECT" + " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot())," + " current_timestamp(0)::timestamptz," + " lsn," + " labelfile," + " spcmapfile" + " FROM pg_catalog.pg_stop_backup(false)"; } else @@ -1873,14 +1889,14 @@ pg_stop_backup(pgBackup *backup) /* Calculate LSN */ stop_backup_lsn = ((uint64) lsn_hi) << 32 | lsn_lo; - //if (!XRecOffIsValid(stop_backup_lsn)) - //{ - // stop_backup_lsn = restore_lsn; - //} - if (!XRecOffIsValid(stop_backup_lsn)) - elog(ERROR, "Invalid stop_backup_lsn value %X/%X", - (uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn)); + { + if (XRecOffIsNull(stop_backup_lsn)) + stop_backup_lsn = stop_backup_lsn + SizeOfXLogLongPHD; + else + elog(ERROR, "Invalid stop_backup_lsn value %X/%X", + (uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn)); + } /* Write backup_label and tablespace_map */ if (!exclusive_backup) diff --git a/src/pg_probackup.h b/src/pg_probackup.h index b75bb581..f5d6bb5c 100644 --- a/src/pg_probackup.h +++ b/src/pg_probackup.h @@ -57,6 +57,10 @@ #define XID_FMT "%u" #endif +/* Check if an XLogRecPtr value is pointed to 0 offset */ +#define XRecOffIsNull(xlrp) \ + ((xlrp) % XLOG_BLCKSZ == 0) + typedef enum CompressAlg { NOT_DEFINED_COMPRESS = 0, diff --git a/src/util.c b/src/util.c index 5f059c37..e20cda17 100644 --- a/src/util.c +++ b/src/util.c @@ -119,7 +119,7 @@ writeControlFile(ControlFileData *ControlFile, char *path) /* copy controlFileSize */ buffer = pg_malloc(ControlFileSize); - memcpy(buffer, &ControlFile, sizeof(ControlFileData)); + memcpy(buffer, ControlFile, sizeof(ControlFileData)); /* Write pg_control */ unlink(path); @@ -136,8 +136,8 @@ writeControlFile(ControlFileData *ControlFile, char *path) if (fsync(fd) != 0) elog(ERROR, "Failed to fsync file: %s", path); - pg_free(buffer); close(fd); + pg_free(buffer); } /* @@ -290,9 +290,7 @@ get_data_checksum_version(bool safe) return ControlFile.data_checksum_version; } -/* MinRecoveryPoint 'as-is' is not to be trusted - * Use STOP LSN instead - */ +/* MinRecoveryPoint 'as-is' is not to be trusted */ void set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_backup_lsn) { @@ -301,20 +299,21 @@ set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_ba size_t size; char fullpath[MAXPGPATH]; - elog(LOG, "Setting minRecPoint to STOP LSN: %X/%X", - (uint32) (stop_backup_lsn >> 32), - (uint32) stop_backup_lsn); - - /* Path to pg_control in backup */ - snprintf(fullpath, sizeof(fullpath), "%s/%s", backup_path, XLOG_CONTROL_FILE); - - /* First fetch file... 
*/ - buffer = slurpFile(backup_path, XLOG_CONTROL_FILE, &size, false); + /* First fetch file content */ + buffer = slurpFile(pgdata, XLOG_CONTROL_FILE, &size, false); if (buffer == NULL) elog(ERROR, "ERROR"); digestControlFile(&ControlFile, buffer, size); + elog(LOG, "Current minRecPoint %X/%X", + (uint32) (ControlFile.minRecoveryPoint >> 32), + (uint32) ControlFile.minRecoveryPoint); + + elog(LOG, "Setting minRecPoint to %X/%X", + (uint32) (stop_backup_lsn >> 32), + (uint32) stop_backup_lsn); + ControlFile.minRecoveryPoint = stop_backup_lsn; /* Update checksum in pg_control header */ @@ -327,7 +326,8 @@ set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_ba /* paranoia */ checkControlFile(&ControlFile); - /* update pg_control */ + /* overwrite pg_control */ + snprintf(fullpath, sizeof(fullpath), "%s/%s", backup_path, XLOG_CONTROL_FILE); writeControlFile(&ControlFile, fullpath); /* Update pg_control checksum in backup_list */ From 16b9b369815f8527ce5d046df9351bf7f358f660 Mon Sep 17 00:00:00 2001 From: Grigory Smolkin Date: Wed, 7 Nov 2018 04:29:54 +0300 Subject: [PATCH 04/26] bugfix: change units for replica-timeout & archive-timeout to seconds, log-rotation-age to minutes --- src/configure.c | 6 +++--- src/pg_probackup.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/configure.c b/src/configure.c index 30845607..1714d9f5 100644 --- a/src/configure.c +++ b/src/configure.c @@ -245,7 +245,7 @@ readBackupCatalogConfigFile(void) { 's', 0, "error-log-filename", &(config->error_log_filename), SOURCE_CMDLINE }, { 's', 0, "log-directory", &(config->log_directory), SOURCE_CMDLINE }, { 'U', 0, "log-rotation-size", &(config->log_rotation_size), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_KB }, - { 'U', 0, "log-rotation-age", &(config->log_rotation_age), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_MS }, + { 'U', 0, "log-rotation-age", &(config->log_rotation_age), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_MIN }, /* connection options */ { 's', 0, "pgdata", &(config->pgdata), SOURCE_FILE_STRICT }, { 's', 0, "pgdatabase", &(config->pgdatabase), SOURCE_FILE_STRICT }, @@ -257,14 +257,14 @@ readBackupCatalogConfigFile(void) { 's', 0, "master-port", &(config->master_port), SOURCE_FILE_STRICT }, { 's', 0, "master-db", &(config->master_db), SOURCE_FILE_STRICT }, { 's', 0, "master-user", &(config->master_user), SOURCE_FILE_STRICT }, - { 'u', 0, "replica-timeout", &(config->replica_timeout), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_MS }, + { 'u', 0, "replica-timeout", &(config->replica_timeout), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_S }, /* other options */ { 'U', 0, "system-identifier", &(config->system_identifier), SOURCE_FILE_STRICT }, #if PG_VERSION_NUM >= 110000 {'u', 0, "xlog-seg-size", &config->xlog_seg_size, SOURCE_FILE_STRICT}, #endif /* archive options */ - { 'u', 0, "archive-timeout", &(config->archive_timeout), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_MS }, + { 'u', 0, "archive-timeout", &(config->archive_timeout), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_S }, {0} }; diff --git a/src/pg_probackup.c b/src/pg_probackup.c index cf51fd10..b1b51ceb 100644 --- a/src/pg_probackup.c +++ b/src/pg_probackup.c @@ -199,7 +199,7 @@ static pgut_option options[] = { 's', 143, "error-log-filename", &error_log_filename, SOURCE_CMDLINE }, { 's', 144, "log-directory", &log_directory, SOURCE_CMDLINE }, { 'U', 145, "log-rotation-size", &log_rotation_size, SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_KB }, - { 'U', 146, "log-rotation-age",
&log_rotation_age, SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_MS }, + { 'U', 146, "log-rotation-age", &log_rotation_age, SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_MIN }, /* connection options */ { 's', 'd', "pgdatabase", &pgut_dbname, SOURCE_CMDLINE }, { 's', 'h', "pghost", &host, SOURCE_CMDLINE }, From 1f951e4e71d1d70e4ed8931cb8841d03b1f8db95 Mon Sep 17 00:00:00 2001 From: Grigory Smolkin Date: Wed, 7 Nov 2018 04:57:06 +0300 Subject: [PATCH 05/26] revert log-rotation-age unit change --- src/configure.c | 2 +- src/pg_probackup.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/configure.c b/src/configure.c index 1714d9f5..6d3cf9cb 100644 --- a/src/configure.c +++ b/src/configure.c @@ -245,7 +245,7 @@ readBackupCatalogConfigFile(void) { 's', 0, "error-log-filename", &(config->error_log_filename), SOURCE_CMDLINE }, { 's', 0, "log-directory", &(config->log_directory), SOURCE_CMDLINE }, { 'U', 0, "log-rotation-size", &(config->log_rotation_size), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_KB }, - { 'U', 0, "log-rotation-age", &(config->log_rotation_age), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_MIN }, + { 'U', 0, "log-rotation-age", &(config->log_rotation_age), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_MS }, /* connection options */ { 's', 0, "pgdata", &(config->pgdata), SOURCE_FILE_STRICT }, { 's', 0, "pgdatabase", &(config->pgdatabase), SOURCE_FILE_STRICT }, diff --git a/src/pg_probackup.c b/src/pg_probackup.c index b1b51ceb..cf51fd10 100644 --- a/src/pg_probackup.c +++ b/src/pg_probackup.c @@ -199,7 +199,7 @@ static pgut_option options[] = { 's', 143, "error-log-filename", &error_log_filename, SOURCE_CMDLINE }, { 's', 144, "log-directory", &log_directory, SOURCE_CMDLINE }, { 'U', 145, "log-rotation-size", &log_rotation_size, SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_KB }, - { 'U', 146, "log-rotation-age", &log_rotation_age, SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_MIN }, + { 'U', 146, "log-rotation-age", &log_rotation_age, SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_MS }, /* connection options */ { 's', 'd', "pgdatabase", &pgut_dbname, SOURCE_CMDLINE }, { 's', 'h', "pghost", &host, SOURCE_CMDLINE }, From 62d0f5fc9740ec095095e3e812a88be7aa716ab8 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Wed, 7 Nov 2018 22:06:50 +0300 Subject: [PATCH 06/26] Fix bug with page compression --- src/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data.c b/src/data.c index a8a9e4de..cc2edac5 100644 --- a/src/data.c +++ b/src/data.c @@ -368,7 +368,7 @@ compress_and_backup_page(pgFile *file, BlockNumber blknum, BackupPageHeader header; size_t write_buffer_size = sizeof(header); char write_buffer[BLCKSZ+sizeof(header)]; - char compressed_page[BLCKSZ]; + char compressed_page[BLCKSZ*2]; /* compressed page may require more space than uncompressed */ if(page_state == SkipCurrentPage) return; @@ -395,7 +395,7 @@ compress_and_backup_page(pgFile *file, BlockNumber blknum, Assert (header.compressed_size <= BLCKSZ); /* The page was successfully compressed. 
*/ - if (header.compressed_size > 0) + if (header.compressed_size > 0 && header.compressed_size < BLCKSZ) { memcpy(write_buffer, &header, sizeof(header)); memcpy(write_buffer + sizeof(header),
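PATCH 06 above is subtle: the destination buffer grows to BLCKSZ*2 because zlib output for an incompressible page can be larger than the input, and a page is now stored compressed only when 0 < compressed_size < BLCKSZ, since an output of exactly BLCKSZ would be indistinguishable from a raw page when the backup is read back. A standalone illustration of that decision rule, using plain zlib with the default 8192-byte block size (an assumption; this is not pg_probackup code):

#include <stdio.h>
#include <stdlib.h>
#include <zlib.h>

#define BLCKSZ 8192

int
main(void)
{
	unsigned char page[BLCKSZ];
	unsigned char dst[BLCKSZ * 2];   /* compression may expand the data */
	uLongf dst_len = sizeof(dst);
	int i;

	/* Pseudo-random bytes compress badly and typically expand. */
	for (i = 0; i < BLCKSZ; i++)
		page[i] = (unsigned char) rand();

	if (compress2(dst, &dst_len, page, BLCKSZ, 1) != Z_OK)
		return 1;

	/* The store-compressed test mirroring the patch: 0 < size < BLCKSZ */
	if (dst_len > 0 && dst_len < BLCKSZ)
		printf("store compressed (%lu bytes)\n", (unsigned long) dst_len);
	else
		printf("store raw: compressed size %lu is not smaller than %d\n",
		       (unsigned long) dst_len, BLCKSZ);
	return 0;
}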
From 05e3211e2765d0b6d20e0c13799084076783a17d Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Thu, 8 Nov 2018 12:27:42 +0300 Subject: [PATCH 07/26] Provide backward compatibility for decompression bug --- src/data.c | 63 +++++++++++++++++++++++++++++++++++++++++++--- src/pg_probackup.c | 2 +- 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/src/data.c b/src/data.c index cc2edac5..876e7f1e 100644 --- a/src/data.c +++ b/src/data.c @@ -99,6 +99,55 @@ do_decompress(void* dst, size_t dst_size, void const* src, size_t src_size, return -1; } + +#define ZLIB_MAGIC 0x78 + +/* + * Before version 2.0.23 there was a bug in pg_probackup: pages whose compressed + * size is exactly the same as the original size were not treated as compressed. + * This check tries to detect and decompress such pages. + * There is no 100% reliable criterion to determine whether a page is compressed or not. + * But at least we will do this check only for pages which will not pass the validation step. + */ +static bool +page_may_be_compressed(Page page, CompressAlg alg) +{ + PageHeader phdr; + + phdr = (PageHeader) page; + + /* First check if page header is valid (it seems to be fast enough check) */ + if (!(PageGetPageSize(phdr) == BLCKSZ && + PageGetPageLayoutVersion(phdr) == PG_PAGE_LAYOUT_VERSION && + (phdr->pd_flags & ~PD_VALID_FLAG_BITS) == 0 && + phdr->pd_lower >= SizeOfPageHeaderData && + phdr->pd_lower <= phdr->pd_upper && + phdr->pd_upper <= phdr->pd_special && + phdr->pd_special <= BLCKSZ && + phdr->pd_special == MAXALIGN(phdr->pd_special))) + { + /* ... and only if it is invalid, then do more checks */ + int major, middle, minor; + if ( parse_program_version(current.program_version) >= 20023) + { + /* Versions 2.0.23 and higher don't have such bug */ + return false; + } +#ifdef HAVE_LIBZ + /* For zlib we can check page magic: + * https://stackoverflow.com/questions/9050260/what-does-a-zlib-header-look-like + */ + if (alg == ZLIB_COMPRESS && *(char*)page != ZLIB_MAGIC) + { + return false; + } +#endif + /* otherwise let's try to decompress the page */ + return true; + } + return false; +} + /* * When copying datafiles to backup we validate and compress them block * by block. Thus special header is required for each data block. @@ -717,7 +766,8 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, elog(ERROR, "cannot read block %u of \"%s\" read %lu of %d", blknum, file->path, read_len, header.compressed_size); - if (header.compressed_size != BLCKSZ) + if (header.compressed_size != BLCKSZ + || page_may_be_compressed(compressed_page.data, file->compress_alg)) { int32 uncompressed_size = 0; @@ -1595,7 +1645,8 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version) elog(ERROR, "cannot read block %u of \"%s\" read %lu of %d", blknum, file->path, read_len, header.compressed_size); - if (header.compressed_size != BLCKSZ) + if (header.compressed_size != BLCKSZ + || page_may_be_compressed(compressed_page.data, file->compress_alg)) { int32 uncompressed_size = 0; @@ -1605,9 +1656,15 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version) file->compress_alg); if (uncompressed_size != BLCKSZ) + { + if (header.compressed_size == BLCKSZ) + { + is_valid = false; + continue; + } elog(ERROR, "page of file \"%s\" uncompressed to %d bytes. != BLCKSZ", file->path, uncompressed_size); - + } if (validate_one_page(page.data, file, blknum, stop_lsn, checksum_version) == PAGE_IS_FOUND_AND_NOT_VALID) is_valid = false; diff --git a/src/pg_probackup.c b/src/pg_probackup.c index cf51fd10..82107097 100644 --- a/src/pg_probackup.c +++ b/src/pg_probackup.c @@ -17,7 +17,7 @@ #include "utils/thread.h" -const char *PROGRAM_VERSION = "2.0.22"; +const char *PROGRAM_VERSION = "2.0.23"; const char *PROGRAM_URL = "https://github.com/postgrespro/pg_probackup"; const char *PROGRAM_EMAIL = "https://github.com/postgrespro/pg_probackup/issues"; From 5e12fec6ab34b0b031b9f380dd63ab884d8657a2 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Thu, 8 Nov 2018 12:43:19 +0300 Subject: [PATCH 08/26] Eliminate unused variables --- src/data.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/data.c b/src/data.c index 876e7f1e..a8378f09 100644 --- a/src/data.c +++ b/src/data.c @@ -127,8 +127,7 @@ page_may_be_compressed(Page page, CompressAlg alg) phdr->pd_special == MAXALIGN(phdr->pd_special))) { /* ... and only if it is invalid, then do more checks */ - int major, middle, minor; - if ( parse_program_version(current.program_version) >= 20023) + if (parse_program_version(current.program_version) >= 20023) { /* Versions 2.0.23 and higher don't have such bug */ return false;
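The ZLIB_MAGIC check added by PATCH 07 keys off the zlib stream header: its first byte (CMF) is 0x78 for deflate with a 32KB window, and per RFC 1950 the second byte (FLG) is chosen so that the 16-bit header is a multiple of 31. Below is a sketch of the slightly stronger two-byte form of the same heuristic (a hypothetical helper, not code from the patch); note that even this can collide with genuine page contents, which is why page_may_be_compressed() runs it only after the page header has already failed the sanity check:

#include <stdbool.h>
#include <stddef.h>

static bool
looks_like_zlib_stream(const unsigned char *buf, size_t len)
{
	if (len < 2)
		return false;
	if (buf[0] != 0x78)                 /* CMF: deflate, 32KB window */
		return false;
	/* FLG makes the two-byte header divisible by 31 (RFC 1950) */
	return ((buf[0] << 8) | buf[1]) % 31 == 0;
}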
From ea3ed9fa171f5807bdb4790f8b64a0211cb729a7 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Thu, 8 Nov 2018 15:04:55 +0300 Subject: [PATCH 09/26] Use a function-local argument passed by callers instead of the current global backup variable --- src/data.c | 59 +++++++++++++++++++++++-------- src/merge.c | 9 +++-- src/pg_probackup.h | 7 ++-- src/restore.c | 3 +- src/validate.c | 11 +++--- tests/expected/option_version.out | 2 +- 6 files changed, 63 insertions(+), 28 deletions(-) diff --git a/src/data.c b/src/data.c index a8378f09..5edeefa4 100644 --- a/src/data.c +++ b/src/data.c @@ -57,7 +57,7 @@ zlib_decompress(void *dst, size_t dst_size, void const *src, size_t src_size) */ static int32 do_compress(void* dst, size_t dst_size, void const* src, size_t src_size, - CompressAlg alg, int level) + CompressAlg alg, int level, const char **errormsg) { switch (alg) { @@ -66,7 +66,13 @@ do_compress(void* dst, size_t dst_size, void const* src, size_t src_size, return -1; #ifdef HAVE_LIBZ case ZLIB_COMPRESS: - return zlib_compress(dst, dst_size, src, src_size, level); + { + int32 ret; + ret = zlib_compress(dst, dst_size, src, src_size, level); + if (ret != Z_OK && errormsg) + *errormsg = zError(ret); + return ret; + } #endif case PGLZ_COMPRESS: return pglz_compress(src, src_size, dst, PGLZ_strategy_always); @@ -81,7 +87,7 @@ do_compress(void* dst, size_t dst_size, void const* src, size_t src_size, */ static int32 do_decompress(void* dst, size_t dst_size, void const* src, size_t src_size, - CompressAlg alg) + CompressAlg alg, const char **errormsg) { switch (alg) { @@ -90,7 +96,13 @@ do_decompress(void* dst, size_t dst_size, void const* src, size_t src_size, return -1; #ifdef HAVE_LIBZ case ZLIB_COMPRESS: - return zlib_decompress(dst, dst_size, src, src_size); + { + int32 ret; + ret = zlib_decompress(dst, dst_size, src, src_size); + if (ret != Z_OK && errormsg) + *errormsg = zError(ret); + return ret; + } #endif case PGLZ_COMPRESS: return pglz_decompress(src, src_size, dst, dst_size); @@ -110,7 +122,7 @@ do_decompress(void* dst, size_t dst_size, void const* src, size_t src_size, * But at least we will do this check only for pages which will not pass the validation step. */ static bool -page_may_be_compressed(Page page, CompressAlg alg) +page_may_be_compressed(Page page, CompressAlg alg, uint32 backup_version) { PageHeader phdr; @@ -127,7 +139,7 @@ page_may_be_compressed(Page page, CompressAlg alg) phdr->pd_special == MAXALIGN(phdr->pd_special))) { /* ... and only if it is invalid, then do more checks */ - if (parse_program_version(current.program_version) >= 20023) + if (backup_version >= 20023) { /* Versions 2.0.23 and higher don't have such bug */ return false; @@ -434,9 +446,16 @@ compress_and_backup_page(pgFile *file, BlockNumber blknum, } else { + const char *errormsg = NULL; + /* The page was not truncated, so we need to compress it */ header.compressed_size = do_compress(compressed_page, BLCKSZ, - page, BLCKSZ, calg, clevel); + page, BLCKSZ, calg, clevel, + &errormsg); + /* Something went wrong and errormsg was assigned, throw a warning */ + if (header.compressed_size < 0 && errormsg != NULL) + elog(WARNING, "An error occurred during compressing block %u of file \"%s\": %s", + blknum, file->path, errormsg); file->compress_alg = calg; file->read_size += BLCKSZ; @@ -473,7 +492,7 @@ compress_and_backup_page(pgFile *file, BlockNumber blknum, fclose(in); fclose(out); - elog(ERROR, "File: %s, cannot write backup at block %u : %s", + elog(ERROR, "File: %s, cannot write backup at block %u: %s", file->path, blknum, strerror(errno_tmp)); } @@ -661,7 +680,7 @@ backup_data_file(backup_files_arg* arguments, */ void restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, - bool write_header) + bool write_header, uint32 backup_version) { FILE *in = NULL; FILE *out = NULL; @@ -766,14 +785,19 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, blknum, file->path, read_len, header.compressed_size); if (header.compressed_size != BLCKSZ - || page_may_be_compressed(compressed_page.data, file->compress_alg)) + || page_may_be_compressed(compressed_page.data, file->compress_alg, + backup_version)) { int32 uncompressed_size = 0; + const char *errormsg = NULL; uncompressed_size = do_decompress(page.data, BLCKSZ, compressed_page.data, header.compressed_size, - file->compress_alg); + file->compress_alg, &errormsg); + if (uncompressed_size < 0 && errormsg != NULL) + elog(WARNING, "An error occurred during decompressing block %u of file \"%s\": %s", + blknum, file->path, errormsg); if (uncompressed_size != BLCKSZ) elog(ERROR, "page of file \"%s\" uncompressed to %d bytes.
!= BLCKSZ", file->path, uncompressed_size); @@ -1578,7 +1602,8 @@ validate_one_page(Page page, pgFile *file, /* Validate pages of datafile in backup one by one */ bool -check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version) +check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version, + uint32 backup_version) { size_t read_len = 0; bool is_valid = true; @@ -1645,14 +1670,20 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version, blknum, file->path, read_len, header.compressed_size); if (header.compressed_size != BLCKSZ - || page_may_be_compressed(compressed_page.data, file->compress_alg)) + || page_may_be_compressed(compressed_page.data, file->compress_alg, + backup_version)) { int32 uncompressed_size = 0; + const char *errormsg = NULL; uncompressed_size = do_decompress(page.data, BLCKSZ, compressed_page.data, header.compressed_size, - file->compress_alg); + file->compress_alg, + &errormsg); + if (uncompressed_size < 0 && errormsg != NULL) + elog(WARNING, "An error occurred during decompressing block %u of file \"%s\": %s", + blknum, file->path, errormsg); if (uncompressed_size != BLCKSZ) { diff --git a/src/merge.c b/src/merge.c index 2464199f..137f1acd 100644 --- a/src/merge.c +++ b/src/merge.c @@ -457,14 +457,16 @@ merge_files(void *arg) file->path = to_path_tmp; /* Decompress first/target file */ - restore_data_file(tmp_file_path, file, false, false); + restore_data_file(tmp_file_path, file, false, false, + parse_program_version(to_backup->program_version)); file->path = prev_path; } /* Merge second/source file with first/target file */ restore_data_file(tmp_file_path, file, from_backup->backup_mode == BACKUP_MODE_DIFF_DELTA, - false); + false, + parse_program_version(from_backup->program_version)); elog(VERBOSE, "Compress file and save it to the directory \"%s\"", argument->to_root); @@ -496,7 +498,8 @@ merge_files(void *arg) /* We can merge in-place here */ restore_data_file(to_path_tmp, file, from_backup->backup_mode == BACKUP_MODE_DIFF_DELTA, - true); + true, + parse_program_version(from_backup->program_version)); /* * We need to calculate write_size, restore_data_file() doesn't diff --git a/src/pg_probackup.h b/src/pg_probackup.h index 7bd87e56..182a647b 100644 --- a/src/pg_probackup.h +++ b/src/pg_probackup.h @@ -517,7 +517,8 @@ extern bool backup_data_file(backup_files_arg* arguments, CompressAlg calg, int clevel); extern void restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, - bool write_header); + bool write_header, + uint32 backup_version); extern bool copy_file(const char *from_root, const char *to_root, pgFile *file); extern void move_file(const char *from_root, const char *to_root, pgFile *file); extern void push_wal_file(const char *from_path, const char *to_path, @@ -526,8 +527,8 @@ extern void get_wal_file(const char *from_path, const char *to_path); extern bool calc_file_checksum(pgFile *file); -extern bool check_file_pages(pgFile* file, - XLogRecPtr stop_lsn, uint32 checksum_version); +extern bool check_file_pages(pgFile* file, XLogRecPtr stop_lsn, + uint32 checksum_version, uint32 backup_version); /* parsexlog.c */ extern void extractPageMap(const char *archivedir, diff --git a/src/restore.c b/src/restore.c index c08e647c..439f3c4e 100644 --- a/src/restore.c +++ b/src/restore.c @@ -621,7 +621,8 @@ restore_files(void *arg) file->path + strlen(from_root) + 1); restore_data_file(to_path, file, arguments->backup->backup_mode == BACKUP_MODE_DIFF_DELTA, - false); + false,
parse_program_version(arguments->backup->program_version)); } else copy_file(from_root, pgdata, file); diff --git a/src/validate.c b/src/validate.c index 4fa7d78b..5cbb9b26 100644 --- a/src/validate.c +++ b/src/validate.c @@ -28,6 +28,7 @@ typedef struct bool corrupted; XLogRecPtr stop_lsn; uint32 checksum_version; + uint32 backup_version; /* * Return value from the thread. @@ -92,11 +93,6 @@ pgBackupValidate(pgBackup *backup) pg_atomic_clear_flag(&file->lock); } - /* - * We use program version to calculate checksum in pgBackupValidateFiles() - */ - validate_backup_version = parse_program_version(backup->program_version); - /* init thread args with own file lists */ threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); threads_args = (validate_files_arg *) @@ -111,6 +107,7 @@ pgBackupValidate(pgBackup *backup) arg->corrupted = false; arg->stop_lsn = backup->stop_lsn; arg->checksum_version = backup->checksum_version; + arg->backup_version = parse_program_version(backup->program_version); /* By default there are some error */ threads_args[i].ret = 1; @@ -233,7 +230,9 @@ pgBackupValidateFiles(void *arg) /* validate relation blocks */ if (file->is_datafile) { - if (!check_file_pages(file, arguments->stop_lsn, arguments->checksum_version)) + if (!check_file_pages(file, arguments->stop_lsn, + arguments->checksum_version, + arguments->backup_version)) arguments->corrupted = true; } } diff --git a/tests/expected/option_version.out b/tests/expected/option_version.out index cb0a30d4..6a0391c2 100644 --- a/tests/expected/option_version.out +++ b/tests/expected/option_version.out @@ -1 +1 @@ -pg_probackup 2.0.22 \ No newline at end of file +pg_probackup 2.0.23 \ No newline at end of file From 728e3d5bdc174c8c604c7ee8a57350b4c1847a5e Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Thu, 8 Nov 2018 15:25:28 +0300 Subject: [PATCH 10/26] Avoid compression warnings for already compressed pages --- src/data.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/data.c b/src/data.c index 5edeefa4..d3e48e9a 100644 --- a/src/data.c +++ b/src/data.c @@ -449,7 +449,7 @@ compress_and_backup_page(pgFile *file, BlockNumber blknum, const char *errormsg = NULL; /* The page was not truncated, so we need to compress it */ - header.compressed_size = do_compress(compressed_page, BLCKSZ, + header.compressed_size = do_compress(compressed_page, sizeof(compressed_page), page, BLCKSZ, calg, clevel, &errormsg); /* Something went wrong and errormsg was assigned, throw a warning */ @@ -459,7 +459,6 @@ compress_and_backup_page(pgFile *file, BlockNumber blknum, file->compress_alg = calg; file->read_size += BLCKSZ; - Assert (header.compressed_size <= BLCKSZ); /* The page was successfully compressed. 
*/ if (header.compressed_size > 0 && header.compressed_size < BLCKSZ) From 53c1a05b9b5406af0bcad81b8a8f87cf11794f75 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Thu, 8 Nov 2018 15:50:24 +0300 Subject: [PATCH 11/26] Remove unnecessary validate_backup_version --- src/validate.c | 4 +- src/validate.c.autosave | 532 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 533 insertions(+), 3 deletions(-) create mode 100644 src/validate.c.autosave diff --git a/src/validate.c b/src/validate.c index 5cbb9b26..2f75cd4b 100644 --- a/src/validate.c +++ b/src/validate.c @@ -19,8 +19,6 @@ static void *pgBackupValidateFiles(void *arg); static void do_validate_instance(void); static bool corrupted_backup_found = false; -/* Program version of a current backup */ -static uint32 validate_backup_version = 0; typedef struct { @@ -220,7 +218,7 @@ pgBackupValidateFiles(void *arg) * To avoid this problem we need to use different algorithm, CRC-32 in * this case. */ - crc = pgFileGetCRC(file->path, validate_backup_version <= 20021); + crc = pgFileGetCRC(file->path, arguments->backup_version <= 20021); if (crc != file->crc) { elog(WARNING, "Invalid CRC of backup file \"%s\" : %X. Expected %X", diff --git a/src/validate.c.autosave b/src/validate.c.autosave new file mode 100644 index 00000000..1c07e0ae --- /dev/null +++ b/src/validate.c.autosave @@ -0,0 +1,532 @@ +/*------------------------------------------------------------------------- + * + * validate.c: validate backup files. + * + * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + * Portions Copyright (c) 2015-2018, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "pg_probackup.h" + +#include +#include + +#include "utils/thread.h" + +static void *pgBackupValidateFiles(void *arg); +static void do_validate_instance(void); + +static bool corrupted_backup_found = false; + +typedef struct +{ + parray *files; + bool corrupted; + XLogRecPtr stop_lsn; + uint32 checksum_version; + uint32 backup_version; + + /* + * Return value from the thread. + * 0 means there is no error, 1 - there is an error. + */ + int ret; +} validate_files_arg; + +if (is_absolute_path(host)) + if (hostaddr && *hostaddr) + printf(_("You are connected to database \"%s\" as user \"%s\" on address \"%s\" at port \"%s\".\n"), + db, PQuser(pset.db), hostaddr, PQport(pset.db)); + else + printf(_("You are connected to database \"%s\" as user \"%s\" via socket in \"%s\" at port \"%s\".\n"), + db, PQuser(pset.db), host, PQport(pset.db)); +else + if (hostaddr && *hostaddr && strcmp(host, hostaddr) != 0) + printf(_("You are connected to database \"%s\" as user \"%s\" on host \"%s\" (address \"%s\") at port \"%s\".\n"), + db, PQuser(pset.db), host, hostaddr, PQport(pset.db)); + else + printf(_("You are connected to database \"%s\" as user \"%s\" on host \"%s\" at port \"%s\".\n"), + db, PQuser(pset.db), host, PQport(pset.db)); + +/* + * Validate backup files. 
+ */ +void +pgBackupValidate(pgBackup *backup) +{ + char base_path[MAXPGPATH]; + char path[MAXPGPATH]; + parray *files; + bool corrupted = false; + bool validation_isok = true; + /* arrays with meta info for multi threaded validate */ + pthread_t *threads; + validate_files_arg *threads_args; + int i; + + /* Revalidation is attempted for DONE, ORPHAN and CORRUPT backups */ + if (backup->status != BACKUP_STATUS_OK && + backup->status != BACKUP_STATUS_DONE && + backup->status != BACKUP_STATUS_ORPHAN && + backup->status != BACKUP_STATUS_CORRUPT) + { + elog(WARNING, "Backup %s has status %s. Skip validation.", + base36enc(backup->start_time), status2str(backup->status)); + corrupted_backup_found = true; + return; + } + + if (backup->status == BACKUP_STATUS_OK || backup->status == BACKUP_STATUS_DONE) + elog(INFO, "Validating backup %s", base36enc(backup->start_time)); + /* backups in MERGING status must have an option of revalidation without losing MERGING status + else if (backup->status == BACKUP_STATUS_MERGING) + { + some message here + } + */ + else + elog(INFO, "Revalidating backup %s", base36enc(backup->start_time)); + + if (backup->backup_mode != BACKUP_MODE_FULL && + backup->backup_mode != BACKUP_MODE_DIFF_PAGE && + backup->backup_mode != BACKUP_MODE_DIFF_PTRACK && + backup->backup_mode != BACKUP_MODE_DIFF_DELTA) + elog(WARNING, "Invalid backup_mode of backup %s", base36enc(backup->start_time)); + + pgBackupGetPath(backup, base_path, lengthof(base_path), DATABASE_DIR); + pgBackupGetPath(backup, path, lengthof(path), DATABASE_FILE_LIST); + files = dir_read_file_list(base_path, path); + + /* setup threads */ + for (i = 0; i < parray_num(files); i++) + { + pgFile *file = (pgFile *) parray_get(files, i); + pg_atomic_clear_flag(&file->lock); + } + + /* init thread args with own file lists */ + threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); + threads_args = (validate_files_arg *) + palloc(sizeof(validate_files_arg) * num_threads); + + /* Validate files */ + for (i = 0; i < num_threads; i++) + { + validate_files_arg *arg = &(threads_args[i]); + + arg->files = files; + arg->corrupted = false; + arg->stop_lsn = backup->stop_lsn; + arg->checksum_version = backup->checksum_version; + arg->backup_version = parse_program_version(backup->program_version); + /* By default there are some error */ + threads_args[i].ret = 1; + + pthread_create(&threads[i], NULL, pgBackupValidateFiles, arg); + } + + /* Wait theads */ + for (i = 0; i < num_threads; i++) + { + validate_files_arg *arg = &(threads_args[i]); + + pthread_join(threads[i], NULL); + if (arg->corrupted) + corrupted = true; + if (arg->ret == 1) + validation_isok = false; + } + if (!validation_isok) + elog(ERROR, "Data files validation failed"); + + pfree(threads); + pfree(threads_args); + + /* cleanup */ + parray_walk(files, pgFileFree); + parray_free(files); + + /* Update backup status */ + backup->status = corrupted ? BACKUP_STATUS_CORRUPT : BACKUP_STATUS_OK; + write_backup_status(backup); + + if (corrupted) + elog(WARNING, "Backup %s data files are corrupted", base36enc(backup->start_time)); + else + elog(INFO, "Backup %s data files are valid", base36enc(backup->start_time)); +} + +/* + * Validate files in the backup. + * NOTE: If file is not valid, do not use ERROR log message, + * rather throw a WARNING and set arguments->corrupted = true. + * This is necessary to update backup status. 
+ */ +static void * +pgBackupValidateFiles(void *arg) +{ + int i; + validate_files_arg *arguments = (validate_files_arg *)arg; + pg_crc32 crc; + + for (i = 0; i < parray_num(arguments->files); i++) + { + struct stat st; + pgFile *file = (pgFile *) parray_get(arguments->files, i); + + if (!pg_atomic_test_set_flag(&file->lock)) + continue; + + if (interrupted) + elog(ERROR, "Interrupted during validate"); + + /* Validate only regular files */ + if (!S_ISREG(file->mode)) + continue; + /* + * Skip files which has no data, because they + * haven't changed between backups. + */ + if (file->write_size == BYTES_INVALID) + continue; + + /* + * Currently we don't compute checksums for + * cfs_compressed data files, so skip them. + */ + if (file->is_cfs) + continue; + + /* print progress */ + elog(VERBOSE, "Validate files: (%d/%lu) %s", + i + 1, (unsigned long) parray_num(arguments->files), file->path); + + if (stat(file->path, &st) == -1) + { + if (errno == ENOENT) + elog(WARNING, "Backup file \"%s\" is not found", file->path); + else + elog(WARNING, "Cannot stat backup file \"%s\": %s", + file->path, strerror(errno)); + arguments->corrupted = true; + break; + } + + if (file->write_size != st.st_size) + { + elog(WARNING, "Invalid size of backup file \"%s\" : " INT64_FORMAT ". Expected %lu", + file->path, file->write_size, (unsigned long) st.st_size); + arguments->corrupted = true; + break; + } + + /* + * Pre 2.0.22 we use CRC-32C, but in newer version of pg_probackup we + * use CRC-32. + * + * pg_control stores its content and checksum of the content, calculated + * using CRC-32C. If we calculate checksum of the whole pg_control using + * CRC-32C we get same checksum constantly. It might be because of the + * CRC-32C algorithm. + * To avoid this problem we need to use different algorithm, CRC-32 in + * this case. + */ + crc = pgFileGetCRC(file->path, arguments->backup_version <= 20021); + if (crc != file->crc) + { + elog(WARNING, "Invalid CRC of backup file \"%s\" : %X. Expected %X", + file->path, file->crc, crc); + arguments->corrupted = true; + + /* validate relation blocks */ + if (file->is_datafile) + { + if (!check_file_pages(file, arguments->stop_lsn, + arguments->checksum_version, + arguments->backup_version)) + arguments->corrupted = true; + } + } + } + + /* Data files validation is successful */ + arguments->ret = 0; + + return NULL; +} + +/* + * Validate all backups in the backup catalog. + * If --instance option was provided, validate only backups of this instance. 
+ */ +int +do_validate_all(void) +{ + if (instance_name == NULL) + { + /* Show list of instances */ + char path[MAXPGPATH]; + DIR *dir; + struct dirent *dent; + + /* open directory and list contents */ + join_path_components(path, backup_path, BACKUPS_DIR); + dir = opendir(path); + if (dir == NULL) + elog(ERROR, "cannot open directory \"%s\": %s", path, strerror(errno)); + + errno = 0; + while ((dent = readdir(dir))) + { + char child[MAXPGPATH]; + struct stat st; + + /* skip entries point current dir or parent dir */ + if (strcmp(dent->d_name, ".") == 0 || + strcmp(dent->d_name, "..") == 0) + continue; + + join_path_components(child, path, dent->d_name); + + if (lstat(child, &st) == -1) + elog(ERROR, "cannot stat file \"%s\": %s", child, strerror(errno)); + + if (!S_ISDIR(st.st_mode)) + continue; + + instance_name = dent->d_name; + sprintf(backup_instance_path, "%s/%s/%s", backup_path, BACKUPS_DIR, instance_name); + sprintf(arclog_path, "%s/%s/%s", backup_path, "wal", instance_name); + xlog_seg_size = get_config_xlog_seg_size(); + + do_validate_instance(); + } + } + else + { + do_validate_instance(); + } + + if (corrupted_backup_found) + { + elog(WARNING, "Some backups are not valid"); + return 1; + } + else + elog(INFO, "All backups are valid"); + + return 0; +} + +/* + * Validate all backups in the given instance of the backup catalog. + */ +static void +do_validate_instance(void) +{ + char *current_backup_id; + int i; + int j; + parray *backups; + pgBackup *current_backup = NULL; + + elog(INFO, "Validate backups of the instance '%s'", instance_name); + + /* Get exclusive lock of backup catalog */ + catalog_lock(); + + /* Get list of all backups sorted in order of descending start time */ + backups = catalog_get_backup_list(INVALID_BACKUP_ID); + + /* Examine backups one by one and validate them */ + for (i = 0; i < parray_num(backups); i++) + { + pgBackup *base_full_backup; + char *parent_backup_id; + + current_backup = (pgBackup *) parray_get(backups, i); + + /* Find ancestor for incremental backup */ + if (current_backup->backup_mode != BACKUP_MODE_FULL) + { + pgBackup *tmp_backup = NULL; + int result; + + result = scan_parent_chain(current_backup, &tmp_backup); + + /* chain is broken */ + if (result == 0) + { + /* determine missing backup ID */ + + parent_backup_id = base36enc_dup(tmp_backup->parent_backup); + corrupted_backup_found = true; + + /* orphanize current_backup */ + if (current_backup->status == BACKUP_STATUS_OK) + { + current_backup->status = BACKUP_STATUS_ORPHAN; + write_backup_status(current_backup); + elog(WARNING, "Backup %s is orphaned because his parent %s is missing", + base36enc(current_backup->start_time), + parent_backup_id); + } + else + { + elog(WARNING, "Backup %s has missing parent %s", + base36enc(current_backup->start_time), parent_backup_id); + } + continue; + } + /* chain is whole, but at least one parent is invalid */ + else if (result == 1) + { + /* determine corrupt backup ID */ + parent_backup_id = base36enc_dup(tmp_backup->start_time); + + /* Oldest corrupt backup has a chance for revalidation */ + if (current_backup->start_time != tmp_backup->start_time) + { + /* orphanize current_backup */ + if (current_backup->status == BACKUP_STATUS_OK) + { + current_backup->status = BACKUP_STATUS_ORPHAN; + write_backup_status(current_backup); + elog(WARNING, "Backup %s is orphaned because his parent %s has status: %s", + base36enc(current_backup->start_time), parent_backup_id, + status2str(tmp_backup->status)); + } + else + { + elog(WARNING, "Backup %s has 
parent %s with status: %s", + base36enc(current_backup->start_time),parent_backup_id, + status2str(tmp_backup->status)); + } + continue; + } + base_full_backup = find_parent_full_backup(current_backup); + } + /* chain is whole, all parents are valid at first glance, + * current backup validation can proceed + */ + else + base_full_backup = tmp_backup; + } + else + base_full_backup = current_backup; + + /* Valiate backup files*/ + pgBackupValidate(current_backup); + + /* Validate corresponding WAL files */ + if (current_backup->status == BACKUP_STATUS_OK) + validate_wal(current_backup, arclog_path, 0, + 0, 0, base_full_backup->tli, xlog_seg_size); + + /* + * Mark every descendant of corrupted backup as orphan + */ + if (current_backup->status == BACKUP_STATUS_CORRUPT) + { + /* This is ridiculous but legal. + * PAGE1_2b <- OK + * PAGE1_2a <- OK + * PAGE1_1b <- ORPHAN + * PAGE1_1a <- CORRUPT + * FULL1 <- OK + */ + + corrupted_backup_found = true; + current_backup_id = base36enc_dup(current_backup->start_time); + + for (j = i - 1; j >= 0; j--) + { + pgBackup *backup = (pgBackup *) parray_get(backups, j); + + if (is_parent(current_backup->start_time, backup, false)) + { + if (backup->status == BACKUP_STATUS_OK) + { + backup->status = BACKUP_STATUS_ORPHAN; + write_backup_status(backup); + + elog(WARNING, "Backup %s is orphaned because his parent %s has status: %s", + base36enc(backup->start_time), + current_backup_id, + status2str(current_backup->status)); + } + } + } + free(current_backup_id); + } + + /* For every OK backup we try to revalidate all his ORPHAN descendants. */ + if (current_backup->status == BACKUP_STATUS_OK) + { + /* revalidate all ORPHAN descendats + * be very careful not to miss a missing backup + * for every backup we must check that he is descendant of current_backup + */ + for (j = i - 1; j >= 0; j--) + { + pgBackup *backup = (pgBackup *) parray_get(backups, j); + pgBackup *tmp_backup = NULL; + int result; + + //PAGE3b ORPHAN + //PAGE2b ORPHAN ----- + //PAGE6a ORPHAN | + //PAGE5a CORRUPT | + //PAGE4a missing | + //PAGE3a missing | + //PAGE2a ORPHAN | + //PAGE1a OK <- we are here <-| + //FULL OK + + if (is_parent(current_backup->start_time, backup, false)) + { + /* Revalidation make sense only if parent chain is whole. + * is_parent() do not guarantee that. 
+ */ + result = scan_parent_chain(backup, &tmp_backup); + + if (result == 1) + { + /* revalidation make sense only if oldest invalid backup is current_backup + */ + + if (tmp_backup->start_time != backup->start_time) + continue; + + if (backup->status == BACKUP_STATUS_ORPHAN) + { + /* Revaliate backup files*/ + pgBackupValidate(backup); + + if (backup->status == BACKUP_STATUS_OK) + { + //tmp_backup = find_parent_full_backup(dest_backup); + /* Revalidation successful, validate corresponding WAL files */ + validate_wal(backup, arclog_path, 0, + 0, 0, current_backup->tli, + xlog_seg_size); + } + } + + if (backup->status != BACKUP_STATUS_OK) + { + corrupted_backup_found = true; + continue; + } + } + } + } + } + } + + /* cleanup */ + parray_walk(backups, pgBackupFree); + parray_free(backups); +} From cf6e7445e5a39876da304fdab38c13a682f11d5c Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Thu, 8 Nov 2018 15:51:58 +0300 Subject: [PATCH 12/26] Remove .autosave --- src/validate.c.autosave | 532 ---------------------------------------- 1 file changed, 532 deletions(-) delete mode 100644 src/validate.c.autosave diff --git a/src/validate.c.autosave b/src/validate.c.autosave deleted file mode 100644 index 1c07e0ae..00000000 --- a/src/validate.c.autosave +++ /dev/null @@ -1,532 +0,0 @@ -/*------------------------------------------------------------------------- - * - * validate.c: validate backup files. - * - * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2018, Postgres Professional - * - *------------------------------------------------------------------------- - */ - -#include "pg_probackup.h" - -#include -#include - -#include "utils/thread.h" - -static void *pgBackupValidateFiles(void *arg); -static void do_validate_instance(void); - -static bool corrupted_backup_found = false; - -typedef struct -{ - parray *files; - bool corrupted; - XLogRecPtr stop_lsn; - uint32 checksum_version; - uint32 backup_version; - - /* - * Return value from the thread. - * 0 means there is no error, 1 - there is an error. - */ - int ret; -} validate_files_arg; - -if (is_absolute_path(host)) - if (hostaddr && *hostaddr) - printf(_("You are connected to database \"%s\" as user \"%s\" on address \"%s\" at port \"%s\".\n"), - db, PQuser(pset.db), hostaddr, PQport(pset.db)); - else - printf(_("You are connected to database \"%s\" as user \"%s\" via socket in \"%s\" at port \"%s\".\n"), - db, PQuser(pset.db), host, PQport(pset.db)); -else - if (hostaddr && *hostaddr && strcmp(host, hostaddr) != 0) - printf(_("You are connected to database \"%s\" as user \"%s\" on host \"%s\" (address \"%s\") at port \"%s\".\n"), - db, PQuser(pset.db), host, hostaddr, PQport(pset.db)); - else - printf(_("You are connected to database \"%s\" as user \"%s\" on host \"%s\" at port \"%s\".\n"), - db, PQuser(pset.db), host, PQport(pset.db)); - -/* - * Validate backup files. - */ -void -pgBackupValidate(pgBackup *backup) -{ - char base_path[MAXPGPATH]; - char path[MAXPGPATH]; - parray *files; - bool corrupted = false; - bool validation_isok = true; - /* arrays with meta info for multi threaded validate */ - pthread_t *threads; - validate_files_arg *threads_args; - int i; - - /* Revalidation is attempted for DONE, ORPHAN and CORRUPT backups */ - if (backup->status != BACKUP_STATUS_OK && - backup->status != BACKUP_STATUS_DONE && - backup->status != BACKUP_STATUS_ORPHAN && - backup->status != BACKUP_STATUS_CORRUPT) - { - elog(WARNING, "Backup %s has status %s. 
Skip validation.", - base36enc(backup->start_time), status2str(backup->status)); - corrupted_backup_found = true; - return; - } - - if (backup->status == BACKUP_STATUS_OK || backup->status == BACKUP_STATUS_DONE) - elog(INFO, "Validating backup %s", base36enc(backup->start_time)); - /* backups in MERGING status must have an option of revalidation without losing MERGING status - else if (backup->status == BACKUP_STATUS_MERGING) - { - some message here - } - */ - else - elog(INFO, "Revalidating backup %s", base36enc(backup->start_time)); - - if (backup->backup_mode != BACKUP_MODE_FULL && - backup->backup_mode != BACKUP_MODE_DIFF_PAGE && - backup->backup_mode != BACKUP_MODE_DIFF_PTRACK && - backup->backup_mode != BACKUP_MODE_DIFF_DELTA) - elog(WARNING, "Invalid backup_mode of backup %s", base36enc(backup->start_time)); - - pgBackupGetPath(backup, base_path, lengthof(base_path), DATABASE_DIR); - pgBackupGetPath(backup, path, lengthof(path), DATABASE_FILE_LIST); - files = dir_read_file_list(base_path, path); - - /* setup threads */ - for (i = 0; i < parray_num(files); i++) - { - pgFile *file = (pgFile *) parray_get(files, i); - pg_atomic_clear_flag(&file->lock); - } - - /* init thread args with own file lists */ - threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); - threads_args = (validate_files_arg *) - palloc(sizeof(validate_files_arg) * num_threads); - - /* Validate files */ - for (i = 0; i < num_threads; i++) - { - validate_files_arg *arg = &(threads_args[i]); - - arg->files = files; - arg->corrupted = false; - arg->stop_lsn = backup->stop_lsn; - arg->checksum_version = backup->checksum_version; - arg->backup_version = parse_program_version(backup->program_version); - /* By default there are some error */ - threads_args[i].ret = 1; - - pthread_create(&threads[i], NULL, pgBackupValidateFiles, arg); - } - - /* Wait theads */ - for (i = 0; i < num_threads; i++) - { - validate_files_arg *arg = &(threads_args[i]); - - pthread_join(threads[i], NULL); - if (arg->corrupted) - corrupted = true; - if (arg->ret == 1) - validation_isok = false; - } - if (!validation_isok) - elog(ERROR, "Data files validation failed"); - - pfree(threads); - pfree(threads_args); - - /* cleanup */ - parray_walk(files, pgFileFree); - parray_free(files); - - /* Update backup status */ - backup->status = corrupted ? BACKUP_STATUS_CORRUPT : BACKUP_STATUS_OK; - write_backup_status(backup); - - if (corrupted) - elog(WARNING, "Backup %s data files are corrupted", base36enc(backup->start_time)); - else - elog(INFO, "Backup %s data files are valid", base36enc(backup->start_time)); -} - -/* - * Validate files in the backup. - * NOTE: If file is not valid, do not use ERROR log message, - * rather throw a WARNING and set arguments->corrupted = true. - * This is necessary to update backup status. - */ -static void * -pgBackupValidateFiles(void *arg) -{ - int i; - validate_files_arg *arguments = (validate_files_arg *)arg; - pg_crc32 crc; - - for (i = 0; i < parray_num(arguments->files); i++) - { - struct stat st; - pgFile *file = (pgFile *) parray_get(arguments->files, i); - - if (!pg_atomic_test_set_flag(&file->lock)) - continue; - - if (interrupted) - elog(ERROR, "Interrupted during validate"); - - /* Validate only regular files */ - if (!S_ISREG(file->mode)) - continue; - /* - * Skip files which has no data, because they - * haven't changed between backups. - */ - if (file->write_size == BYTES_INVALID) - continue; - - /* - * Currently we don't compute checksums for - * cfs_compressed data files, so skip them. 
- */ - if (file->is_cfs) - continue; - - /* print progress */ - elog(VERBOSE, "Validate files: (%d/%lu) %s", - i + 1, (unsigned long) parray_num(arguments->files), file->path); - - if (stat(file->path, &st) == -1) - { - if (errno == ENOENT) - elog(WARNING, "Backup file \"%s\" is not found", file->path); - else - elog(WARNING, "Cannot stat backup file \"%s\": %s", - file->path, strerror(errno)); - arguments->corrupted = true; - break; - } - - if (file->write_size != st.st_size) - { - elog(WARNING, "Invalid size of backup file \"%s\" : " INT64_FORMAT ". Expected %lu", - file->path, file->write_size, (unsigned long) st.st_size); - arguments->corrupted = true; - break; - } - - /* - * Pre 2.0.22 we use CRC-32C, but in newer version of pg_probackup we - * use CRC-32. - * - * pg_control stores its content and checksum of the content, calculated - * using CRC-32C. If we calculate checksum of the whole pg_control using - * CRC-32C we get same checksum constantly. It might be because of the - * CRC-32C algorithm. - * To avoid this problem we need to use different algorithm, CRC-32 in - * this case. - */ - crc = pgFileGetCRC(file->path, arguments->backup_version <= 20021); - if (crc != file->crc) - { - elog(WARNING, "Invalid CRC of backup file \"%s\" : %X. Expected %X", - file->path, file->crc, crc); - arguments->corrupted = true; - - /* validate relation blocks */ - if (file->is_datafile) - { - if (!check_file_pages(file, arguments->stop_lsn, - arguments->checksum_version, - arguments->backup_version)) - arguments->corrupted = true; - } - } - } - - /* Data files validation is successful */ - arguments->ret = 0; - - return NULL; -} - -/* - * Validate all backups in the backup catalog. - * If --instance option was provided, validate only backups of this instance. - */ -int -do_validate_all(void) -{ - if (instance_name == NULL) - { - /* Show list of instances */ - char path[MAXPGPATH]; - DIR *dir; - struct dirent *dent; - - /* open directory and list contents */ - join_path_components(path, backup_path, BACKUPS_DIR); - dir = opendir(path); - if (dir == NULL) - elog(ERROR, "cannot open directory \"%s\": %s", path, strerror(errno)); - - errno = 0; - while ((dent = readdir(dir))) - { - char child[MAXPGPATH]; - struct stat st; - - /* skip entries point current dir or parent dir */ - if (strcmp(dent->d_name, ".") == 0 || - strcmp(dent->d_name, "..") == 0) - continue; - - join_path_components(child, path, dent->d_name); - - if (lstat(child, &st) == -1) - elog(ERROR, "cannot stat file \"%s\": %s", child, strerror(errno)); - - if (!S_ISDIR(st.st_mode)) - continue; - - instance_name = dent->d_name; - sprintf(backup_instance_path, "%s/%s/%s", backup_path, BACKUPS_DIR, instance_name); - sprintf(arclog_path, "%s/%s/%s", backup_path, "wal", instance_name); - xlog_seg_size = get_config_xlog_seg_size(); - - do_validate_instance(); - } - } - else - { - do_validate_instance(); - } - - if (corrupted_backup_found) - { - elog(WARNING, "Some backups are not valid"); - return 1; - } - else - elog(INFO, "All backups are valid"); - - return 0; -} - -/* - * Validate all backups in the given instance of the backup catalog. 
- */ -static void -do_validate_instance(void) -{ - char *current_backup_id; - int i; - int j; - parray *backups; - pgBackup *current_backup = NULL; - - elog(INFO, "Validate backups of the instance '%s'", instance_name); - - /* Get exclusive lock of backup catalog */ - catalog_lock(); - - /* Get list of all backups sorted in order of descending start time */ - backups = catalog_get_backup_list(INVALID_BACKUP_ID); - - /* Examine backups one by one and validate them */ - for (i = 0; i < parray_num(backups); i++) - { - pgBackup *base_full_backup; - char *parent_backup_id; - - current_backup = (pgBackup *) parray_get(backups, i); - - /* Find ancestor for incremental backup */ - if (current_backup->backup_mode != BACKUP_MODE_FULL) - { - pgBackup *tmp_backup = NULL; - int result; - - result = scan_parent_chain(current_backup, &tmp_backup); - - /* chain is broken */ - if (result == 0) - { - /* determine missing backup ID */ - - parent_backup_id = base36enc_dup(tmp_backup->parent_backup); - corrupted_backup_found = true; - - /* orphanize current_backup */ - if (current_backup->status == BACKUP_STATUS_OK) - { - current_backup->status = BACKUP_STATUS_ORPHAN; - write_backup_status(current_backup); - elog(WARNING, "Backup %s is orphaned because his parent %s is missing", - base36enc(current_backup->start_time), - parent_backup_id); - } - else - { - elog(WARNING, "Backup %s has missing parent %s", - base36enc(current_backup->start_time), parent_backup_id); - } - continue; - } - /* chain is whole, but at least one parent is invalid */ - else if (result == 1) - { - /* determine corrupt backup ID */ - parent_backup_id = base36enc_dup(tmp_backup->start_time); - - /* Oldest corrupt backup has a chance for revalidation */ - if (current_backup->start_time != tmp_backup->start_time) - { - /* orphanize current_backup */ - if (current_backup->status == BACKUP_STATUS_OK) - { - current_backup->status = BACKUP_STATUS_ORPHAN; - write_backup_status(current_backup); - elog(WARNING, "Backup %s is orphaned because his parent %s has status: %s", - base36enc(current_backup->start_time), parent_backup_id, - status2str(tmp_backup->status)); - } - else - { - elog(WARNING, "Backup %s has parent %s with status: %s", - base36enc(current_backup->start_time),parent_backup_id, - status2str(tmp_backup->status)); - } - continue; - } - base_full_backup = find_parent_full_backup(current_backup); - } - /* chain is whole, all parents are valid at first glance, - * current backup validation can proceed - */ - else - base_full_backup = tmp_backup; - } - else - base_full_backup = current_backup; - - /* Valiate backup files*/ - pgBackupValidate(current_backup); - - /* Validate corresponding WAL files */ - if (current_backup->status == BACKUP_STATUS_OK) - validate_wal(current_backup, arclog_path, 0, - 0, 0, base_full_backup->tli, xlog_seg_size); - - /* - * Mark every descendant of corrupted backup as orphan - */ - if (current_backup->status == BACKUP_STATUS_CORRUPT) - { - /* This is ridiculous but legal. 
- * PAGE1_2b <- OK - * PAGE1_2a <- OK - * PAGE1_1b <- ORPHAN - * PAGE1_1a <- CORRUPT - * FULL1 <- OK - */ - - corrupted_backup_found = true; - current_backup_id = base36enc_dup(current_backup->start_time); - - for (j = i - 1; j >= 0; j--) - { - pgBackup *backup = (pgBackup *) parray_get(backups, j); - - if (is_parent(current_backup->start_time, backup, false)) - { - if (backup->status == BACKUP_STATUS_OK) - { - backup->status = BACKUP_STATUS_ORPHAN; - write_backup_status(backup); - - elog(WARNING, "Backup %s is orphaned because his parent %s has status: %s", - base36enc(backup->start_time), - current_backup_id, - status2str(current_backup->status)); - } - } - } - free(current_backup_id); - } - - /* For every OK backup we try to revalidate all his ORPHAN descendants. */ - if (current_backup->status == BACKUP_STATUS_OK) - { - /* revalidate all ORPHAN descendats - * be very careful not to miss a missing backup - * for every backup we must check that he is descendant of current_backup - */ - for (j = i - 1; j >= 0; j--) - { - pgBackup *backup = (pgBackup *) parray_get(backups, j); - pgBackup *tmp_backup = NULL; - int result; - - //PAGE3b ORPHAN - //PAGE2b ORPHAN ----- - //PAGE6a ORPHAN | - //PAGE5a CORRUPT | - //PAGE4a missing | - //PAGE3a missing | - //PAGE2a ORPHAN | - //PAGE1a OK <- we are here <-| - //FULL OK - - if (is_parent(current_backup->start_time, backup, false)) - { - /* Revalidation make sense only if parent chain is whole. - * is_parent() do not guarantee that. - */ - result = scan_parent_chain(backup, &tmp_backup); - - if (result == 1) - { - /* revalidation make sense only if oldest invalid backup is current_backup - */ - - if (tmp_backup->start_time != backup->start_time) - continue; - - if (backup->status == BACKUP_STATUS_ORPHAN) - { - /* Revaliate backup files*/ - pgBackupValidate(backup); - - if (backup->status == BACKUP_STATUS_OK) - { - //tmp_backup = find_parent_full_backup(dest_backup); - /* Revalidation successful, validate corresponding WAL files */ - validate_wal(backup, arclog_path, 0, - 0, 0, current_backup->tli, - xlog_seg_size); - } - } - - if (backup->status != BACKUP_STATUS_OK) - { - corrupted_backup_found = true; - continue; - } - } - } - } - } - } - - /* cleanup */ - parray_walk(backups, pgBackupFree); - parray_free(backups); -} From a0ec849b4a0fdf6a746a8e923fe566d1abca0a02 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Thu, 8 Nov 2018 18:13:42 +0300 Subject: [PATCH 13/26] We should check only negative return values --- src/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data.c b/src/data.c index d3e48e9a..eeb2952d 100644 --- a/src/data.c +++ b/src/data.c @@ -69,7 +69,7 @@ do_compress(void* dst, size_t dst_size, void const* src, size_t src_size, { int32 ret; ret = zlib_compress(dst, dst_size, src, src_size, level); - if (ret != Z_OK && errormsg) + if (ret < Z_OK && errormsg) *errormsg = zError(ret); return ret; } @@ -99,7 +99,7 @@ do_decompress(void* dst, size_t dst_size, void const* src, size_t src_size, { int32 ret; ret = zlib_decompress(dst, dst_size, src, src_size); - if (ret != Z_OK && errormsg) + if (ret < Z_OK && errormsg) *errormsg = zError(ret); return ret; } From c530d28869d9865fd98d13c0b723db5f3d88779c Mon Sep 17 00:00:00 2001 From: Anastasia Date: Thu, 8 Nov 2018 19:38:22 +0300 Subject: [PATCH 14/26] change datafile validation algorithm: - validate file block by block by default, not only in case of file-level checksum corruption; - add an option: --skip-block-validation to disable this behaviour; - calculate 
file checksum at the same time as validate blocks; --- src/data.c | 22 ++++++++++++++++++-- src/dir.c | 30 ++++---------------------- src/help.c | 11 +++++++++- src/pg_probackup.c | 3 +++ src/pg_probackup.h | 27 ++++++++++++++++++++++-- src/validate.c | 52 +++++++++++++++++++++++++++------------------- 6 files changed, 93 insertions(+), 52 deletions(-) diff --git a/src/data.c b/src/data.c index eeb2952d..4fc53783 100644 --- a/src/data.c +++ b/src/data.c @@ -1601,12 +1601,14 @@ validate_one_page(Page page, pgFile *file, /* Valiate pages of datafile in backup one by one */ bool -check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version, - uint32 backup_version) +check_file_pages(pgFile *file, XLogRecPtr stop_lsn, + uint32 checksum_version, uint32 backup_version) { size_t read_len = 0; bool is_valid = true; FILE *in; + pg_crc32 crc; + bool use_crc32c = (backup_version <= 20021); elog(VERBOSE, "validate relation blocks for file %s", file->name); @@ -1623,6 +1625,9 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version, file->path, strerror(errno)); } + /* calc CRC of backup file */ + INIT_FILE_CRC32(use_crc32c, crc); + /* read and validate pages one by one */ while (true) { @@ -1647,6 +1652,8 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version, blknum, file->path, strerror(errno_tmp)); } + COMP_FILE_CRC32(use_crc32c, crc, &header, read_len); + if (header.block < blknum) elog(ERROR, "backup is broken at file->path %s block %u", file->path, blknum); @@ -1668,6 +1675,8 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version, elog(ERROR, "cannot read block %u of \"%s\" read %lu of %d", blknum, file->path, read_len, header.compressed_size); + COMP_FILE_CRC32(use_crc32c, crc, compressed_page.data, read_len); + if (header.compressed_size != BLCKSZ || page_may_be_compressed(compressed_page.data, file->compress_alg, backup_version)) @@ -1706,5 +1715,14 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version, } } + FIN_FILE_CRC32(use_crc32c, crc); + + if (crc != file->crc) + { + elog(WARNING, "Invalid CRC of backup file \"%s\" : %X. 
Expected %X", + file->path, file->crc, crc); + is_valid = false; + } + return is_valid; } diff --git a/src/dir.c b/src/dir.c index 9e55c821..5c6f60d8 100644 --- a/src/dir.c +++ b/src/dir.c @@ -267,28 +267,6 @@ pgFileGetCRC(const char *file_path, bool use_crc32c) size_t len; int errno_tmp; -#define INIT_FILE_CRC32(crc) \ -do { \ - if (use_crc32c) \ - INIT_CRC32C(crc); \ - else \ - INIT_TRADITIONAL_CRC32(crc); \ -} while (0) -#define COMP_FILE_CRC32(crc, data, len) \ -do { \ - if (use_crc32c) \ - COMP_CRC32C((crc), (data), (len)); \ - else \ - COMP_TRADITIONAL_CRC32(crc, data, len); \ -} while (0) -#define FIN_FILE_CRC32(crc) \ -do { \ - if (use_crc32c) \ - FIN_CRC32C(crc); \ - else \ - FIN_TRADITIONAL_CRC32(crc); \ -} while (0) - /* open file in binary read mode */ fp = fopen(file_path, PG_BINARY_R); if (fp == NULL) @@ -296,20 +274,20 @@ do { \ file_path, strerror(errno)); /* calc CRC of backup file */ - INIT_FILE_CRC32(crc); + INIT_FILE_CRC32(use_crc32c, crc); while ((len = fread(buf, 1, sizeof(buf), fp)) == sizeof(buf)) { if (interrupted) elog(ERROR, "interrupted during CRC calculation"); - COMP_FILE_CRC32(crc, buf, len); + COMP_FILE_CRC32(use_crc32c, crc, buf, len); } errno_tmp = errno; if (!feof(fp)) elog(WARNING, "cannot read \"%s\": %s", file_path, strerror(errno_tmp)); if (len > 0) - COMP_FILE_CRC32(crc, buf, len); - FIN_FILE_CRC32(crc); + COMP_FILE_CRC32(use_crc32c, crc, buf, len); + FIN_FILE_CRC32(use_crc32c, crc); fclose(fp); diff --git a/src/help.c b/src/help.c index 1cf6d404..409c8f82 100644 --- a/src/help.c +++ b/src/help.c @@ -118,6 +118,7 @@ help_pg_probackup(void) printf(_(" [--master-db=db_name] [--master-host=host_name]\n")); printf(_(" [--master-port=port] [--master-user=user_name]\n")); printf(_(" [--replica-timeout=timeout]\n")); + printf(_(" [--skip-block-validation]\n")); printf(_("\n %s restore -B backup-path --instance=instance_name\n"), PROGRAM_NAME); printf(_(" [-D pgdata-path] [-i backup-id] [--progress]\n")); @@ -127,12 +128,14 @@ help_pg_probackup(void) printf(_(" [--recovery-target-action=pause|promote|shutdown]\n")); printf(_(" [--restore-as-replica]\n")); printf(_(" [--no-validate]\n")); + printf(_(" [--skip-block-validation]\n")); printf(_("\n %s validate -B backup-path [--instance=instance_name]\n"), PROGRAM_NAME); printf(_(" [-i backup-id] [--progress]\n")); printf(_(" [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]]\n")); printf(_(" [--recovery-target-name=target-name]\n")); printf(_(" [--timeline=timeline]\n")); + printf(_(" [--skip-block-validation]\n")); printf(_("\n %s show -B backup-path\n"), PROGRAM_NAME); printf(_(" [--instance=instance_name [-i backup-id]]\n")); @@ -203,7 +206,8 @@ help_backup(void) printf(_(" [-w --no-password] [-W --password]\n")); printf(_(" [--master-db=db_name] [--master-host=host_name]\n")); printf(_(" [--master-port=port] [--master-user=user_name]\n")); - printf(_(" [--replica-timeout=timeout]\n\n")); + printf(_(" [--replica-timeout=timeout]\n")); + printf(_(" [--skip-block-validation]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" -b, --backup-mode=backup-mode backup mode=FULL|PAGE|DELTA|PTRACK\n")); @@ -215,6 +219,7 @@ help_backup(void) printf(_(" -j, --threads=NUM number of parallel threads\n")); printf(_(" --archive-timeout=timeout wait timeout for WAL segment archiving (default: 5min)\n")); printf(_(" --progress show progress\n")); + printf(_(" --skip-block-validation set to validate only file-level checksum\n")); printf(_("\n Logging options:\n")); 
printf(_(" --log-level-console=log-level-console\n")); @@ -279,6 +284,7 @@ help_restore(void) printf(_(" [--immediate] [--recovery-target-name=target-name]\n")); printf(_(" [--recovery-target-action=pause|promote|shutdown]\n")); printf(_(" [--restore-as-replica] [--no-validate]\n\n")); + printf(_(" [--skip-block-validation]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" --instance=instance_name name of the instance\n")); @@ -305,6 +311,7 @@ help_restore(void) printf(_(" -R, --restore-as-replica write a minimal recovery.conf in the output directory\n")); printf(_(" to ease setting up a standby server\n")); printf(_(" --no-validate disable backup validation during restore\n")); + printf(_(" --skip-block-validation set to validate only file-level checksum\n")); printf(_("\n Logging options:\n")); printf(_(" --log-level-console=log-level-console\n")); @@ -335,6 +342,7 @@ help_validate(void) printf(_(" [-i backup-id] [--progress]\n")); printf(_(" [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]]\n")); printf(_(" [--timeline=timeline]\n\n")); + printf(_(" [--skip-block-validation]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" --instance=instance_name name of the instance\n")); @@ -348,6 +356,7 @@ help_validate(void) printf(_(" --timeline=timeline recovering into a particular timeline\n")); printf(_(" --recovery-target-name=target-name\n")); printf(_(" the named restore point to which recovery will proceed\n")); + printf(_(" --skip-block-validation set to validate only file-level checksum\n")); printf(_("\n Logging options:\n")); printf(_(" --log-level-console=log-level-console\n")); diff --git a/src/pg_probackup.c b/src/pg_probackup.c index 82107097..ea99672d 100644 --- a/src/pg_probackup.c +++ b/src/pg_probackup.c @@ -89,6 +89,8 @@ static pgRecoveryTarget *recovery_target_options = NULL; bool restore_as_replica = false; bool restore_no_validate = false; +bool skip_block_validation = false; + /* delete options */ bool delete_wal = false; bool delete_expired = false; @@ -179,6 +181,7 @@ static pgut_option options[] = { 'b', 'R', "restore-as-replica", &restore_as_replica, SOURCE_CMDLINE }, { 'b', 27, "no-validate", &restore_no_validate, SOURCE_CMDLINE }, { 's', 28, "lsn", &target_lsn, SOURCE_CMDLINE }, + { 'b', 29, "skip-block-validation", &skip_block_validation, SOURCE_CMDLINE }, /* delete options */ { 'b', 130, "wal", &delete_wal, SOURCE_CMDLINE }, { 'b', 131, "expired", &delete_expired, SOURCE_CMDLINE }, diff --git a/src/pg_probackup.h b/src/pg_probackup.h index 182a647b..c6ff6343 100644 --- a/src/pg_probackup.h +++ b/src/pg_probackup.h @@ -65,6 +65,28 @@ typedef enum CompressAlg ZLIB_COMPRESS, } CompressAlg; +#define INIT_FILE_CRC32(use_crc32c, crc) \ +do { \ + if (use_crc32c) \ + INIT_CRC32C(crc); \ + else \ + INIT_TRADITIONAL_CRC32(crc); \ +} while (0) +#define COMP_FILE_CRC32(use_crc32c, crc, data, len) \ +do { \ + if (use_crc32c) \ + COMP_CRC32C((crc), (data), (len)); \ + else \ + COMP_TRADITIONAL_CRC32(crc, data, len); \ +} while (0) +#define FIN_FILE_CRC32(use_crc32c, crc) \ +do { \ + if (use_crc32c) \ + FIN_CRC32C(crc); \ + else \ + FIN_TRADITIONAL_CRC32(crc); \ +} while (0) + /* Information about single file (or dir) in backup */ typedef struct pgFile { @@ -339,6 +361,7 @@ extern bool exclusive_backup; /* restore options */ extern bool restore_as_replica; +extern bool skip_block_validation; /* delete options */ extern bool delete_wal; @@ -527,9 +550,9 @@ extern void 
get_wal_file(const char *from_path, const char *to_path); extern bool calc_file_checksum(pgFile *file); -extern bool check_file_pages(pgFile* file, XLogRecPtr stop_lsn, +extern bool check_file_pages(pgFile* file, + XLogRecPtr stop_lsn, uint32 checksum_version, uint32 backup_version); - /* parsexlog.c */ extern void extractPageMap(const char *archivedir, TimeLineID tli, uint32 seg_size, diff --git a/src/validate.c b/src/validate.c index 2f75cd4b..55ea5ebe 100644 --- a/src/validate.c +++ b/src/validate.c @@ -208,32 +208,42 @@ pgBackupValidateFiles(void *arg) } /* - * Pre 2.0.22 we use CRC-32C, but in newer version of pg_probackup we - * use CRC-32. - * - * pg_control stores its content and checksum of the content, calculated - * using CRC-32C. If we calculate checksum of the whole pg_control using - * CRC-32C we get same checksum constantly. It might be because of the - * CRC-32C algorithm. - * To avoid this problem we need to use different algorithm, CRC-32 in - * this case. + * If option skip-block-validation is set, compute only file-level CRC for + * datafiles, otherwise check them block by block. */ - crc = pgFileGetCRC(file->path, arguments->backup_version <= 20021); - if (crc != file->crc) + if (!file->is_datafile || skip_block_validation) { - elog(WARNING, "Invalid CRC of backup file \"%s\" : %X. Expected %X", - file->path, file->crc, crc); - arguments->corrupted = true; - - /* validate relation blocks */ - if (file->is_datafile) + /* + * Pre 2.0.22 we use CRC-32C, but in newer version of pg_probackup we + * use CRC-32. + * + * pg_control stores its content and checksum of the content, calculated + * using CRC-32C. If we calculate checksum of the whole pg_control using + * CRC-32C we get same checksum constantly. It might be because of the + * CRC-32C algorithm. + * To avoid this problem we need to use different algorithm, CRC-32 in + * this case. + */ + crc = pgFileGetCRC(file->path, arguments->backup_version <= 20021); + if (crc != file->crc) { - if (!check_file_pages(file, arguments->stop_lsn, - arguments->checksum_version, - arguments->backup_version)) - arguments->corrupted = true; + elog(WARNING, "Invalid CRC of backup file \"%s\" : %X. 
Expected %X", + file->path, file->crc, crc); + arguments->corrupted = true; } } + else + { + /* + * validate relation block by block + * check page headers, checksums (if enabled) + * and compute checksum of the file + */ + if (!check_file_pages(file, arguments->stop_lsn, + arguments->checksum_version, + arguments->backup_version)) + arguments->corrupted = true; + } } /* Data files validation is successful */ From 141ba33000d21fc3b5f78e9678552b6acf28f40d Mon Sep 17 00:00:00 2001 From: Anastasia Date: Thu, 8 Nov 2018 20:49:30 +0300 Subject: [PATCH 15/26] fix: close file after validation --- src/data.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/data.c b/src/data.c index 4fc53783..1d1a2ee9 100644 --- a/src/data.c +++ b/src/data.c @@ -1716,6 +1716,7 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, } FIN_FILE_CRC32(use_crc32c, crc); + fclose(in); if (crc != file->crc) { From 4870fa7f68728cb51bc81f3d4f74cd53b1dd76f6 Mon Sep 17 00:00:00 2001 From: Anastasia Date: Fri, 9 Nov 2018 13:59:56 +0300 Subject: [PATCH 16/26] more informative error message for failed WAL record read --- src/parsexlog.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/parsexlog.c b/src/parsexlog.c index 7f7365f5..ee7b5076 100644 --- a/src/parsexlog.c +++ b/src/parsexlog.c @@ -237,10 +237,11 @@ doExtractPageMap(void *arg) */ if (XLogRecPtrIsInvalid(found)) { - elog(WARNING, "Thread [%d]: could not read WAL record at %X/%X", + elog(WARNING, "Thread [%d]: could not read WAL record at %X/%X. %s", private_data->thread_num, (uint32) (extract_arg->startpoint >> 32), - (uint32) (extract_arg->startpoint)); + (uint32) (extract_arg->startpoint), + (xlogreader->errormsg_buf[0] != '\0')?xlogreader->errormsg_buf:""); PrintXLogCorruptionMsg(private_data, ERROR); } extract_arg->startpoint = found; From 12a6dcdd93ec5dddc2d24df326005a91c41b36d5 Mon Sep 17 00:00:00 2001 From: Grigory Smolkin Date: Fri, 9 Nov 2018 18:45:29 +0300 Subject: [PATCH 17/26] PGPRO-1905: check message about system id mismatch --- tests/page.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/page.py b/tests/page.py index 3d19a81d..1d8238e1 100644 --- a/tests/page.py +++ b/tests/page.py @@ -936,6 +936,8 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): 'INFO: Wait for LSN' in e.message and 'in archived WAL segment' in e.message and 'could not read WAL record at' in e.message and + 'WAL file is from different database system: WAL file database system identifier is' in e.message and + 'pg_control database system identifier is' in e.message and 'Possible WAL corruption. Error has occured during reading WAL segment "{0}"'.format( file_destination) in e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format( @@ -961,6 +963,8 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): 'INFO: Wait for LSN' in e.message and 'in archived WAL segment' in e.message and 'could not read WAL record at' in e.message and + 'WAL file is from different database system: WAL file database system identifier is' in e.message and + 'pg_control database system identifier is' in e.message and 'Possible WAL corruption. 
                    file_destination) in e.message,
                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
From 4995652ef5b1a225e6efb90b9512704654078009 Mon Sep 17 00:00:00 2001
From: Anastasia
Date: Sat, 10 Nov 2018 15:49:36 +0300
Subject: [PATCH 18/26] fix decompression of BLCKSZ pages

---
 src/data.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/data.c b/src/data.c
index 1d1a2ee9..d9fd8837 100644
--- a/src/data.c
+++ b/src/data.c
@@ -722,6 +722,7 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate,
 	size_t		read_len;
 	DataPage	compressed_page; /* used as read buffer */
 	DataPage	page;
+	int32		uncompressed_size = 0;
 
 	/* File didn`t changed. Nothig to copy */
 	if (file->write_size == BYTES_INVALID)
@@ -777,17 +778,23 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate,
 
 		Assert(header.compressed_size <= BLCKSZ);
 
+		/* read a page from file */
 		read_len = fread(compressed_page.data, 1,
 			MAXALIGN(header.compressed_size), in);
 		if (read_len != MAXALIGN(header.compressed_size))
 			elog(ERROR, "cannot read block %u of \"%s\" read %lu of %d",
 				 blknum, file->path, read_len, header.compressed_size);
 
+		/*
+		 * If the page is smaller than BLCKSZ, decompress it.
+		 * BUGFIX for versions < 2.0.23: if the page size is equal to
+		 * BLCKSZ, we have to check whether it is actually compressed
+		 * using the page_may_be_compressed() function.
+		 */
 		if (header.compressed_size != BLCKSZ
 			|| page_may_be_compressed(compressed_page.data, file->compress_alg,
 									  backup_version))
 		{
-			int32		uncompressed_size = 0;
 			const char *errormsg = NULL;
 
 			uncompressed_size = do_decompress(page.data, BLCKSZ,
@@ -820,7 +827,11 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate,
 				 blknum, file->path, strerror(errno));
 		}
 
-		if (header.compressed_size < BLCKSZ)
+		/* If we decompressed the page, write page.data; if the page
+		 * was not compressed, write exactly what we read:
+		 * compressed_page.data.
+		 */
+		if (uncompressed_size == BLCKSZ)
 		{
 			if (fwrite(page.data, 1, BLCKSZ, out) != BLCKSZ)
 				elog(ERROR, "cannot write block %u of \"%s\": %s",
@@ -828,7 +839,7 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate,
 		}
 		else
 		{
-			/* if page wasn't compressed, we've read full block */
+			/* the page was stored uncompressed, write the raw block */
 			if (fwrite(compressed_page.data, 1, BLCKSZ, out) != BLCKSZ)
 				elog(ERROR, "cannot write block %u of \"%s\": %s",
 					 blknum, file->path, strerror(errno));
From 88354a8dde65b0b60f1f64450b84596303b12b23 Mon Sep 17 00:00:00 2001
From: Arthur Zakirov
Date: Tue, 16 Oct 2018 18:13:27 +0300
Subject: [PATCH 19/26] PGPRO-1892: Continue failed merge command

---
 src/backup.c       |   2 +-
 src/catalog.c      |  14 ++++--
 src/data.c         |  16 -------
 src/delete.c       |  27 ++++++-----
 src/merge.c        | 114 ++++++++++++++++++++-------------------------
 src/pg_probackup.h |   7 +--
 tests/merge.py     |   9 +++-
 7 files changed, 88 insertions(+), 101 deletions(-)

diff --git a/src/backup.c b/src/backup.c
index b2aed318..036f3e91 100644
--- a/src/backup.c
+++ b/src/backup.c
@@ -790,7 +790,7 @@ do_backup_instance(void)
 	}
 
 	/* Print the list of files to backup catalog */
-	pgBackupWriteFileList(&current, backup_files_list, pgdata);
+	write_backup_filelist(&current, backup_files_list, pgdata);
 
 	/* Compute summary of size of regular files in the backup */
 	for (i = 0; i < parray_num(backup_files_list); i++)
diff --git a/src/catalog.c b/src/catalog.c
index 41676f4c..788c33f7 100644
--- a/src/catalog.c
+++ b/src/catalog.c
@@ -509,18 +509,22 @@ write_backup(pgBackup *backup)
 	fp = fopen(conf_path, 
"wt"); if (fp == NULL) elog(ERROR, "Cannot open configuration file \"%s\": %s", conf_path, - strerror(errno)); + strerror(errno)); pgBackupWriteControl(fp, backup); - fclose(fp); + if (fflush(fp) != 0 || + fsync(fileno(fp)) != 0 || + fclose(fp)) + elog(ERROR, "Cannot write configuration file \"%s\": %s", + conf_path, strerror(errno)); } /* * Output the list of files to backup catalog DATABASE_FILE_LIST */ void -pgBackupWriteFileList(pgBackup *backup, parray *files, const char *root) +write_backup_filelist(pgBackup *backup, parray *files, const char *root) { FILE *fp; char path[MAXPGPATH]; @@ -529,7 +533,7 @@ pgBackupWriteFileList(pgBackup *backup, parray *files, const char *root) fp = fopen(path, "wt"); if (fp == NULL) - elog(ERROR, "cannot open file list \"%s\": %s", path, + elog(ERROR, "Cannot open file list \"%s\": %s", path, strerror(errno)); print_file_list(fp, files, root); @@ -537,7 +541,7 @@ pgBackupWriteFileList(pgBackup *backup, parray *files, const char *root) if (fflush(fp) != 0 || fsync(fileno(fp)) != 0 || fclose(fp)) - elog(ERROR, "cannot write file list \"%s\": %s", path, strerror(errno)); + elog(ERROR, "Cannot write file list \"%s\": %s", path, strerror(errno)); } /* diff --git a/src/data.c b/src/data.c index d9fd8837..9b6cbd23 100644 --- a/src/data.c +++ b/src/data.c @@ -1037,22 +1037,6 @@ copy_file(const char *from_root, const char *to_root, pgFile *file) return true; } -/* - * Move file from one backup to another. - * We do not apply compression to these files, because - * it is either small control file or already compressed cfs file. - */ -void -move_file(const char *from_root, const char *to_root, pgFile *file) -{ - char to_path[MAXPGPATH]; - - join_path_components(to_path, to_root, file->path + strlen(from_root) + 1); - if (rename(file->path, to_path) == -1) - elog(ERROR, "Cannot move file \"%s\" to path \"%s\": %s", - file->path, to_path, strerror(errno)); -} - #ifdef HAVE_LIBZ /* * Show error during work with compressed file diff --git a/src/delete.c b/src/delete.c index c5f16af7..599fd2fd 100644 --- a/src/delete.c +++ b/src/delete.c @@ -14,7 +14,6 @@ #include #include -static int delete_backup_files(pgBackup *backup); static void delete_walfiles(XLogRecPtr oldest_lsn, TimeLineID oldest_tli, uint32 xlog_seg_size); @@ -245,7 +244,7 @@ do_retention_purge(void) * Delete backup files of the backup and update the status of the backup to * BACKUP_STATUS_DELETED. */ -static int +void delete_backup_files(pgBackup *backup) { size_t i; @@ -257,11 +256,15 @@ delete_backup_files(pgBackup *backup) * If the backup was deleted already, there is nothing to do. 
*/ if (backup->status == BACKUP_STATUS_DELETED) - return 0; + { + elog(WARNING, "Backup %s already deleted", + base36enc(backup->start_time)); + return; + } time2iso(timestamp, lengthof(timestamp), backup->recovery_time); - elog(INFO, "delete: %s %s", + elog(INFO, "Delete: %s %s", base36enc(backup->start_time), timestamp); /* @@ -283,17 +286,17 @@ delete_backup_files(pgBackup *backup) pgFile *file = (pgFile *) parray_get(files, i); /* print progress */ - elog(VERBOSE, "delete file(%zd/%lu) \"%s\"", i + 1, + elog(VERBOSE, "Delete file(%zd/%lu) \"%s\"", i + 1, (unsigned long) parray_num(files), file->path); if (remove(file->path)) { - elog(WARNING, "can't remove \"%s\": %s", file->path, - strerror(errno)); - parray_walk(files, pgFileFree); - parray_free(files); - - return 1; + if (errno == ENOENT) + elog(VERBOSE, "File \"%s\" is absent", file->path); + else + elog(ERROR, "Cannot remove \"%s\": %s", file->path, + strerror(errno)); + return; } } @@ -301,7 +304,7 @@ delete_backup_files(pgBackup *backup) parray_free(files); backup->status = BACKUP_STATUS_DELETED; - return 0; + return; } /* diff --git a/src/merge.c b/src/merge.c index 137f1acd..3f32fff2 100644 --- a/src/merge.c +++ b/src/merge.c @@ -77,7 +77,8 @@ do_merge(time_t backup_id) { if (backup->status != BACKUP_STATUS_OK && /* It is possible that previous merging was interrupted */ - backup->status != BACKUP_STATUS_MERGING) + backup->status != BACKUP_STATUS_MERGING && + backup->status != BACKUP_STATUS_DELETING) elog(ERROR, "Backup %s has status: %s", base36enc(backup->start_time), status2str(backup->status)); @@ -164,7 +165,14 @@ merge_backups(pgBackup *to_backup, pgBackup *from_backup) int i; bool merge_isok = true; - elog(LOG, "Merging backup %s with backup %s", from_backup_id, to_backup_id); + elog(INFO, "Merging backup %s with backup %s", from_backup_id, to_backup_id); + + /* + * Previous merging was interrupted during deleting source backup. It is + * safe just to delete it again. + */ + if (from_backup->status == BACKUP_STATUS_DELETING) + goto delete_source_backup; to_backup->status = BACKUP_STATUS_MERGING; write_backup_status(to_backup); @@ -243,68 +251,10 @@ merge_backups(pgBackup *to_backup, pgBackup *from_backup) if (!merge_isok) elog(ERROR, "Data files merging failed"); - /* - * Files were copied into to_backup and deleted from from_backup. Remove - * remaining directories from from_backup. - */ - parray_qsort(files, pgFileComparePathDesc); - for (i = 0; i < parray_num(files); i++) - { - pgFile *file = (pgFile *) parray_get(files, i); - - if (!S_ISDIR(file->mode)) - continue; - - if (rmdir(file->path)) - elog(ERROR, "Could not remove directory \"%s\": %s", - file->path, strerror(errno)); - } - if (rmdir(from_database_path)) - elog(ERROR, "Could not remove directory \"%s\": %s", - from_database_path, strerror(errno)); - if (unlink(control_file)) - elog(ERROR, "Could not remove file \"%s\": %s", - control_file, strerror(errno)); - - pgBackupGetPath(from_backup, control_file, lengthof(control_file), - BACKUP_CONTROL_FILE); - if (unlink(control_file)) - elog(ERROR, "Could not remove file \"%s\": %s", - control_file, strerror(errno)); - - if (rmdir(from_backup_path)) - elog(ERROR, "Could not remove directory \"%s\": %s", - from_backup_path, strerror(errno)); - - /* - * Delete files which are not in from_backup file list. 
- */ - for (i = 0; i < parray_num(to_files); i++) - { - pgFile *file = (pgFile *) parray_get(to_files, i); - - if (parray_bsearch(files, file, pgFileComparePathDesc) == NULL) - { - pgFileDelete(file); - elog(LOG, "Deleted \"%s\"", file->path); - } - } - - /* - * Rename FULL backup directory. - */ - if (rename(to_backup_path, from_backup_path) == -1) - elog(ERROR, "Could not rename directory \"%s\" to \"%s\": %s", - to_backup_path, from_backup_path, strerror(errno)); - /* * Update to_backup metadata. */ - pgBackupCopy(to_backup, from_backup); - /* Correct metadata */ - to_backup->backup_mode = BACKUP_MODE_FULL; to_backup->status = BACKUP_STATUS_OK; - to_backup->parent_backup = INVALID_BACKUP_ID; /* Compute summary of size of regular files in the backup */ to_backup->data_bytes = 0; for (i = 0; i < parray_num(files); i++) @@ -325,7 +275,46 @@ merge_backups(pgBackup *to_backup, pgBackup *from_backup) else to_backup->wal_bytes = BYTES_INVALID; - pgBackupWriteFileList(to_backup, files, from_database_path); + write_backup_filelist(to_backup, files, from_database_path); + write_backup(to_backup); + +delete_source_backup: + /* + * Files were copied into to_backup. It is time to remove source backup + * entirely. + */ + delete_backup_files(from_backup); + + /* + * Delete files which are not in from_backup file list. + */ + for (i = 0; i < parray_num(to_files); i++) + { + pgFile *file = (pgFile *) parray_get(to_files, i); + + if (parray_bsearch(files, file, pgFileComparePathDesc) == NULL) + { + pgFileDelete(file); + elog(VERBOSE, "Deleted \"%s\"", file->path); + } + } + + /* + * Rename FULL backup directory. + */ + if (rename(to_backup_path, from_backup_path) == -1) + elog(ERROR, "Could not rename directory \"%s\" to \"%s\": %s", + to_backup_path, from_backup_path, strerror(errno)); + + /* + * Merging finished, now we can safely update ID of the destination backup. 
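+	 * The merged full backup takes over from_backup's ID (and, via the
+	 * rename above, its directory), so descendants of from_backup still
+	 * reference a valid parent.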
+ */ + pgBackupCopy(to_backup, from_backup); + elog(INFO, "to_backup: %s", base36enc(to_backup->start_time)); + /* Correct metadata */ + to_backup->backup_mode = BACKUP_MODE_FULL; + to_backup->status = BACKUP_STATUS_OK; + to_backup->parent_backup = INVALID_BACKUP_ID; write_backup(to_backup); /* Cleanup */ @@ -508,10 +497,9 @@ merge_files(void *arg) file->write_size = pgFileSize(to_path_tmp); file->crc = pgFileGetCRC(to_path_tmp, false); } - pgFileDelete(file); } else - move_file(argument->from_root, argument->to_root, file); + copy_file(argument->from_root, argument->to_root, file); if (file->write_size != BYTES_INVALID) elog(LOG, "Moved file \"%s\": " INT64_FORMAT " bytes", diff --git a/src/pg_probackup.h b/src/pg_probackup.h index c6ff6343..cac1d747 100644 --- a/src/pg_probackup.h +++ b/src/pg_probackup.h @@ -448,6 +448,7 @@ extern int do_show(time_t requested_backup_id); /* in delete.c */ extern void do_delete(time_t backup_id); +extern void delete_backup_files(pgBackup *backup); extern int do_retention_purge(void); extern int do_delete_instance(void); @@ -478,10 +479,11 @@ extern pgBackup *catalog_get_last_data_backup(parray *backup_list, TimeLineID tli); extern void catalog_lock(void); extern void pgBackupWriteControl(FILE *out, pgBackup *backup); -extern void pgBackupWriteFileList(pgBackup *backup, parray *files, +extern void write_backup_filelist(pgBackup *backup, parray *files, const char *root); -extern void pgBackupGetPath(const pgBackup *backup, char *path, size_t len, const char *subdir); +extern void pgBackupGetPath(const pgBackup *backup, char *path, size_t len, + const char *subdir); extern void pgBackupGetPath2(const pgBackup *backup, char *path, size_t len, const char *subdir1, const char *subdir2); extern int pgBackupCreateDir(pgBackup *backup); @@ -543,7 +545,6 @@ extern void restore_data_file(const char *to_path, bool write_header, uint32 backup_version); extern bool copy_file(const char *from_root, const char *to_root, pgFile *file); -extern void move_file(const char *from_root, const char *to_root, pgFile *file); extern void push_wal_file(const char *from_path, const char *to_path, bool is_compress, bool overwrite); extern void get_wal_file(const char *from_path, const char *to_path); diff --git a/tests/merge.py b/tests/merge.py index 0169b275..db0dc2c3 100644 --- a/tests/merge.py +++ b/tests/merge.py @@ -602,7 +602,7 @@ class MergeTest(ProbackupTest, unittest.TestCase): gdb = self.merge_backup(backup_dir, "node", backup_id, gdb=True) - gdb.set_breakpoint('move_file') + gdb.set_breakpoint('copy_file') gdb.run_until_break() if gdb.continue_execution_until_break(20) != 'breakpoint-hit': @@ -615,3 +615,10 @@ class MergeTest(ProbackupTest, unittest.TestCase): # Try to continue failed MERGE self.merge_backup(backup_dir, "node", backup_id) + + # Drop node and restore it + node.cleanup() + self.restore_node(backup_dir, 'node', node) + + # Clean after yourself + self.del_test_dir(module_name, fname) From 03c9fba7bd7157fad5f44d87a9ebe497ad9cb0f5 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Wed, 24 Oct 2018 18:53:42 +0300 Subject: [PATCH 20/26] PGPRO-1892: Remove obsolete INFO message --- src/merge.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/merge.c b/src/merge.c index 3f32fff2..5d22002d 100644 --- a/src/merge.c +++ b/src/merge.c @@ -310,7 +310,6 @@ delete_source_backup: * Merging finished, now we can safely update ID of the destination backup. 
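 	 * The merged full backup takes over from_backup's ID (and, via the
 	 * rename above, its directory), so descendants of from_backup still
 	 * reference a valid parent.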
*/ pgBackupCopy(to_backup, from_backup); - elog(INFO, "to_backup: %s", base36enc(to_backup->start_time)); /* Correct metadata */ to_backup->backup_mode = BACKUP_MODE_FULL; to_backup->status = BACKUP_STATUS_OK; From fcc4ed74392e727a8c4536a8b410e07a0e3202f8 Mon Sep 17 00:00:00 2001 From: Grigory Smolkin Date: Sat, 27 Oct 2018 12:17:22 +0300 Subject: [PATCH 21/26] PGPRO-1892: added test_continue_failed_merge_with_corrupted_delta_backup --- tests/merge.py | 128 ++++++++++++++++++++++++++++++++++++++++- tests/validate_test.py | 11 ++++ 2 files changed, 138 insertions(+), 1 deletion(-) diff --git a/tests/merge.py b/tests/merge.py index db0dc2c3..abd64565 100644 --- a/tests/merge.py +++ b/tests/merge.py @@ -2,7 +2,7 @@ import unittest import os -from .helpers.ptrack_helpers import ProbackupTest +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException module_name = "merge" @@ -622,3 +622,129 @@ class MergeTest(ProbackupTest, unittest.TestCase): # Clean after yourself self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_continue_failed_merge_with_corrupted_delta_backup(self): + """ + Fail merge via gdb, corrupt DELTA backup, try to continue merge + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir="{0}/{1}/node".format(module_name, fname), + set_replication=True, initdb_params=['--data-checksums'], + pg_options={ + 'wal_level': 'replica' + } + ) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.start() + + # FULL backup + self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "create table t_heap as select i as id," + " md5(i::text) as text, md5(i::text)::tsvector as tsvector" + " from generate_series(0,1000) i" + ) + + old_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + + # DELTA BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='delta' + ) + + node.safe_psql( + "postgres", + "update t_heap set id = 100500" + ) + + node.safe_psql( + "postgres", + "vacuum full t_heap" + ) + + new_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + + # DELTA BACKUP + backup_id_2 = self.backup_node( + backup_dir, 'node', node, backup_type='delta' + ) + + backup_id = self.show_pb(backup_dir, "node")[1]["id"] + + # Failed MERGE + gdb = self.merge_backup(backup_dir, "node", backup_id, gdb=True) + gdb.set_breakpoint('copy_file') + gdb.run_until_break() + + if gdb.continue_execution_until_break(2) != 'breakpoint-hit': + print('Failed to hit breakpoint') + exit(1) + + gdb._execute('signal SIGKILL') + + print(self.show_pb(backup_dir, as_text=True, as_json=False)) + + # CORRUPT incremental backup + # read block from future + # block_size + backup_header = 8200 + file = os.path.join( + backup_dir, 'backups/node', backup_id_2, 'database', new_path) + with open(file, 'rb') as f: + f.seek(8200) + block_1 = f.read(8200) + f.close + + # write block from future + file = os.path.join( + backup_dir, 'backups/node', backup_id, 'database', old_path) + with open(file, 'r+b') as f: + f.seek(8200) + f.write(block_1) + f.close + + # Try to continue failed MERGE + try: + self.merge_backup(backup_dir, "node", backup_id) + self.assertEqual( + 1, 0, + "Expecting Error because of incremental backup corruption.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except 
ProbackupException as e: + self.assertEqual( + e.message, + 'INSERT ERROR MESSAGE HERE\n', + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Drop node and restore it + node.cleanup() + self.restore_node(backup_dir, 'node', node) + + # Clean after yourself + self.del_test_dir(module_name, fname) + +# 1. always use parent link when merging (intermediates may be from different chain) +# 2. page backup we are merging with may disappear after failed merge, +# it should not be possible to continue merge after that +# PAGE_A MERGING (disappear) +# FULL MERGING + +# FULL MERGING + +# PAGE_B OK (new backup) +# FULL MERGING + +# 3. Need new test with corrupted FULL backup diff --git a/tests/validate_test.py b/tests/validate_test.py index b3590de3..ed3c7f10 100644 --- a/tests/validate_test.py +++ b/tests/validate_test.py @@ -3136,3 +3136,14 @@ class ValidateTest(ProbackupTest, unittest.TestCase): self.del_test_dir(module_name, fname) # validate empty backup list +# page from future during validate +# page from future during backup + +# corrupt block, so file become unaligned: +# 712 Assert(header.compressed_size <= BLCKSZ); +# 713 +# 714 read_len = fread(compressed_page.data, 1, +# 715 MAXALIGN(header.compressed_size), in); +# 716 if (read_len != MAXALIGN(header.compressed_size)) +# -> 717 elog(ERROR, "cannot read block %u of \"%s\" read %lu of %d", +# 718 blknum, file->path, read_len, header.compressed_size); From ee6bab40a98cae698eb452f5ae050d6b91b2e9b0 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 9 Nov 2018 18:32:37 +0300 Subject: [PATCH 22/26] PGPRO-1892: Add validation of merged backups --- src/merge.c | 35 +++++++++++++++++++++++++++++++---- tests/merge.py | 14 +++++--------- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/src/merge.c b/src/merge.c index 5d22002d..e3d6b9f8 100644 --- a/src/merge.c +++ b/src/merge.c @@ -59,7 +59,7 @@ do_merge(time_t backup_id) if (instance_name == NULL) elog(ERROR, "required parameter is not specified: --instance"); - elog(LOG, "Merge started"); + elog(INFO, "Merge started"); catalog_lock(); @@ -129,17 +129,21 @@ do_merge(time_t backup_id) */ for (i = full_backup_idx; i > dest_backup_idx; i--) { - pgBackup *to_backup = (pgBackup *) parray_get(backups, i); pgBackup *from_backup = (pgBackup *) parray_get(backups, i - 1); - merge_backups(to_backup, from_backup); + full_backup = (pgBackup *) parray_get(backups, i); + merge_backups(full_backup, from_backup); } + pgBackupValidate(full_backup); + if (full_backup->status == BACKUP_STATUS_CORRUPT) + elog(ERROR, "Merging of backup %s failed", base36enc(backup_id)); + /* cleanup */ parray_walk(backups, pgBackupFree); parray_free(backups); - elog(LOG, "Merge completed"); + elog(INFO, "Merge of backup %s completed", base36enc(backup_id)); } /* @@ -167,6 +171,28 @@ merge_backups(pgBackup *to_backup, pgBackup *from_backup) elog(INFO, "Merging backup %s with backup %s", from_backup_id, to_backup_id); + /* + * Validate to_backup only if it is BACKUP_STATUS_OK. If it has + * BACKUP_STATUS_MERGING status then it isn't valid backup until merging + * finished. + */ + if (to_backup->status == BACKUP_STATUS_OK) + { + pgBackupValidate(to_backup); + if (to_backup->status == BACKUP_STATUS_CORRUPT) + elog(ERROR, "Interrupt merging"); + } + + /* + * It is OK to validate from_backup if it has BACKUP_STATUS_OK or + * BACKUP_STATUS_MERGING status. 
+ */ + Assert(from_backup->status == BACKUP_STATUS_OK || + from_backup->status == BACKUP_STATUS_MERGING); + pgBackupValidate(from_backup); + if (from_backup->status == BACKUP_STATUS_CORRUPT) + elog(ERROR, "Interrupt merging"); + /* * Previous merging was interrupted during deleting source backup. It is * safe just to delete it again. @@ -302,6 +328,7 @@ delete_source_backup: /* * Rename FULL backup directory. */ + elog(INFO, "Rename %s to %s", to_backup_id, from_backup_id); if (rename(to_backup_path, from_backup_path) == -1) elog(ERROR, "Could not rename directory \"%s\" to \"%s\": %s", to_backup_path, from_backup_path, strerror(errno)); diff --git a/tests/merge.py b/tests/merge.py index abd64565..826d19f1 100644 --- a/tests/merge.py +++ b/tests/merge.py @@ -694,8 +694,6 @@ class MergeTest(ProbackupTest, unittest.TestCase): gdb._execute('signal SIGKILL') - print(self.show_pb(backup_dir, as_text=True, as_json=False)) - # CORRUPT incremental backup # read block from future # block_size + backup_header = 8200 @@ -723,16 +721,14 @@ class MergeTest(ProbackupTest, unittest.TestCase): "Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertEqual( - e.message, - 'INSERT ERROR MESSAGE HERE\n', + self.assertTrue( + "WARNING: Backup {0} data files are corrupted".format( + backup_id) in e.message and + "ERROR: Merging of backup {0} failed".format( + backup_id) in e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(e.message), self.cmd)) - # Drop node and restore it - node.cleanup() - self.restore_node(backup_dir, 'node', node) - # Clean after yourself self.del_test_dir(module_name, fname) From 8e716791ba1f6942d93e7ddfd6e983663fe4bd3b Mon Sep 17 00:00:00 2001 From: Grigory Smolkin Date: Sun, 11 Nov 2018 21:53:00 +0300 Subject: [PATCH 23/26] tests: minor fixes --- tests/compatibility.py | 159 ++++++++++++++++++++++++++ tests/compression.py | 67 +++++++++++ tests/replica.py | 248 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 464 insertions(+), 10 deletions(-) diff --git a/tests/compatibility.py b/tests/compatibility.py index 3d67bf3e..7c3e137f 100644 --- a/tests/compatibility.py +++ b/tests/compatibility.py @@ -311,3 +311,162 @@ class CompatibilityTest(ProbackupTest, unittest.TestCase): if self.paranoia: pgdata_restored = self.pgdata_content(node_restored.data_dir) self.compare_pgdata(pgdata, pgdata_restored) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_backward_compatibility_compression(self): + """Description in jira issue PGPRO-434""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir="{0}/{1}/node".format(module_name, fname), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.add_instance(backup_dir, 'node', node, old_binary=True) + + self.set_archiving(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + node.pgbench_init(scale=10) + + # FULL backup with OLD binary + backup_id = self.backup_node( + backup_dir, 'node', node, + old_binary=True, + options=['--compress']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + # restore OLD FULL with new binary + node_restored = self.make_simple_node( + base_dir="{0}/{1}/node_restored".format(module_name, fname)) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', 
node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # PAGE backup with OLD binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "10"]) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, + backup_type='page', + old_binary=True, + options=['--compress']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + self.restore_node( + backup_dir, 'node', node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # PAGE backup with new binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "10"]) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, + backup_type='page', + options=['--compress']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Delta backup with old binary + self.delete_pb(backup_dir, 'node', backup_id) + + self.backup_node( + backup_dir, 'node', node, + old_binary=True, + options=['--compress']) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "10"]) + + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=['--compress'], + old_binary=True) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Delta backup with new binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "10"]) + + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=['--compress']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) diff --git a/tests/compression.py b/tests/compression.py index aa275382..54bc299c 100644 --- a/tests/compression.py +++ b/tests/compression.py @@ -494,3 +494,70 @@ class CompressionTest(ProbackupTest, unittest.TestCase): # Clean after yourself self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_uncompressable_pages(self): + """ + make archive node, create table with uncompressable toast pages, + take backup with compression, make sure that page was not compressed, + restore backup and check data correctness + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir="{0}/{1}/node".format(module_name, fname), + set_replication=True, + 
initdb_params=['--data-checksums'], + pg_options={ + 'wal_level': 'replica', + 'max_wal_senders': '2', + 'checkpoint_timeout': '30s'} + ) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + +# node.safe_psql( +# "postgres", +# "create table t_heap as select i, " +# "repeat('1234567890abcdefghiyklmn', 1)::bytea, " +# "point(0,0) from generate_series(0,1) i") + + node.safe_psql( + "postgres", + "create table t as select i, " + "repeat(md5(i::text),5006056) as fat_attr " + "from generate_series(0,10) i;") + + self.backup_node( + backup_dir, 'node', node, + backup_type='full', + options=[ + '--compress', + '--log-level-file=verbose']) + + node.cleanup() + + self.restore_node(backup_dir, 'node', node) + node.slow_start() + + self.backup_node( + backup_dir, 'node', node, + backup_type='full', + options=[ + '--compress', + '--log-level-file=verbose']) + + # Clean after yourself + # self.del_test_dir(module_name, fname) + +# create table t as select i, repeat(md5('1234567890'), 1)::bytea, point(0,0) from generate_series(0,1) i; + + +# create table t_bytea_1(file oid); +# INSERT INTO t_bytea_1 (file) +# VALUES (lo_import('/home/gsmol/git/postgres/contrib/pg_probackup/tests/expected/sample.random', 24593)); +# insert into t_bytea select string_agg(data,'') from pg_largeobject where pageno > 0; +# \ No newline at end of file diff --git a/tests/replica.py b/tests/replica.py index d74c375c..f8c16607 100644 --- a/tests/replica.py +++ b/tests/replica.py @@ -5,6 +5,7 @@ from datetime import datetime, timedelta import subprocess from sys import exit import time +from shutil import copyfile module_name = 'replica' @@ -64,6 +65,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): "from generate_series(256,512) i") before = master.safe_psql("postgres", "SELECT * FROM t_heap") self.add_instance(backup_dir, 'replica', replica) + backup_id = self.backup_node( backup_dir, 'replica', replica, options=[ @@ -80,9 +82,11 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): base_dir="{0}/{1}/node".format(module_name, fname)) node.cleanup() self.restore_node(backup_dir, 'replica', data_dir=node.data_dir) + node.append_conf( 'postgresql.auto.conf', 'port = {0}'.format(node.port)) node.slow_start() + # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") self.assertEqual(before, after) @@ -95,7 +99,9 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): "insert into t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " "from generate_series(512,768) i") + before = master.safe_psql("postgres", "SELECT * FROM t_heap") + backup_id = self.backup_node( backup_dir, 'replica', replica, backup_type='ptrack', options=[ @@ -111,9 +117,11 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): node.cleanup() self.restore_node( backup_dir, 'replica', data_dir=node.data_dir, backup_id=backup_id) + node.append_conf( 'postgresql.auto.conf', 'port = {0}'.format(node.port)) node.slow_start() + # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") self.assertEqual(before, after) @@ -136,13 +144,12 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): pg_options={ 'wal_level': 'replica', 'max_wal_senders': '2', - 'checkpoint_timeout': '30s'} + 'checkpoint_timeout': '30s', + 'archive_timeout': '10s'} ) self.init_pb(backup_dir) self.add_instance(backup_dir, 'master', master) self.set_archiving(backup_dir, 'master', master) - # force more 
frequent wal switch - master.append_conf('postgresql.auto.conf', 'archive_timeout = 10') master.slow_start() replica = self.make_simple_node( @@ -180,8 +187,14 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): "insert into t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " "from generate_series(256,512) i") + before = master.safe_psql("postgres", "SELECT * FROM t_heap") self.add_instance(backup_dir, 'replica', replica) + + copyfile( + os.path.join(backup_dir, 'wal/master/000000010000000000000003'), + os.path.join(backup_dir, 'wal/replica/000000010000000000000003')) + backup_id = self.backup_node( backup_dir, 'replica', replica, options=[ @@ -201,9 +214,11 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): node.append_conf( 'postgresql.auto.conf', 'port = {0}'.format(node.port)) node.slow_start() + # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") self.assertEqual(before, after) + node.cleanup() # Change data on master, make PAGE backup from replica, # restore taken backup and check that restored data equal @@ -212,30 +227,41 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): "postgres", "insert into t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(512,768) i") + "from generate_series(512,22680) i") + before = master.safe_psql("postgres", "SELECT * FROM t_heap") + backup_id = self.backup_node( - backup_dir, 'replica', replica, backup_type='page', + backup_dir, 'replica', + replica, backup_type='page', options=[ '--archive-timeout=300', '--master-host=localhost', '--master-db=postgres', '--master-port={0}'.format(master.port)]) + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) # RESTORE PAGE BACKUP TAKEN FROM replica - node.cleanup() self.restore_node( backup_dir, 'replica', data_dir=node.data_dir, backup_id=backup_id) + node.append_conf( 'postgresql.auto.conf', 'port = {0}'.format(node.port)) + node.append_conf( + 'postgresql.auto.conf', 'archive_mode = off') node.slow_start() + # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") self.assertEqual(before, after) + self.add_instance(backup_dir, 'node', node) + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + # Clean after yourself self.del_test_dir(module_name, fname) @@ -279,15 +305,217 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): backup_id = self.backup_node( backup_dir, 'master', master, backup_type='page') self.restore_node( - backup_dir, 'master', replica, - options=['-R', '--recovery-target-action=promote']) + backup_dir, 'master', replica, options=['-R']) # Settings for Replica - # self.set_replica(master, replica) self.set_archiving(backup_dir, 'replica', replica, replica=True) replica.append_conf( 'postgresql.auto.conf', 'port = {0}'.format(replica.port)) - replica.start() + replica.append_conf( + 'postgresql.auto.conf', 'hot_standby = on') + + replica.slow_start(replica=True) + + self.add_instance(backup_dir, 'replica', replica) + + copyfile( + os.path.join(backup_dir, 'wal/master/000000010000000000000003'), + os.path.join(backup_dir, 'wal/replica/000000010000000000000003')) + + self.backup_node(backup_dir, 'replica', replica) # Clean after yourself self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_take_backup_from_delayed_replica(self): + """ + make archive master, take full backups from master, + 
restore full backup as delayed replica, launch pgbench, + take FULL, PAGE and DELTA backups from replica + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir="{0}/{1}/master".format(module_name, fname), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'wal_level': 'replica', 'max_wal_senders': '2', + 'checkpoint_timeout': '30s'} + ) + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + self.set_archiving(backup_dir, 'master', master) + # force more frequent wal switch + #master.append_conf('postgresql.auto.conf', 'archive_timeout = 10') + master.slow_start() + + replica = self.make_simple_node( + base_dir="{0}/{1}/replica".format(module_name, fname)) + replica.cleanup() + + self.backup_node(backup_dir, 'master', master) + + self.restore_node( + backup_dir, 'master', replica, options=['-R']) + + # Settings for Replica + self.add_instance(backup_dir, 'replica', replica) + self.set_archiving(backup_dir, 'replica', replica, replica=True) + + # stupid hack + copyfile( + os.path.join(backup_dir, 'wal/master/000000010000000000000001'), + os.path.join(backup_dir, 'wal/replica/000000010000000000000001')) + + replica.append_conf( + 'postgresql.auto.conf', 'port = {0}'.format(replica.port)) + + replica.append_conf( + 'postgresql.auto.conf', 'hot_standby = on') + + replica.append_conf( + 'recovery.conf', "recovery_min_apply_delay = '300s'") + + replica.slow_start(replica=True) + + master.pgbench_init(scale=10) + + pgbench = master.pgbench( + options=['-T', '30', '-c', '2', '--no-vacuum']) + + self.backup_node( + backup_dir, 'replica', replica) + + self.backup_node( + backup_dir, 'replica', replica, + data_dir=replica.data_dir, backup_type='page') + + self.backup_node( + backup_dir, 'replica', replica, backup_type='delta') + + pgbench.wait() + + pgbench = master.pgbench( + options=['-T', '30', '-c', '2', '--no-vacuum']) + + self.backup_node( + backup_dir, 'replica', replica, + options=['--stream']) + + self.backup_node( + backup_dir, 'replica', replica, + backup_type='page', options=['--stream']) + + self.backup_node( + backup_dir, 'replica', replica, + backup_type='delta', options=['--stream']) + + pgbench.wait() + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_make_block_from_future(self): + """ + make archive master, take full backups from master, + restore full backup as replica, launch pgbench, + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir="{0}/{1}/master".format(module_name, fname), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'wal_level': 'replica', 'max_wal_senders': '2', + 'checkpoint_timeout': '30s'} + ) + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + self.set_archiving(backup_dir, 'master', master) + # force more frequent wal switch + #master.append_conf('postgresql.auto.conf', 'archive_timeout = 10') + master.slow_start() + + replica = self.make_simple_node( + base_dir="{0}/{1}/replica".format(module_name, fname)) + replica.cleanup() + + self.backup_node(backup_dir, 'master', master) + + self.restore_node( + backup_dir, 'master', replica, options=['-R']) + + # Settings for Replica + self.set_archiving(backup_dir, 'replica', replica, replica=True) + replica.append_conf( + 'postgresql.auto.conf', 'port = 
{0}'.format(replica.port)) + replica.append_conf( + 'postgresql.auto.conf', 'hot_standby = on') + + replica.slow_start(replica=True) + + self.add_instance(backup_dir, 'replica', replica) + + replica.safe_psql( + 'postgres', + 'checkpoint') + + master.pgbench_init(scale=10) + + self.wait_until_replica_catch_with_master(master, replica) + + +# print(replica.safe_psql( +# 'postgres', +# 'select * from pg_catalog.pg_last_xlog_receive_location()')) +# +# print(replica.safe_psql( +# 'postgres', +# 'select * from pg_catalog.pg_last_xlog_replay_location()')) +# +# print(replica.safe_psql( +# 'postgres', +# 'select * from pg_catalog.pg_control_checkpoint()')) +# +# replica.safe_psql( +# 'postgres', +# 'checkpoint') + + pgbench = master.pgbench(options=['-T', '30', '-c', '2', '--no-vacuum']) + + time.sleep(5) + + #self.backup_node(backup_dir, 'replica', replica, options=['--stream']) + exit(1) + self.backup_node(backup_dir, 'replica', replica, options=["--log-level-file=verbose"]) + pgbench.wait() + + # pgbench + master.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,256000) i") + + + master.safe_psql( + 'postgres', + 'checkpoint') + + replica.safe_psql( + 'postgres', + 'checkpoint') + + replica.safe_psql( + 'postgres', + 'select * from pg_') + + self.backup_node(backup_dir, 'replica', replica) + exit(1) + + # Clean after yourself + self.del_test_dir(module_name, fname) \ No newline at end of file From d2271554a2173270857e57d715d1d64a07e6ebaf Mon Sep 17 00:00:00 2001 From: Grigory Smolkin Date: Mon, 12 Nov 2018 11:51:58 +0300 Subject: [PATCH 24/26] tests: minor fixes --- tests/archive.py | 70 +++++++++++++++++++++++++++-------------- tests/backup_test.py | 2 +- tests/compatibility.py | 9 ++---- tests/compression.py | 10 ++---- tests/delta.py | 34 +++++++------------- tests/exclude.py | 2 +- tests/false_positive.py | 4 +-- tests/merge.py | 4 +-- tests/page.py | 11 +++---- tests/ptrack.py | 52 +++++++++++++----------------- tests/ptrack_clean.py | 2 +- tests/ptrack_empty.py | 2 +- tests/replica.py | 26 ++++++++++----- tests/validate_test.py | 24 ++++++-------- 14 files changed, 125 insertions(+), 127 deletions(-) diff --git a/tests/archive.py b/tests/archive.py index 8b8eb71a..4ed783d6 100644 --- a/tests/archive.py +++ b/tests/archive.py @@ -5,6 +5,7 @@ from datetime import datetime, timedelta import subprocess from sys import exit from time import sleep +from shutil import copyfile module_name = 'archive' @@ -39,8 +40,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): result = node.safe_psql("postgres", "SELECT * FROM t_heap") self.backup_node( - backup_dir, 'node', node, - options=["--log-level-file=verbose"]) + backup_dir, 'node', node) node.cleanup() self.restore_node( @@ -53,8 +53,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): # Make backup self.backup_node( - backup_dir, 'node', node, - options=["--log-level-file=verbose"]) + backup_dir, 'node', node) node.cleanup() # Restore Database @@ -253,7 +252,6 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): backup_dir, 'node', node, options=[ "--archive-timeout=60", - "--log-level-file=verbose", "--stream"] ) # we should die here because exception is what we expect to happen @@ -402,7 +400,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): self.del_test_dir(module_name, fname) # @unittest.expectedFailure - # @unittest.skip("skip") + @unittest.skip("skip") def test_replica_archive(self): """ make node 
without archiving, take stream backup and @@ -417,7 +415,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): initdb_params=['--data-checksums'], pg_options={ 'max_wal_senders': '2', - 'checkpoint_timeout': '30s', + 'archive_timeout': '10s', 'max_wal_size': '1GB'} ) self.init_pb(backup_dir) @@ -433,7 +431,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): "postgres", "create table t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,256) i") + "from generate_series(0,2560) i") self.backup_node(backup_dir, 'master', master, options=['--stream']) before = master.safe_psql("postgres", "SELECT * FROM t_heap") @@ -459,9 +457,6 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): "md5(repeat(i::text,10))::tsvector as tsvector " "from generate_series(256,512) i") before = master.safe_psql("postgres", "SELECT * FROM t_heap") - # ADD INSTANCE 'REPLICA' - - sleep(1) backup_id = self.backup_node( backup_dir, 'replica', replica, @@ -469,7 +464,9 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): '--archive-timeout=30', '--master-host=localhost', '--master-db=postgres', - '--master-port={0}'.format(master.port)]) + '--master-port={0}'.format(master.port), + '--stream']) + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) @@ -493,16 +490,28 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): "postgres", "insert into t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(512,768) i") + "from generate_series(512,20680) i") + before = master.safe_psql("postgres", "SELECT * FROM t_heap") + + master.safe_psql( + "postgres", + "CHECKPOINT") + +# copyfile( +# os.path.join(backup_dir, 'wal/master/000000010000000000000002'), +# os.path.join(backup_dir, 'wal/replica/000000010000000000000002')) + backup_id = self.backup_node( backup_dir, 'replica', replica, backup_type='page', options=[ - '--archive-timeout=30', '--log-level-file=verbose', - '--master-host=localhost', '--master-db=postgres', - '--master-port={0}'.format(master.port)] - ) + '--archive-timeout=30', + '--master-db=postgres', + '--master-host=localhost', + '--master-port={0}'.format(master.port), + '--stream']) + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) @@ -511,8 +520,10 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): node.cleanup() self.restore_node( backup_dir, 'replica', data_dir=node.data_dir, backup_id=backup_id) + node.append_conf( 'postgresql.auto.conf', 'port = {0}'.format(node.port)) + node.slow_start() # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") @@ -537,7 +548,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'checkpoint_timeout': '30s'} + 'archive_timeout': '10s'} ) replica = self.make_simple_node( base_dir="{0}/{1}/replica".format(module_name, fname)) @@ -568,7 +579,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): pgdata_replica = self.pgdata_content(replica.data_dir) self.compare_pgdata(pgdata_master, pgdata_replica) - self.set_replica(master, replica, synchronous=True) + self.set_replica(master, replica) # ADD INSTANCE REPLICA self.add_instance(backup_dir, 'replica', replica) # SET ARCHIVING FOR REPLICA @@ -579,16 +590,26 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): after = 
replica.safe_psql("postgres", "SELECT * FROM t_heap") self.assertEqual(before, after) + master.psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0, 60000) i") + # TAKE FULL ARCHIVE BACKUP FROM REPLICA + copyfile( + os.path.join(backup_dir, 'wal/master/000000010000000000000001'), + os.path.join(backup_dir, 'wal/replica/000000010000000000000001')) + backup_id = self.backup_node( backup_dir, 'replica', replica, options=[ - '--archive-timeout=20', - '--log-level-file=verbose', + '--archive-timeout=30', '--master-host=localhost', '--master-db=postgres', - '--master-port={0}'.format(master.port)] - ) + '--master-port={0}'.format(master.port), + '--stream']) + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) @@ -618,7 +639,8 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'checkpoint_timeout': '30s'} + 'checkpoint_timeout': '30s', + 'archive_timeout': '10s'} ) replica = self.make_simple_node( base_dir="{0}/{1}/replica".format(module_name, fname)) diff --git a/tests/backup_test.py b/tests/backup_test.py index b21aab9b..9c8a0955 100644 --- a/tests/backup_test.py +++ b/tests/backup_test.py @@ -328,7 +328,7 @@ class BackupTest(ProbackupTest, unittest.TestCase): self.backup_node( backup_dir, 'node', node, backup_type="full", - options=["-j", "4", "--stream", '--log-level-file=verbose']) + options=["-j", "4", "--stream", "--log-level-file=verbose"]) # open log file and check with open(os.path.join(backup_dir, 'log', 'pg_probackup.log')) as f: diff --git a/tests/compatibility.py b/tests/compatibility.py index 7c3e137f..39070b3f 100644 --- a/tests/compatibility.py +++ b/tests/compatibility.py @@ -94,8 +94,7 @@ class CompatibilityTest(ProbackupTest, unittest.TestCase): pgbench.stdout.close() self.backup_node( - backup_dir, 'node', node, backup_type='page', - options=['--log-level-file=verbose']) + backup_dir, 'node', node, backup_type='page') if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -195,8 +194,7 @@ class CompatibilityTest(ProbackupTest, unittest.TestCase): pgbench.stdout.close() self.backup_node( - backup_dir, 'node', node, backup_type='delta', - options=['--log-level-file=verbose']) + backup_dir, 'node', node, backup_type='delta') if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -296,8 +294,7 @@ class CompatibilityTest(ProbackupTest, unittest.TestCase): pgbench.stdout.close() self.backup_node( - backup_dir, 'node', node, backup_type='delta', - options=['--log-level-file=verbose']) + backup_dir, 'node', node, backup_type='delta') if self.paranoia: pgdata = self.pgdata_content(node.data_dir) diff --git a/tests/compression.py b/tests/compression.py index 54bc299c..2e712a15 100644 --- a/tests/compression.py +++ b/tests/compression.py @@ -55,9 +55,7 @@ class CompressionTest(ProbackupTest, unittest.TestCase): page_backup_id = self.backup_node( backup_dir, 'node', node, backup_type='page', options=[ - '--stream', '--compress-algorithm=zlib', - '--log-level-console=verbose', - '--log-level-file=verbose']) + '--stream', '--compress-algorithm=zlib']) # PTRACK BACKUP node.safe_psql( @@ -535,8 +533,7 @@ class CompressionTest(ProbackupTest, unittest.TestCase): backup_dir, 'node', node, backup_type='full', options=[ - '--compress', - '--log-level-file=verbose']) + '--compress']) node.cleanup() @@ -547,8 +544,7 @@ class 
CompressionTest(ProbackupTest, unittest.TestCase): backup_dir, 'node', node, backup_type='full', options=[ - '--compress', - '--log-level-file=verbose']) + '--compress']) # Clean after yourself # self.del_test_dir(module_name, fname) diff --git a/tests/delta.py b/tests/delta.py index bdbfac91..55cc03be 100644 --- a/tests/delta.py +++ b/tests/delta.py @@ -80,13 +80,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): pgdata = self.pgdata_content(node.data_dir) self.restore_node( - backup_dir, - 'node', - node_restored, - options=[ - "-j", "1", - "--log-level-file=verbose" - ] + backup_dir, 'node', node_restored ) # Physical comparison @@ -176,8 +170,6 @@ class DeltaTest(ProbackupTest, unittest.TestCase): 'node', node_restored, options=[ - "-j", "1", - "--log-level-file=verbose", "-T", "{0}={1}".format( old_tablespace, new_tablespace)] ) @@ -251,13 +243,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): pgdata = self.pgdata_content(node.data_dir) self.restore_node( - backup_dir, - 'node', - node_restored, - options=[ - "-j", "1", - "--log-level-file=verbose" - ] + backup_dir, 'node', node_restored ) # Physical comparison @@ -683,7 +669,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): node_restored, backup_id=backup_id, options=[ - "-j", "4", "--log-level-file=verbose", + "-j", "4", "--immediate", "--recovery-target-action=promote"]) @@ -717,7 +703,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): node_restored, backup_id=backup_id, options=[ - "-j", "4", "--log-level-file=verbose", + "-j", "4", "--immediate", "--recovery-target-action=promote"] ) @@ -815,7 +801,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): backup_id = self.backup_node( backup_dir, 'node', node, backup_type='delta', - options=["--stream", "--log-level-file=verbose"] + options=["--stream"] ) # if self.paranoia: # pgdata_delta = self.pgdata_content( @@ -844,7 +830,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): node_restored, backup_id=backup_id, options=[ - "-j", "4", "--log-level-file=verbose", + "-j", "4", "--immediate", "--recovery-target-action=promote"]) @@ -1135,7 +1121,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): self.del_test_dir(module_name, fname) # @unittest.skip("skip") - def test_page_corruption_heal_via_ptrack_1(self): + def test_delta_corruption_heal_via_ptrack_1(self): """make node, corrupt some page, check that backup failed""" fname = self.id().split('.')[3] node = self.make_simple_node( @@ -1174,8 +1160,10 @@ class DeltaTest(ProbackupTest, unittest.TestCase): f.close self.backup_node( - backup_dir, 'node', node, backup_type="delta", - options=["-j", "4", "--stream", "--log-level-file=verbose"]) + backup_dir, 'node', node, + backup_type="delta", + options=["-j", "4", "--stream", '--log-level-file=verbose']) + # open log file and check with open(os.path.join(backup_dir, 'log', 'pg_probackup.log')) as f: diff --git a/tests/exclude.py b/tests/exclude.py index 48b7889c..3fd3341f 100644 --- a/tests/exclude.py +++ b/tests/exclude.py @@ -143,7 +143,7 @@ class ExcludeTest(ProbackupTest, unittest.TestCase): self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] + options=['--stream'] ) pgdata = self.pgdata_content(node.data_dir) diff --git a/tests/false_positive.py b/tests/false_positive.py index 04062b79..df7b1334 100644 --- a/tests/false_positive.py +++ b/tests/false_positive.py @@ -143,7 +143,7 @@ class FalsePositive(ProbackupTest, unittest.TestCase): self.backup_node(backup_dir, 'node', node, 
options=['--stream']) gdb = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'], + options=['--stream'], gdb=True ) @@ -227,7 +227,7 @@ class FalsePositive(ProbackupTest, unittest.TestCase): self.backup_node(backup_dir, 'node', node, options=['--stream']) gdb = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'], + options=['--stream'], gdb=True ) diff --git a/tests/merge.py b/tests/merge.py index 826d19f1..5f7ae7da 100644 --- a/tests/merge.py +++ b/tests/merge.py @@ -407,17 +407,17 @@ class MergeTest(ProbackupTest, unittest.TestCase): node.safe_psql( "postgres", "delete from t_heap where ctid >= '(11,0)'") + node.safe_psql( "postgres", "vacuum t_heap") - self.backup_node( + page_id = self.backup_node( backup_dir, 'node', node, backup_type='ptrack') if self.paranoia: pgdata = self.pgdata_content(node.data_dir) - page_id = self.show_pb(backup_dir, "node")[1]["id"] self.merge_backup(backup_dir, "node", page_id) self.validate_pb(backup_dir) diff --git a/tests/page.py b/tests/page.py index 1d8238e1..d31b8f60 100644 --- a/tests/page.py +++ b/tests/page.py @@ -62,8 +62,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): "vacuum t_heap") self.backup_node( - backup_dir, 'node', node, backup_type='page', - options=['--log-level-file=verbose']) + backup_dir, 'node', node, backup_type='page') self.backup_node( backup_dir, 'node', node, backup_type='page') @@ -333,8 +332,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): result = node.safe_psql("postgres", "select * from pgbench_accounts") # PAGE BACKUP self.backup_node( - backup_dir, 'node', node, backup_type='page', - options=["--log-level-file=verbose"]) + backup_dir, 'node', node, backup_type='page') # GET PHYSICAL CONTENT FROM NODE pgdata = self.pgdata_content(node.data_dir) @@ -727,7 +725,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): self.backup_node( backup_dir, 'node', node, backup_type='page', - options=["-j", "4", '--log-level-file=verbose']) + options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of wal segment disappearance.\n " @@ -797,8 +795,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): # Single-thread PAGE backup try: self.backup_node( - backup_dir, 'node', node, - backup_type='page', options=['--log-level-file=verbose']) + backup_dir, 'node', node, backup_type='page') self.assertEqual( 1, 0, "Expecting Error because of wal segment disappearance.\n " diff --git a/tests/ptrack.py b/tests/ptrack.py index 72159318..5d01d882 100644 --- a/tests/ptrack.py +++ b/tests/ptrack.py @@ -157,13 +157,13 @@ class PtrackTest(ProbackupTest, unittest.TestCase): self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] + options=['--stream'] ) pgdata = self.pgdata_content(node.data_dir) self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] + options=['--stream'] ) self.restore_node( @@ -246,14 +246,11 @@ class PtrackTest(ProbackupTest, unittest.TestCase): exit(1) self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--log-level-file=verbose'] - ) + backup_dir, 'node', node, backup_type='ptrack') self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--log-level-file=verbose'] - ) + backup_dir, 'node', node, backup_type='ptrack') + if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ 
-336,14 +333,10 @@ class PtrackTest(ProbackupTest, unittest.TestCase): ) self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--log-level-file=verbose'] - ) + backup_dir, 'node', node, backup_type='ptrack') self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--log-level-file=verbose'] - ) + backup_dir, 'node', node, backup_type='ptrack') if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -409,7 +402,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] + options=['--stream'] ) node.safe_psql( @@ -479,7 +472,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): self.backup_node(backup_dir, 'node', node, options=['--stream']) gdb = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'], + options=['--stream'], gdb=True ) @@ -566,7 +559,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): ptrack_backup_id = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] + options=['--stream'] ) if self.paranoia: @@ -989,7 +982,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): node.safe_psql("postgres", "SELECT * FROM t_heap") self.backup_node( backup_dir, 'node', node, - options=["--stream", "--log-level-file=verbose"]) + options=["--stream"]) # CREATE DATABASE DB1 node.safe_psql("postgres", "create database db1") @@ -1002,7 +995,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): backup_id = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=["--stream", "--log-level-file=verbose"] + options=["--stream"] ) if self.paranoia: @@ -1133,7 +1126,8 @@ class PtrackTest(ProbackupTest, unittest.TestCase): '-j10', '--master-host=localhost', '--master-db=postgres', - '--master-port={0}'.format(node.port) + '--master-port={0}'.format(node.port), + '--stream' ] ) @@ -1229,7 +1223,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=["--stream", "--log-level-file=verbose"] + options=["--stream"] ) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -1315,7 +1309,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): # PTRACK BACKUP self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=["--stream", '--log-level-file=verbose']) + options=["--stream"]) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -1476,7 +1470,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): # FIRTS PTRACK BACKUP self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=["--stream", "--log-level-file=verbose"]) + options=["--stream"]) # GET PHYSICAL CONTENT FROM NODE if self.paranoia: @@ -1517,7 +1511,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): # SECOND PTRACK BACKUP self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=["--stream", "--log-level-file=verbose"]) + options=["--stream"]) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -1612,9 +1606,8 @@ class PtrackTest(ProbackupTest, unittest.TestCase): #result = node.safe_psql("postgres", "select * from pgbench_accounts") # FIRTS PTRACK BACKUP self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=["--log-level-file=verbose"] - ) + backup_dir, 'node', node, backup_type='ptrack') + # GET PHYSICAL CONTENT FROM NODE 
pgdata = self.pgdata_content(node.data_dir) @@ -1683,9 +1676,8 @@ class PtrackTest(ProbackupTest, unittest.TestCase): self.backup_node( backup_dir, 'node', node, backup_type='ptrack', options=[ - "--stream", "-j 30", - "--log-level-file=verbose"] - ) + "--stream", "-j 30"]) + # we should die here because exception is what we expect to happen self.assertEqual( 1, 0, diff --git a/tests/ptrack_clean.py b/tests/ptrack_clean.py index ae16c662..076291a6 100644 --- a/tests/ptrack_clean.py +++ b/tests/ptrack_clean.py @@ -76,7 +76,7 @@ class SimpleTest(ProbackupTest, unittest.TestCase): # Take PTRACK backup to clean every ptrack backup_id = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['-j10', '--log-level-file=verbose']) + options=['-j10']) node.safe_psql('postgres', 'checkpoint') for i in idx_ptrack: diff --git a/tests/ptrack_empty.py b/tests/ptrack_empty.py index 750a7336..8656f941 100644 --- a/tests/ptrack_empty.py +++ b/tests/ptrack_empty.py @@ -67,7 +67,7 @@ class SimpleTest(ProbackupTest, unittest.TestCase): # Take PTRACK backup backup_id = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['-j10', '--log-level-file=verbose']) + options=['-j10']) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) diff --git a/tests/replica.py b/tests/replica.py index f8c16607..1ab8515e 100644 --- a/tests/replica.py +++ b/tests/replica.py @@ -162,7 +162,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): "postgres", "create table t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,256) i") + "from generate_series(0,2560) i") before = master.safe_psql("postgres", "SELECT * FROM t_heap") @@ -173,6 +173,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): # Settings for Replica self.set_replica(master, replica) self.set_archiving(backup_dir, 'replica', replica, replica=True) + replica.slow_start(replica=True) # Check data correctness on replica @@ -186,7 +187,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): "postgres", "insert into t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(256,512) i") + "from generate_series(256,5120) i") before = master.safe_psql("postgres", "SELECT * FROM t_heap") self.add_instance(backup_dir, 'replica', replica) @@ -195,13 +196,23 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): os.path.join(backup_dir, 'wal/master/000000010000000000000003'), os.path.join(backup_dir, 'wal/replica/000000010000000000000003')) + copyfile( + os.path.join(backup_dir, 'wal/master/000000010000000000000004'), + os.path.join(backup_dir, 'wal/replica/000000010000000000000004')) + + copyfile( + os.path.join(backup_dir, 'wal/master/000000010000000000000005'), + os.path.join(backup_dir, 'wal/replica/000000010000000000000005')) + backup_id = self.backup_node( backup_dir, 'replica', replica, options=[ - '--archive-timeout=300', + '--archive-timeout=30', '--master-host=localhost', '--master-db=postgres', - '--master-port={0}'.format(master.port)]) + '--master-port={0}'.format(master.port), + '--stream']) + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) @@ -235,10 +246,11 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): backup_dir, 'replica', replica, backup_type='page', options=[ - '--archive-timeout=300', + '--archive-timeout=30', '--master-host=localhost', '--master-db=postgres', - 
'--master-port={0}'.format(master.port)]) + '--master-port={0}'.format(master.port), + '--stream']) self.validate_pb(backup_dir, 'replica') self.assertEqual( @@ -491,7 +503,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): #self.backup_node(backup_dir, 'replica', replica, options=['--stream']) exit(1) - self.backup_node(backup_dir, 'replica', replica, options=["--log-level-file=verbose"]) + self.backup_node(backup_dir, 'replica', replica) pgbench.wait() # pgbench diff --git a/tests/validate_test.py b/tests/validate_test.py index ed3c7f10..c0fd4943 100644 --- a/tests/validate_test.py +++ b/tests/validate_test.py @@ -50,7 +50,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): f.close self.backup_node( - backup_dir, 'node', node, options=["--log-level-file=verbose"]) + backup_dir, 'node', node, options=['--log-level-file=verbose']) log_file_path = os.path.join(backup_dir, "log", "pg_probackup.log") @@ -259,8 +259,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Simple validate try: self.validate_pb( - backup_dir, 'node', backup_id=backup_id_2, - options=['--log-level-file=verbose']) + backup_dir, 'node', backup_id=backup_id_2) self.assertEqual( 1, 0, "Expecting Error because of data files corruption.\n " @@ -364,8 +363,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate PAGE1 try: self.validate_pb( - backup_dir, 'node', backup_id=backup_id_2, - options=['--log-level-file=verbose']) + backup_dir, 'node', backup_id=backup_id_2) self.assertEqual( 1, 0, "Expecting Error because of data files corruption.\n " @@ -520,8 +518,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): try: self.validate_pb( backup_dir, 'node', - backup_id=backup_id_4, - options=['--log-level-file=verbose']) + backup_id=backup_id_4) self.assertEqual( 1, 0, "Expecting Error because of data files corruption.\n" @@ -721,7 +718,6 @@ class ValidateTest(ProbackupTest, unittest.TestCase): self.validate_pb( backup_dir, 'node', options=[ - '--log-level-file=verbose', '-i', backup_id_4, '--xid={0}'.format(target_xid)]) self.assertEqual( 1, 0, @@ -866,7 +862,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate Instance try: self.validate_pb( - backup_dir, 'node', options=['--log-level-file=verbose']) + backup_dir, 'node') self.assertEqual( 1, 0, "Expecting Error because of data files corruption.\n " @@ -1006,7 +1002,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate Instance try: - self.validate_pb(backup_dir, 'node', options=['--log-level-file=verbose']) + self.validate_pb(backup_dir, 'node') self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: @@ -1092,7 +1088,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate Instance try: - self.validate_pb(backup_dir, 'node', options=['--log-level-file=verbose']) + self.validate_pb(backup_dir, 'node') self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: @@ -1219,7 +1215,6 @@ class ValidateTest(ProbackupTest, unittest.TestCase): 'node', backup_id, options=[ - "--log-level-console=verbose", "--xid={0}".format(target_xid)]) self.assertEqual( 1, 0, @@ -1388,7 +1383,6 @@ class ValidateTest(ProbackupTest, unittest.TestCase): 'node', backup_id, options=[ - "--log-level-console=verbose", "--xid={0}".format(target_xid)]) self.assertEqual( 1, 0, @@ -1671,7 +1665,7 @@ 
class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(file_new, file) try: - self.validate_pb(backup_dir, options=['--log-level-file=verbose']) + self.validate_pb(backup_dir) except ProbackupException as e: self.assertIn( 'WARNING: Some backups are not valid'.format( @@ -1776,7 +1770,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(file, file_new) try: - self.validate_pb(backup_dir, options=['--log-level-file=verbose']) + self.validate_pb(backup_dir) except ProbackupException as e: self.assertIn( 'WARNING: Some backups are not valid'.format( From 6eefeeba8dfe7280252d3abb1e6f424e0f2929ac Mon Sep 17 00:00:00 2001 From: Grigory Smolkin Date: Mon, 12 Nov 2018 12:15:54 +0300 Subject: [PATCH 25/26] tests: expected help fixed --- tests/expected/option_help.out | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/expected/option_help.out b/tests/expected/option_help.out index 228598ed..ecc59a89 100644 --- a/tests/expected/option_help.out +++ b/tests/expected/option_help.out @@ -50,6 +50,7 @@ pg_probackup - utility to manage backup/recovery of PostgreSQL database. [--master-db=db_name] [--master-host=host_name] [--master-port=port] [--master-user=user_name] [--replica-timeout=timeout] + [--skip-block-validation] pg_probackup restore -B backup-path --instance=instance_name [-D pgdata-path] [-i backup-id] [--progress] @@ -59,12 +60,14 @@ pg_probackup - utility to manage backup/recovery of PostgreSQL database. [--recovery-target-action=pause|promote|shutdown] [--restore-as-replica] [--no-validate] + [--skip-block-validation] pg_probackup validate -B backup-path [--instance=instance_name] [-i backup-id] [--progress] [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]] [--recovery-target-name=target-name] [--timeline=timeline] + [--skip-block-validation] pg_probackup show -B backup-path [--instance=instance_name [-i backup-id]] From de9c3b64d72693283b6514f0d0d5773613b3c971 Mon Sep 17 00:00:00 2001 From: Grigory Smolkin Date: Mon, 12 Nov 2018 12:16:34 +0300 Subject: [PATCH 26/26] Version 2.0.24 - Major bugfix: incorrect handling of badly compressed blocks; previously there was a risk of restoring a block in uncompressed state if its compressed size was equal to or larger than BLCKSZ - Improvement: backup from replica >= 9.6 no longer needs a connection to the master - Workaround: wrong minRecPoint in PostgreSQL due to commit 8d68ee6 (block from future); overwrite minRecPoint with the latest applied LSN - Improvement: merge is now considered a stable feature - Improvement: validation now uses a more conservative and paranoid approach to file validation; during validation pg_probackup also checks block checksums, performs sanity checks based on block header information, and tries to detect blocks from the future - New validate/restore option: '--skip-block-validation' - disables the aforementioned approach to file validation - Multiple minor fixes --- src/pg_probackup.c | 2 +- tests/expected/option_version.out | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pg_probackup.c b/src/pg_probackup.c index ea99672d..00b0fc42 100644 --- a/src/pg_probackup.c +++ b/src/pg_probackup.c @@ -17,7 +17,7 @@ #include "utils/thread.h" -const char *PROGRAM_VERSION = "2.0.23"; +const char *PROGRAM_VERSION = "2.0.24"; const char *PROGRAM_URL = "https://github.com/postgrespro/pg_probackup"; const char *PROGRAM_EMAIL = "https://github.com/postgrespro/pg_probackup/issues"; diff --git a/tests/expected/option_version.out b/tests/expected/option_version.out index 6a0391c2..5280a6f9 100644 --- 
a/tests/expected/option_version.out +++ b/tests/expected/option_version.out @@ -1 +1 @@ -pg_probackup 2.0.23 \ No newline at end of file +pg_probackup 2.0.24 \ No newline at end of file
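To make the "badly compressed blocks" item in the 2.0.24 changelog above concrete, here is a minimal C sketch of the invariant it describes, under stated assumptions: BackupPageHeader and page_is_stored_uncompressed are hypothetical simplified names, not pg_probackup's actual API; only BLCKSZ and the compressed_size <= BLCKSZ invariant (quoted in the validate_test.py notes earlier in the series) come from the patches themselves.

/*
 * Illustrative sketch only, not pg_probackup source code.
 * The 2.0.24 changelog says a page whose recorded "compressed" size is
 * equal to or larger than BLCKSZ cannot actually be compressed, so it
 * must be copied as-is instead of being fed to the decompressor.
 */
#include <stdio.h>

#define BLCKSZ 8192                 /* PostgreSQL default block size */

typedef struct BackupPageHeader     /* hypothetical, simplified header */
{
    int compressed_size;            /* payload size recorded in the backup */
} BackupPageHeader;

/* Returns 1 when the payload must be treated as a raw (uncompressed) page. */
static int
page_is_stored_uncompressed(const BackupPageHeader *header)
{
    return header->compressed_size >= BLCKSZ;
}

int
main(void)
{
    BackupPageHeader h = { BLCKSZ };    /* the "badly compressed" case */

    if (page_is_stored_uncompressed(&h))
        printf("copy page as-is, skip decompression\n");
    return 0;
}

The design point is that a "compressed" payload of BLCKSZ bytes or more cannot have been shrunk by compression, so a restore path must copy it verbatim rather than attempt decompression; that is the risk the 2.0.24 bugfix closes.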