diff --git a/COPYRIGHT b/COPYRIGHT deleted file mode 100644 index 49d70472..00000000 --- a/COPYRIGHT +++ /dev/null @@ -1,29 +0,0 @@ -Copyright (c) 2015-2017, Postgres Professional -Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - -Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group -Portions Copyright (c) 1994, The Regents of the University of California - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the NIPPON TELEGRAPH AND TELEPHONE CORPORATION - (NTT) nor the names of its contributors may be used to endorse or - promote products derived from this software without specific prior - written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..3969eaa8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2015-2018, Postgres Professional +Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION + +Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group +Portions Copyright (c) 1994, The Regents of the University of California + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose, without fee, and without a written agreement +is hereby granted, provided that the above copyright notice and this +paragraph and the following two paragraphs appear in all copies. + +IN NO EVENT SHALL POSTGRES PROFESSIONAL BE LIABLE TO ANY PARTY FOR +DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING +LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS +DOCUMENTATION, EVEN IF POSTGRES PROFESSIONAL HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +POSTGRES PROFESSIONAL SPECIFICALLY DISCLAIMS ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS +ON AN "AS IS" BASIS, AND POSTGRES PROFESSIONAL HAS NO OBLIGATIONS TO +PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. diff --git a/README.md b/README.md index 73936ae1..53945eb8 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ The utility is compatible with: `PTRACK` backup support provided via following options: * vanilla PostgreSQL compiled with ptrack patch. 
Currently there are patches for [PostgreSQL 9.6](https://gist.githubusercontent.com/gsmol/5b615c971dfd461c76ef41a118ff4d97/raw/e471251983f14e980041f43bea7709b8246f4178/ptrack_9.6.6_v1.5.patch) and [PostgreSQL 10](https://gist.githubusercontent.com/gsmol/be8ee2a132b88463821021fd910d960e/raw/de24f9499f4f314a4a3e5fae5ed4edb945964df8/ptrack_10.1_v1.5.patch) -* Postgres Pro Standard 9.5, 9.6, 10 +* Postgres Pro Standard 9.5, 9.6, 10, 11 * Postgres Pro Enterprise 9.5, 9.6, 10 As compared to other backup solutions, `pg_probackup` offers the following benefits that can help you implement different backup strategies and deal with large amounts of data: @@ -38,7 +38,7 @@ Regardless of the chosen backup type, all backups taken with `pg_probackup` supp `pg_probackup` currently has the following limitations: * Creating backups from a remote server is currently not supported. -* The server from which the backup was taken and the restored server must be compatible by the [block_size](https://postgrespro.com/docs/postgresql/current/runtime-config-preset#guc-block-size) and [wal_block_size](https://postgrespro.com/docs/postgresql/current/runtime-config-preset#guc-wal-block-size) parameters and have the same major release number. +* The server from which the backup was taken and the restored server must be compatible by the [block_size](https://postgrespro.com/docs/postgresql/current/runtime-config-preset#GUC-BLOCK-SIZE) and [wal_block_size](https://postgrespro.com/docs/postgresql/current/runtime-config-preset#GUC-WAL-BLOCK-SIZE) parameters and have the same major release number. * Microsoft Windows operating system is not supported. * Configuration files outside of PostgreSQL data directory are not included into the backup and should be backed up separately. @@ -85,7 +85,7 @@ Currently the latest documentation can be found at [Postgres Pro Enterprise docu ## Licence -This module available under the same license as [PostgreSQL](https://www.postgresql.org/about/licence/). +This module is available under a [license](LICENSE) similar to [PostgreSQL](https://www.postgresql.org/about/licence/). ## Feedback diff --git a/src/archive.c b/src/archive.c index e26d17b6..2953b89e 100644 --- a/src/archive.c +++ b/src/archive.c @@ -3,7 +3,7 @@ * archive.c: - pg_probackup specific archive commands for archive backups. * * - * Portions Copyright (c) 2017, Postgres Professional + * Portions Copyright (c) 2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/backup.c b/src/backup.c index 0dd681fa..2317bee9 100644 --- a/src/backup.c +++ b/src/backup.c @@ -3,7 +3,7 @@ * backup.c: backup DB cluster, archived WAL * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -475,6 +475,8 @@ do_backup_instance(void) pgBackup *prev_backup = NULL; parray *prev_backup_filelist = NULL; + pgFile *pg_control = NULL; + elog(LOG, "Database backup start"); /* Initialize size summary */ @@ -754,9 +756,34 @@ do_backup_instance(void) parray_free(prev_backup_filelist); }
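Note on the hunk below: it locates pg_control by a linear scan of backup_files_list before pg_stop_backup(), so that set_min_recovery_point() can later patch the copy of pg_control inside the backup. A minimal sketch of the same lookup factored into a helper — `find_pg_control` is a hypothetical name, while parray_num, parray_get, pgFile and MAXPGPATH come from the surrounding code, so this is not standalone:

```c
/* Hypothetical helper mirroring the lookup in the hunk below: return the
 * file-list entry for "<pgdata>/global/pg_control", or NULL if it is absent. */
static pgFile *
find_pg_control(parray *files, const char *pgdata_path)
{
	char	path[MAXPGPATH];
	size_t	i;

	snprintf(path, sizeof(path), "%s/%s", pgdata_path, "global/pg_control");

	for (i = 0; i < parray_num(files); i++)
	{
		pgFile *file = (pgFile *) parray_get(files, i);

		if (strcmp(file->path, path) == 0)
			return file;
	}
	return NULL;	/* caller must treat a miss as a fatal condition */
}
```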
+ /* In case of backup from replica >= 9.6 we must fix minRecPoint: + * first we must find pg_control in backup_files_list. + */ + if (current.from_replica && !exclusive_backup) + { + char pg_control_path[MAXPGPATH]; + + snprintf(pg_control_path, sizeof(pg_control_path), "%s/%s", pgdata, "global/pg_control"); + + for (i = 0; i < parray_num(backup_files_list); i++) + { + pgFile *tmp_file = (pgFile *) parray_get(backup_files_list, i); + + if (strcmp(tmp_file->path, pg_control_path) == 0) + { + pg_control = tmp_file; + break; + } + } + } + + /* Notify end of backup */ pg_stop_backup(&current); + if (current.from_replica && !exclusive_backup) + set_min_recovery_point(pg_control, database_path, current.stop_lsn); + /* Add archived xlog files into the list of files of this backup */ if (stream_wal) { @@ -790,7 +817,7 @@ do_backup_instance(void) } /* Print the list of files to backup catalog */ - pgBackupWriteFileList(&current, backup_files_list, pgdata); + write_backup_filelist(&current, backup_files_list, pgdata); /* Compute summary of size of regular files in the backup */ for (i = 0; i < parray_num(backup_files_list); i++) @@ -883,7 +910,7 @@ do_backup(time_t start_time) } } - if (current.from_replica) + if (current.from_replica && exclusive_backup) { /* Check master connection options */ if (master_host == NULL) @@ -1089,8 +1116,11 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup) params[0] = label; - /* For replica we call pg_start_backup() on master */ - conn = (backup->from_replica) ? master_conn : backup_conn; + /* For 9.5 replica we call pg_start_backup() on master */ + if (backup->from_replica && exclusive_backup) + conn = master_conn; + else + conn = backup_conn; /* 2nd argument is 'fast'*/ params[1] = smooth ? "false" : "true"; @@ -1118,16 +1148,18 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup) PQclear(res); - if (current.backup_mode == BACKUP_MODE_DIFF_PAGE) + if (current.backup_mode == BACKUP_MODE_DIFF_PAGE && + (!(backup->from_replica && !exclusive_backup))) /* * Switch to a new WAL segment. It is necessary to get archived WAL * segment, which includes start LSN of current backup. + * Don't do this for replica backups unless it's PG 9.5 */ pg_switch_wal(conn); if (current.backup_mode == BACKUP_MODE_DIFF_PAGE) /* In PAGE mode wait for current segment... */ - wait_wal_lsn(backup->start_lsn, true, false); + wait_wal_lsn(backup->start_lsn, true, false); /* * Do not wait start_lsn for stream backup. * Because WAL streaming will start after pg_start_backup() in stream @@ -1137,8 +1169,10 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup) /* ...for others wait for previous segment */ wait_wal_lsn(backup->start_lsn, true, true); - /* Wait for start_lsn to be replayed by replica */ - if (backup->from_replica) + /* In case of backup from replica for PostgreSQL 9.5 + * wait for start_lsn to be replayed by replica + */ + if (backup->from_replica && exclusive_backup) wait_replica_wal_lsn(backup->start_lsn, true); } @@ -1488,7 +1522,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment) GetXLogFileName(wal_segment, tli, targetSegNo, xlog_seg_size); /* - * In pg_start_backup we wait for 'lsn' in 'pg_wal' directory iff it is + * In pg_start_backup we wait for 'lsn' in 'pg_wal' directory if it is * stream and non-page backup. Page backup needs archived WAL files, so we * wait for 'lsn' in archive 'wal' directory for page backups.
* @@ -1509,7 +1543,12 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment) { join_path_components(wal_segment_path, arclog_path, wal_segment); wal_segment_dir = arclog_path; - timeout = archive_timeout; + + if (archive_timeout > 0) + timeout = archive_timeout; + else + timeout = ARCHIVE_TIMEOUT_DEFAULT; + } if (wait_prev_segment) @@ -1669,7 +1708,7 @@ pg_stop_backup(pgBackup *backup) PGresult *tablespace_map_content = NULL; uint32 lsn_hi; uint32 lsn_lo; - XLogRecPtr restore_lsn = InvalidXLogRecPtr; + //XLogRecPtr restore_lsn = InvalidXLogRecPtr; int pg_stop_backup_timeout = 0; char path[MAXPGPATH]; char backup_label[MAXPGPATH]; @@ -1689,16 +1728,21 @@ pg_stop_backup(pgBackup *backup) if (!backup_in_progress) elog(ERROR, "backup is not in progress"); - /* For replica we call pg_stop_backup() on master */ - conn = (current.from_replica) ? master_conn : backup_conn; + /* For 9.5 replica we call pg_stop_backup() on master */ + if (current.from_replica && exclusive_backup) + conn = master_conn; + else + conn = backup_conn; /* Remove annoying NOTICE messages generated by backend */ res = pgut_execute(conn, "SET client_min_messages = warning;", 0, NULL); PQclear(res); - /* Create restore point */ - if (backup != NULL) + /* Create restore point + * only if it's a backup from master, or from an exclusive replica (which connects to the master) + */ + if (backup != NULL && (!current.from_replica || (current.from_replica && exclusive_backup))) { const char *params[1]; char name[1024]; @@ -1716,7 +1760,7 @@ pg_stop_backup(pgBackup *backup) /* Extract timeline and LSN from the result */ XLogDataFromLSN(PQgetvalue(res, 0, 0), &lsn_hi, &lsn_lo); /* Calculate LSN */ - restore_lsn = ((uint64) lsn_hi) << 32 | lsn_lo; + //restore_lsn = ((uint64) lsn_hi) << 32 | lsn_lo; PQclear(res); } @@ -1737,14 +1781,29 @@ pg_stop_backup(pgBackup *backup) * Stop the non-exclusive backup. Besides stop_lsn it returns from * pg_stop_backup(false) copy of the backup label and tablespace map * so they can be written to disk by the caller. + * In case of backup from replica >= 9.6 we do not trust minRecPoint + * and stop_backup LSN, so we use latest replayed LSN as STOP LSN.
*/ - stop_backup_query = "SELECT" - " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot())," - " current_timestamp(0)::timestamptz," - " lsn," - " labelfile," - " spcmapfile" - " FROM pg_catalog.pg_stop_backup(false)"; + if (current.from_replica) + stop_backup_query = "SELECT" + " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot())," + " current_timestamp(0)::timestamptz," +#if PG_VERSION_NUM >= 100000 + " pg_catalog.pg_last_wal_replay_lsn()," +#else + " pg_catalog.pg_last_xlog_replay_location()," +#endif + " labelfile," + " spcmapfile" + " FROM pg_catalog.pg_stop_backup(false)"; + else + stop_backup_query = "SELECT" + " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot())," + " current_timestamp(0)::timestamptz," + " lsn," + " labelfile," + " spcmapfile" + " FROM pg_catalog.pg_stop_backup(false)"; } else @@ -1832,13 +1891,13 @@ pg_stop_backup(pgBackup *backup) if (!XRecOffIsValid(stop_backup_lsn)) { - stop_backup_lsn = restore_lsn; + if (XRecOffIsNull(stop_backup_lsn)) + stop_backup_lsn = stop_backup_lsn + SizeOfXLogLongPHD; + else + elog(ERROR, "Invalid stop_backup_lsn value %X/%X", + (uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn)); } - if (!XRecOffIsValid(stop_backup_lsn)) - elog(ERROR, "Invalid stop_backup_lsn value %X/%X", - (uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn)); - /* Write backup_label and tablespace_map */ if (!exclusive_backup) { @@ -1939,7 +1998,7 @@ pg_stop_backup(pgBackup *backup) stream_xlog_path[MAXPGPATH]; /* Wait for stop_lsn to be received by replica */ - if (backup->from_replica) + if (current.from_replica) wait_replica_wal_lsn(stop_backup_lsn, false); /* * Wait for stop_lsn to be archived or streamed. @@ -1962,10 +2021,12 @@ pg_stop_backup(pgBackup *backup) elog(LOG, "Getting the Recovery Time from WAL"); + /* iterate over WAL from stop_backup lsn to start_backup lsn */ if (!read_recovery_info(xlog_path, backup->tli, xlog_seg_size, backup->start_lsn, backup->stop_lsn, &backup->recovery_time, &backup->recovery_xid)) { + elog(LOG, "Failed to find Recovery Time in WAL. Forced to trust current_timestamp"); backup->recovery_time = recovery_time; backup->recovery_xid = recovery_xid; } @@ -2074,7 +2135,7 @@ backup_files(void *arg) elog(ERROR, "interrupted during backup"); if (progress) - elog(LOG, "Progress: (%d/%d). Process file \"%s\"", + elog(INFO, "Progress: (%d/%d). 
Process file \"%s\"", i + 1, n_backup_files_list, file->path); /* stat file to check its current state */ @@ -2168,7 +2229,7 @@ backup_files(void *arg) file->path, file->write_size); } else - elog(LOG, "unexpected file type %d", buf.st_mode); + elog(WARNING, "unexpected file type %d", buf.st_mode); } /* Close connection */ diff --git a/src/catalog.c b/src/catalog.c index 74d8ee90..788c33f7 100644 --- a/src/catalog.c +++ b/src/catalog.c @@ -3,7 +3,7 @@ * catalog.c: backup catalog operation * * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -509,18 +509,22 @@ write_backup(pgBackup *backup) fp = fopen(conf_path, "wt"); if (fp == NULL) elog(ERROR, "Cannot open configuration file \"%s\": %s", conf_path, - strerror(errno)); + strerror(errno)); pgBackupWriteControl(fp, backup); - fclose(fp); + if (fflush(fp) != 0 || + fsync(fileno(fp)) != 0 || + fclose(fp)) + elog(ERROR, "Cannot write configuration file \"%s\": %s", + conf_path, strerror(errno)); } /* * Output the list of files to backup catalog DATABASE_FILE_LIST */ void -pgBackupWriteFileList(pgBackup *backup, parray *files, const char *root) +write_backup_filelist(pgBackup *backup, parray *files, const char *root) { FILE *fp; char path[MAXPGPATH]; @@ -529,7 +533,7 @@ pgBackupWriteFileList(pgBackup *backup, parray *files, const char *root) fp = fopen(path, "wt"); if (fp == NULL) - elog(ERROR, "cannot open file list \"%s\": %s", path, + elog(ERROR, "Cannot open file list \"%s\": %s", path, strerror(errno)); print_file_list(fp, files, root); @@ -537,7 +541,7 @@ pgBackupWriteFileList(pgBackup *backup, parray *files, const char *root) if (fflush(fp) != 0 || fsync(fileno(fp)) != 0 || fclose(fp)) - elog(ERROR, "cannot write file list \"%s\": %s", path, strerror(errno)); + elog(ERROR, "Cannot write file list \"%s\": %s", path, strerror(errno)); } /* diff --git a/src/configure.c b/src/configure.c index 30845607..6d3cf9cb 100644 --- a/src/configure.c +++ b/src/configure.c @@ -257,14 +257,14 @@ readBackupCatalogConfigFile(void) { 's', 0, "master-port", &(config->master_port), SOURCE_FILE_STRICT }, { 's', 0, "master-db", &(config->master_db), SOURCE_FILE_STRICT }, { 's', 0, "master-user", &(config->master_user), SOURCE_FILE_STRICT }, - { 'u', 0, "replica-timeout", &(config->replica_timeout), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_MS }, + { 'u', 0, "replica-timeout", &(config->replica_timeout), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_S }, /* other options */ { 'U', 0, "system-identifier", &(config->system_identifier), SOURCE_FILE_STRICT }, #if PG_VERSION_NUM >= 110000 {'u', 0, "xlog-seg-size", &config->xlog_seg_size, SOURCE_FILE_STRICT}, #endif /* archive options */ - { 'u', 0, "archive-timeout", &(config->archive_timeout), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_MS }, + { 'u', 0, "archive-timeout", &(config->archive_timeout), SOURCE_CMDLINE, SOURCE_DEFAULT, OPTION_UNIT_S }, {0} }; diff --git a/src/data.c b/src/data.c index c3fcc254..9b6cbd23 100644 --- a/src/data.c +++ b/src/data.c @@ -3,7 +3,7 @@ * data.c: utils to parse and backup data pages * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * 
*------------------------------------------------------------------------- */ @@ -57,7 +57,7 @@ zlib_decompress(void *dst, size_t dst_size, void const *src, size_t src_size) */ static int32 do_compress(void* dst, size_t dst_size, void const* src, size_t src_size, - CompressAlg alg, int level) + CompressAlg alg, int level, const char **errormsg) { switch (alg) { @@ -66,7 +66,13 @@ do_compress(void* dst, size_t dst_size, void const* src, size_t src_size, return -1; #ifdef HAVE_LIBZ case ZLIB_COMPRESS: - return zlib_compress(dst, dst_size, src, src_size, level); + { + int32 ret; + ret = zlib_compress(dst, dst_size, src, src_size, level); + if (ret < Z_OK && errormsg) + *errormsg = zError(ret); + return ret; + } #endif case PGLZ_COMPRESS: return pglz_compress(src, src_size, dst, PGLZ_strategy_always); @@ -81,7 +87,7 @@ do_compress(void* dst, size_t dst_size, void const* src, size_t src_size, */ static int32 do_decompress(void* dst, size_t dst_size, void const* src, size_t src_size, - CompressAlg alg) + CompressAlg alg, const char **errormsg) { switch (alg) { @@ -90,7 +96,13 @@ do_decompress(void* dst, size_t dst_size, void const* src, size_t src_size, return -1; #ifdef HAVE_LIBZ case ZLIB_COMPRESS: - return zlib_decompress(dst, dst_size, src, src_size); + { + int32 ret; + ret = zlib_decompress(dst, dst_size, src, src_size); + if (ret < Z_OK && errormsg) + *errormsg = zError(ret); + return ret; + } #endif case PGLZ_COMPRESS: return pglz_decompress(src, src_size, dst, dst_size); @@ -99,6 +111,54 @@ do_decompress(void* dst, size_t dst_size, void const* src, size_t src_size, return -1; } + +#define ZLIB_MAGIC 0x78 + +/* + * Before version 2.0.23 there was a bug in pg_probackup: pages whose compressed + * size is exactly the same as the original size were not treated as compressed. + * This check tries to detect and decompress such pages. + * There is no 100% reliable criterion to determine whether a page is compressed or not. + * But at least we will do this check only for pages which do not pass the validation step. + */ +static bool +page_may_be_compressed(Page page, CompressAlg alg, uint32 backup_version) +{ + PageHeader phdr; + + phdr = (PageHeader) page; + + /* First check if page header is valid (it seems to be a fast enough check) */ + if (!(PageGetPageSize(phdr) == BLCKSZ && + PageGetPageLayoutVersion(phdr) == PG_PAGE_LAYOUT_VERSION && + (phdr->pd_flags & ~PD_VALID_FLAG_BITS) == 0 && + phdr->pd_lower >= SizeOfPageHeaderData && + phdr->pd_lower <= phdr->pd_upper && + phdr->pd_upper <= phdr->pd_special && + phdr->pd_special <= BLCKSZ && + phdr->pd_special == MAXALIGN(phdr->pd_special))) + { + /* ... and only if it is invalid, do more checks */ + if (backup_version >= 20023) + { + /* Versions 2.0.23 and higher don't have such a bug */ + return false; + } +#ifdef HAVE_LIBZ + /* For zlib we can check page magic: + * https://stackoverflow.com/questions/9050260/what-does-a-zlib-header-look-like + */ + if (alg == ZLIB_COMPRESS && *(char*)page != ZLIB_MAGIC) + { + return false; + } +#endif + /* otherwise let's try to decompress the page */ + return true; + } + return false; +} + /* * When copying datafiles to backup we validate and compress them block * by block. Thus special header is required for each data block. 
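Note on page_may_be_compressed() above: the ZLIB_MAGIC pre-filter works because a zlib stream produced with the default 32K window always starts with the CMF byte 0x78. A minimal, self-contained sketch of that property (assumes zlib is installed; buffer names are illustrative) — build with `cc demo.c -lz`:

```c
#include <stdio.h>
#include <string.h>
#include <zlib.h>

int main(void)
{
	unsigned char page[8192];	/* stand-in for a BLCKSZ-sized page */
	unsigned char out[16384];	/* comfortably above compressBound(8192) */
	uLongf        out_len = sizeof(out);

	memset(page, 0xAB, sizeof(page));
	if (compress(out, &out_len, page, sizeof(page)) != Z_OK)
		return 1;

	/* first byte of any default-window zlib stream is 0x78 (ZLIB_MAGIC) */
	printf("first byte: 0x%02X\n", out[0]);
	return out[0] == 0x78 ? 0 : 1;
}
```

Note that 0x78 can also legitimately occur as the first byte of a genuinely uncompressed page, so the magic byte is only a cheap pre-filter: when it matches, the function still returns true and the caller resolves the ambiguity with an actual decompression attempt.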
@@ -368,7 +428,7 @@ compress_and_backup_page(pgFile *file, BlockNumber blknum, BackupPageHeader header; size_t write_buffer_size = sizeof(header); char write_buffer[BLCKSZ+sizeof(header)]; - char compressed_page[BLCKSZ]; + char compressed_page[BLCKSZ*2]; /* compressed page may require more space than uncompressed */ if(page_state == SkipCurrentPage) return; @@ -386,16 +446,22 @@ compress_and_backup_page(pgFile *file, BlockNumber blknum, } else { + const char *errormsg = NULL; + /* The page was not truncated, so we need to compress it */ - header.compressed_size = do_compress(compressed_page, BLCKSZ, - page, BLCKSZ, calg, clevel); + header.compressed_size = do_compress(compressed_page, sizeof(compressed_page), + page, BLCKSZ, calg, clevel, + &errormsg); + /* Something went wrong and errormsg was assigned, throw a warning */ + if (header.compressed_size < 0 && errormsg != NULL) + elog(WARNING, "An error occurred during compressing block %u of file \"%s\": %s", + blknum, file->path, errormsg); file->compress_alg = calg; file->read_size += BLCKSZ; - Assert (header.compressed_size <= BLCKSZ); /* The page was successfully compressed. */ - if (header.compressed_size > 0) + if (header.compressed_size > 0 && header.compressed_size < BLCKSZ) { memcpy(write_buffer, &header, sizeof(header)); memcpy(write_buffer + sizeof(header), @@ -425,7 +491,7 @@ compress_and_backup_page(pgFile *file, BlockNumber blknum, fclose(in); fclose(out); - elog(ERROR, "File: %s, cannot write backup at block %u : %s", + elog(ERROR, "File: %s, cannot write backup at block %u: %s", file->path, blknum, strerror(errno_tmp)); } @@ -613,7 +679,7 @@ backup_data_file(backup_files_arg* arguments, */ void restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, - bool write_header) + bool write_header, uint32 backup_version) { FILE *in = NULL; FILE *out = NULL; @@ -656,6 +722,7 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, size_t read_len; DataPage compressed_page; /* used as read buffer */ DataPage page; + int32 uncompressed_size = 0; /* File didn`t changed. Nothig to copy */ if (file->write_size == BYTES_INVALID) @@ -711,20 +778,32 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, Assert(header.compressed_size <= BLCKSZ); + /* read a page from file */ read_len = fread(compressed_page.data, 1, MAXALIGN(header.compressed_size), in); if (read_len != MAXALIGN(header.compressed_size)) elog(ERROR, "cannot read block %u of \"%s\" read %lu of %d", blknum, file->path, read_len, header.compressed_size); - if (header.compressed_size != BLCKSZ) + /* + * if page size is smaller than BLCKSZ, decompress the page. + * BUGFIX for versions < 2.0.23: if page size is equal to BLCKSZ, + * we have to check whether it is compressed or not using + * the page_may_be_compressed() function. + */ + if (header.compressed_size != BLCKSZ + || page_may_be_compressed(compressed_page.data, file->compress_alg, + backup_version)) { - int32 uncompressed_size = 0; + const char *errormsg = NULL; uncompressed_size = do_decompress(page.data, BLCKSZ, compressed_page.data, header.compressed_size, - file->compress_alg); + file->compress_alg, &errormsg); + if (uncompressed_size < 0 && errormsg != NULL) + elog(WARNING, "An error occurred during decompressing block %u of file \"%s\": %s", + blknum, file->path, errormsg); if (uncompressed_size != BLCKSZ) elog(ERROR, "page of file \"%s\" uncompressed to %d bytes. 
!= BLCKSZ", @@ -748,7 +827,11 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, blknum, file->path, strerror(errno)); } - if (header.compressed_size < BLCKSZ) + /* if we uncompressed the page - write page.data, + * if page wasn't compressed - + * write what we've read - compressed_page.data + */ + if (uncompressed_size == BLCKSZ) { if (fwrite(page.data, 1, BLCKSZ, out) != BLCKSZ) elog(ERROR, "cannot write block %u of \"%s\": %s", @@ -756,7 +839,7 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, } else { - /* if page wasn't compressed, we've read full block */ + /* */ if (fwrite(compressed_page.data, 1, BLCKSZ, out) != BLCKSZ) elog(ERROR, "cannot write block %u of \"%s\": %s", blknum, file->path, strerror(errno)); @@ -954,22 +1037,6 @@ copy_file(const char *from_root, const char *to_root, pgFile *file) return true; } -/* - * Move file from one backup to another. - * We do not apply compression to these files, because - * it is either small control file or already compressed cfs file. - */ -void -move_file(const char *from_root, const char *to_root, pgFile *file) -{ - char to_path[MAXPGPATH]; - - join_path_components(to_path, to_root, file->path + strlen(from_root) + 1); - if (rename(file->path, to_path) == -1) - elog(ERROR, "Cannot move file \"%s\" to path \"%s\": %s", - file->path, to_path, strerror(errno)); -} - #ifdef HAVE_LIBZ /* * Show error during work with compressed file @@ -1529,11 +1596,14 @@ validate_one_page(Page page, pgFile *file, /* Valiate pages of datafile in backup one by one */ bool -check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version) +check_file_pages(pgFile *file, XLogRecPtr stop_lsn, + uint32 checksum_version, uint32 backup_version) { size_t read_len = 0; bool is_valid = true; FILE *in; + pg_crc32 crc; + bool use_crc32c = (backup_version <= 20021); elog(VERBOSE, "validate relation blocks for file %s", file->name); @@ -1550,6 +1620,9 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version) file->path, strerror(errno)); } + /* calc CRC of backup file */ + INIT_FILE_CRC32(use_crc32c, crc); + /* read and validate pages one by one */ while (true) { @@ -1574,6 +1647,8 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version) blknum, file->path, strerror(errno_tmp)); } + COMP_FILE_CRC32(use_crc32c, crc, &header, read_len); + if (header.block < blknum) elog(ERROR, "backup is broken at file->path %s block %u", file->path, blknum); @@ -1595,19 +1670,34 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version) elog(ERROR, "cannot read block %u of \"%s\" read %lu of %d", blknum, file->path, read_len, header.compressed_size); - if (header.compressed_size != BLCKSZ) + COMP_FILE_CRC32(use_crc32c, crc, compressed_page.data, read_len); + + if (header.compressed_size != BLCKSZ + || page_may_be_compressed(compressed_page.data, file->compress_alg, + backup_version)) { int32 uncompressed_size = 0; + const char *errormsg = NULL; uncompressed_size = do_decompress(page.data, BLCKSZ, compressed_page.data, header.compressed_size, - file->compress_alg); + file->compress_alg, + &errormsg); + if (uncompressed_size < 0 && errormsg != NULL) + elog(WARNING, "An error occured during decompressing block %u of file \"%s\": %s", + blknum, file->path, errormsg); if (uncompressed_size != BLCKSZ) + { + if (header.compressed_size == BLCKSZ) + { + is_valid = false; + continue; + } elog(ERROR, "page of file \"%s\" uncompressed to %d bytes. 
!= BLCKSZ", file->path, uncompressed_size); - + } if (validate_one_page(page.data, file, blknum, stop_lsn, checksum_version) == PAGE_IS_FOUND_AND_NOT_VALID) is_valid = false; @@ -1620,5 +1710,15 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version) } } + FIN_FILE_CRC32(use_crc32c, crc); + fclose(in); + + if (crc != file->crc) + { + elog(WARNING, "Invalid CRC of backup file \"%s\" : %X. Expected %X", + file->path, file->crc, crc); + is_valid = false; + } + return is_valid; } diff --git a/src/delete.c b/src/delete.c index 9d1c3867..599fd2fd 100644 --- a/src/delete.c +++ b/src/delete.c @@ -3,7 +3,7 @@ * delete.c: delete backup files. * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -14,7 +14,6 @@ #include #include -static int delete_backup_files(pgBackup *backup); static void delete_walfiles(XLogRecPtr oldest_lsn, TimeLineID oldest_tli, uint32 xlog_seg_size); @@ -245,7 +244,7 @@ do_retention_purge(void) * Delete backup files of the backup and update the status of the backup to * BACKUP_STATUS_DELETED. */ -static int +void delete_backup_files(pgBackup *backup) { size_t i; @@ -257,11 +256,15 @@ delete_backup_files(pgBackup *backup) * If the backup was deleted already, there is nothing to do. */ if (backup->status == BACKUP_STATUS_DELETED) - return 0; + { + elog(WARNING, "Backup %s already deleted", + base36enc(backup->start_time)); + return; + } time2iso(timestamp, lengthof(timestamp), backup->recovery_time); - elog(INFO, "delete: %s %s", + elog(INFO, "Delete: %s %s", base36enc(backup->start_time), timestamp); /* @@ -283,17 +286,17 @@ delete_backup_files(pgBackup *backup) pgFile *file = (pgFile *) parray_get(files, i); /* print progress */ - elog(VERBOSE, "delete file(%zd/%lu) \"%s\"", i + 1, + elog(VERBOSE, "Delete file(%zd/%lu) \"%s\"", i + 1, (unsigned long) parray_num(files), file->path); if (remove(file->path)) { - elog(WARNING, "can't remove \"%s\": %s", file->path, - strerror(errno)); - parray_walk(files, pgFileFree); - parray_free(files); - - return 1; + if (errno == ENOENT) + elog(VERBOSE, "File \"%s\" is absent", file->path); + else + elog(ERROR, "Cannot remove \"%s\": %s", file->path, + strerror(errno)); + return; } } @@ -301,7 +304,7 @@ delete_backup_files(pgBackup *backup) parray_free(files); backup->status = BACKUP_STATUS_DELETED; - return 0; + return; } /* diff --git a/src/dir.c b/src/dir.c index 1a93c1fe..5c6f60d8 100644 --- a/src/dir.c +++ b/src/dir.c @@ -3,7 +3,7 @@ * dir.c: directory operation utility. 
* * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -267,28 +267,6 @@ pgFileGetCRC(const char *file_path, bool use_crc32c) size_t len; int errno_tmp; -#define INIT_FILE_CRC32(crc) \ -do { \ - if (use_crc32c) \ - INIT_CRC32C(crc); \ - else \ - INIT_TRADITIONAL_CRC32(crc); \ -} while (0) -#define COMP_FILE_CRC32(crc, data, len) \ -do { \ - if (use_crc32c) \ - COMP_CRC32C((crc), (data), (len)); \ - else \ - COMP_TRADITIONAL_CRC32(crc, data, len); \ -} while (0) -#define FIN_FILE_CRC32(crc) \ -do { \ - if (use_crc32c) \ - FIN_CRC32C(crc); \ - else \ - FIN_TRADITIONAL_CRC32(crc); \ -} while (0) - /* open file in binary read mode */ fp = fopen(file_path, PG_BINARY_R); if (fp == NULL) @@ -296,20 +274,20 @@ do { \ file_path, strerror(errno)); /* calc CRC of backup file */ - INIT_FILE_CRC32(crc); + INIT_FILE_CRC32(use_crc32c, crc); while ((len = fread(buf, 1, sizeof(buf), fp)) == sizeof(buf)) { if (interrupted) elog(ERROR, "interrupted during CRC calculation"); - COMP_FILE_CRC32(crc, buf, len); + COMP_FILE_CRC32(use_crc32c, crc, buf, len); } errno_tmp = errno; if (!feof(fp)) elog(WARNING, "cannot read \"%s\": %s", file_path, strerror(errno_tmp)); if (len > 0) - COMP_FILE_CRC32(crc, buf, len); - FIN_FILE_CRC32(crc); + COMP_FILE_CRC32(use_crc32c, crc, buf, len); + FIN_FILE_CRC32(use_crc32c, crc); fclose(fp); diff --git a/src/fetch.c b/src/fetch.c index 17e77025..988ce283 100644 --- a/src/fetch.c +++ b/src/fetch.c @@ -3,7 +3,7 @@ * fetch.c * Functions for fetching files from PostgreSQL data directory * - * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/help.c b/src/help.c index f534f396..409c8f82 100644 --- a/src/help.c +++ b/src/help.c @@ -2,7 +2,7 @@ * * help.c * - * Copyright (c) 2017-2017, Postgres Professional + * Copyright (c) 2017-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -118,6 +118,7 @@ help_pg_probackup(void) printf(_(" [--master-db=db_name] [--master-host=host_name]\n")); printf(_(" [--master-port=port] [--master-user=user_name]\n")); printf(_(" [--replica-timeout=timeout]\n")); + printf(_(" [--skip-block-validation]\n")); printf(_("\n %s restore -B backup-path --instance=instance_name\n"), PROGRAM_NAME); printf(_(" [-D pgdata-path] [-i backup-id] [--progress]\n")); @@ -127,12 +128,14 @@ help_pg_probackup(void) printf(_(" [--recovery-target-action=pause|promote|shutdown]\n")); printf(_(" [--restore-as-replica]\n")); printf(_(" [--no-validate]\n")); + printf(_(" [--skip-block-validation]\n")); printf(_("\n %s validate -B backup-path [--instance=instance_name]\n"), PROGRAM_NAME); printf(_(" [-i backup-id] [--progress]\n")); printf(_(" [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]]\n")); printf(_(" [--recovery-target-name=target-name]\n")); printf(_(" [--timeline=timeline]\n")); + printf(_(" [--skip-block-validation]\n")); printf(_("\n %s show -B backup-path\n"), PROGRAM_NAME); printf(_(" [--instance=instance_name [-i backup-id]]\n")); @@ -203,7 +206,8 @@ help_backup(void) printf(_(" [-w --no-password] [-W --password]\n")); printf(_(" [--master-db=db_name] [--master-host=host_name]\n")); printf(_(" 
[--master-port=port] [--master-user=user_name]\n")); - printf(_(" [--replica-timeout=timeout]\n\n")); + printf(_(" [--replica-timeout=timeout]\n")); + printf(_(" [--skip-block-validation]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" -b, --backup-mode=backup-mode backup mode=FULL|PAGE|DELTA|PTRACK\n")); @@ -215,6 +219,7 @@ help_backup(void) printf(_(" -j, --threads=NUM number of parallel threads\n")); printf(_(" --archive-timeout=timeout wait timeout for WAL segment archiving (default: 5min)\n")); printf(_(" --progress show progress\n")); + printf(_(" --skip-block-validation set to validate only file-level checksum\n")); printf(_("\n Logging options:\n")); printf(_(" --log-level-console=log-level-console\n")); @@ -279,6 +284,7 @@ help_restore(void) printf(_(" [--immediate] [--recovery-target-name=target-name]\n")); printf(_(" [--recovery-target-action=pause|promote|shutdown]\n")); printf(_(" [--restore-as-replica] [--no-validate]\n\n")); + printf(_(" [--skip-block-validation]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" --instance=instance_name name of the instance\n")); @@ -305,6 +311,7 @@ help_restore(void) printf(_(" -R, --restore-as-replica write a minimal recovery.conf in the output directory\n")); printf(_(" to ease setting up a standby server\n")); printf(_(" --no-validate disable backup validation during restore\n")); + printf(_(" --skip-block-validation set to validate only file-level checksum\n")); printf(_("\n Logging options:\n")); printf(_(" --log-level-console=log-level-console\n")); @@ -335,6 +342,7 @@ help_validate(void) printf(_(" [-i backup-id] [--progress]\n")); printf(_(" [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]]\n")); printf(_(" [--timeline=timeline]\n\n")); + printf(_(" [--skip-block-validation]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" --instance=instance_name name of the instance\n")); @@ -348,6 +356,7 @@ help_validate(void) printf(_(" --timeline=timeline recovering into a particular timeline\n")); printf(_(" --recovery-target-name=target-name\n")); printf(_(" the named restore point to which recovery will proceed\n")); + printf(_(" --skip-block-validation set to validate only file-level checksum\n")); printf(_("\n Logging options:\n")); printf(_(" --log-level-console=log-level-console\n")); diff --git a/src/init.c b/src/init.c index d8e238fd..75b50e4f 100644 --- a/src/init.c +++ b/src/init.c @@ -3,7 +3,7 @@ * init.c: - initialize backup catalog. 
* * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/merge.c b/src/merge.c index 2464199f..e3d6b9f8 100644 --- a/src/merge.c +++ b/src/merge.c @@ -59,7 +59,7 @@ do_merge(time_t backup_id) if (instance_name == NULL) elog(ERROR, "required parameter is not specified: --instance"); - elog(LOG, "Merge started"); + elog(INFO, "Merge started"); catalog_lock(); @@ -77,7 +77,8 @@ do_merge(time_t backup_id) { if (backup->status != BACKUP_STATUS_OK && /* It is possible that previous merging was interrupted */ - backup->status != BACKUP_STATUS_MERGING) + backup->status != BACKUP_STATUS_MERGING && + backup->status != BACKUP_STATUS_DELETING) elog(ERROR, "Backup %s has status: %s", base36enc(backup->start_time), status2str(backup->status)); @@ -128,17 +129,21 @@ do_merge(time_t backup_id) */ for (i = full_backup_idx; i > dest_backup_idx; i--) { - pgBackup *to_backup = (pgBackup *) parray_get(backups, i); pgBackup *from_backup = (pgBackup *) parray_get(backups, i - 1); - merge_backups(to_backup, from_backup); + full_backup = (pgBackup *) parray_get(backups, i); + merge_backups(full_backup, from_backup); } + pgBackupValidate(full_backup); + if (full_backup->status == BACKUP_STATUS_CORRUPT) + elog(ERROR, "Merging of backup %s failed", base36enc(backup_id)); + /* cleanup */ parray_walk(backups, pgBackupFree); parray_free(backups); - elog(LOG, "Merge completed"); + elog(INFO, "Merge of backup %s completed", base36enc(backup_id)); } /* @@ -164,7 +169,36 @@ merge_backups(pgBackup *to_backup, pgBackup *from_backup) int i; bool merge_isok = true; - elog(LOG, "Merging backup %s with backup %s", from_backup_id, to_backup_id); + elog(INFO, "Merging backup %s with backup %s", from_backup_id, to_backup_id); + + /* + * Validate to_backup only if it is BACKUP_STATUS_OK. If it has + * BACKUP_STATUS_MERGING status then it isn't a valid backup until merging + * is finished. + */ + if (to_backup->status == BACKUP_STATUS_OK) + { + pgBackupValidate(to_backup); + if (to_backup->status == BACKUP_STATUS_CORRUPT) + elog(ERROR, "Interrupting merge"); + } + + /* + * It is OK to validate from_backup if it has BACKUP_STATUS_OK or + * BACKUP_STATUS_MERGING status. + */ + Assert(from_backup->status == BACKUP_STATUS_OK || + from_backup->status == BACKUP_STATUS_MERGING); + pgBackupValidate(from_backup); + if (from_backup->status == BACKUP_STATUS_CORRUPT) + elog(ERROR, "Interrupting merge"); + + /* + * Previous merging was interrupted while deleting the source backup. It is + * safe just to delete it again. + */ + if (from_backup->status == BACKUP_STATUS_DELETING) + goto delete_source_backup; to_backup->status = BACKUP_STATUS_MERGING; write_backup_status(to_backup); @@ -243,68 +277,10 @@ merge_backups(pgBackup *to_backup, pgBackup *from_backup) if (!merge_isok) elog(ERROR, "Data files merging failed"); - /* - * Files were copied into to_backup and deleted from from_backup. Remove - * remaining directories from from_backup. 
- */ - parray_qsort(files, pgFileComparePathDesc); - for (i = 0; i < parray_num(files); i++) - { - pgFile *file = (pgFile *) parray_get(files, i); - - if (!S_ISDIR(file->mode)) - continue; - - if (rmdir(file->path)) - elog(ERROR, "Could not remove directory \"%s\": %s", - file->path, strerror(errno)); - } - if (rmdir(from_database_path)) - elog(ERROR, "Could not remove directory \"%s\": %s", - from_database_path, strerror(errno)); - if (unlink(control_file)) - elog(ERROR, "Could not remove file \"%s\": %s", - control_file, strerror(errno)); - - pgBackupGetPath(from_backup, control_file, lengthof(control_file), - BACKUP_CONTROL_FILE); - if (unlink(control_file)) - elog(ERROR, "Could not remove file \"%s\": %s", - control_file, strerror(errno)); - - if (rmdir(from_backup_path)) - elog(ERROR, "Could not remove directory \"%s\": %s", - from_backup_path, strerror(errno)); - - /* - * Delete files which are not in from_backup file list. - */ - for (i = 0; i < parray_num(to_files); i++) - { - pgFile *file = (pgFile *) parray_get(to_files, i); - - if (parray_bsearch(files, file, pgFileComparePathDesc) == NULL) - { - pgFileDelete(file); - elog(LOG, "Deleted \"%s\"", file->path); - } - } - - /* - * Rename FULL backup directory. - */ - if (rename(to_backup_path, from_backup_path) == -1) - elog(ERROR, "Could not rename directory \"%s\" to \"%s\": %s", - to_backup_path, from_backup_path, strerror(errno)); - /* * Update to_backup metadata. */ - pgBackupCopy(to_backup, from_backup); - /* Correct metadata */ - to_backup->backup_mode = BACKUP_MODE_FULL; to_backup->status = BACKUP_STATUS_OK; - to_backup->parent_backup = INVALID_BACKUP_ID; /* Compute summary of size of regular files in the backup */ to_backup->data_bytes = 0; for (i = 0; i < parray_num(files); i++) @@ -325,7 +301,46 @@ merge_backups(pgBackup *to_backup, pgBackup *from_backup) else to_backup->wal_bytes = BYTES_INVALID; - pgBackupWriteFileList(to_backup, files, from_database_path); + write_backup_filelist(to_backup, files, from_database_path); + write_backup(to_backup); + +delete_source_backup: + /* + * Files were copied into to_backup. It is time to remove source backup + * entirely. + */ + delete_backup_files(from_backup); + + /* + * Delete files which are not in from_backup file list. + */ + for (i = 0; i < parray_num(to_files); i++) + { + pgFile *file = (pgFile *) parray_get(to_files, i); + + if (parray_bsearch(files, file, pgFileComparePathDesc) == NULL) + { + pgFileDelete(file); + elog(VERBOSE, "Deleted \"%s\"", file->path); + } + } + + /* + * Rename FULL backup directory. + */ + elog(INFO, "Rename %s to %s", to_backup_id, from_backup_id); + if (rename(to_backup_path, from_backup_path) == -1) + elog(ERROR, "Could not rename directory \"%s\" to \"%s\": %s", + to_backup_path, from_backup_path, strerror(errno)); + + /* + * Merging finished, now we can safely update ID of the destination backup. 
+ */ + pgBackupCopy(to_backup, from_backup); + /* Correct metadata */ + to_backup->backup_mode = BACKUP_MODE_FULL; + to_backup->status = BACKUP_STATUS_OK; + to_backup->parent_backup = INVALID_BACKUP_ID; write_backup(to_backup); /* Cleanup */ @@ -457,14 +472,16 @@ merge_files(void *arg) file->path = to_path_tmp; /* Decompress first/target file */ - restore_data_file(tmp_file_path, file, false, false); + restore_data_file(tmp_file_path, file, false, false, + parse_program_version(to_backup->program_version)); file->path = prev_path; } /* Merge second/source file with first/target file */ restore_data_file(tmp_file_path, file, from_backup->backup_mode == BACKUP_MODE_DIFF_DELTA, - false); + false, + parse_program_version(from_backup->program_version)); elog(VERBOSE, "Compress file and save it to the directory \"%s\"", argument->to_root); @@ -496,7 +513,8 @@ merge_files(void *arg) /* We can merge in-place here */ restore_data_file(to_path_tmp, file, from_backup->backup_mode == BACKUP_MODE_DIFF_DELTA, - true); + true, + parse_program_version(from_backup->program_version)); /* * We need to calculate write_size, restore_data_file() doesn't @@ -505,10 +523,9 @@ merge_files(void *arg) file->write_size = pgFileSize(to_path_tmp); file->crc = pgFileGetCRC(to_path_tmp, false); } - pgFileDelete(file); } else - move_file(argument->from_root, argument->to_root, file); + copy_file(argument->from_root, argument->to_root, file); if (file->write_size != BYTES_INVALID) elog(LOG, "Moved file \"%s\": " INT64_FORMAT " bytes", diff --git a/src/parsexlog.c b/src/parsexlog.c index 7f7365f5..ee7b5076 100644 --- a/src/parsexlog.c +++ b/src/parsexlog.c @@ -237,10 +237,11 @@ doExtractPageMap(void *arg) */ if (XLogRecPtrIsInvalid(found)) { - elog(WARNING, "Thread [%d]: could not read WAL record at %X/%X", + elog(WARNING, "Thread [%d]: could not read WAL record at %X/%X. %s", private_data->thread_num, (uint32) (extract_arg->startpoint >> 32), - (uint32) (extract_arg->startpoint)); + (uint32) (extract_arg->startpoint), + (xlogreader->errormsg_buf[0] != '\0')?xlogreader->errormsg_buf:""); PrintXLogCorruptionMsg(private_data, ERROR); } extract_arg->startpoint = found; diff --git a/src/pg_probackup.c b/src/pg_probackup.c index 113e8901..00b0fc42 100644 --- a/src/pg_probackup.c +++ b/src/pg_probackup.c @@ -3,7 +3,7 @@ * pg_probackup.c: Backup/Recovery manager for PostgreSQL. 
* * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -17,7 +17,7 @@ #include "utils/thread.h" -const char *PROGRAM_VERSION = "2.0.22"; +const char *PROGRAM_VERSION = "2.0.24"; const char *PROGRAM_URL = "https://github.com/postgrespro/pg_probackup"; const char *PROGRAM_EMAIL = "https://github.com/postgrespro/pg_probackup/issues"; @@ -89,6 +89,8 @@ static pgRecoveryTarget *recovery_target_options = NULL; bool restore_as_replica = false; bool restore_no_validate = false; +bool skip_block_validation = false; + /* delete options */ bool delete_wal = false; bool delete_expired = false; @@ -179,6 +181,7 @@ static pgut_option options[] = { 'b', 'R', "restore-as-replica", &restore_as_replica, SOURCE_CMDLINE }, { 'b', 27, "no-validate", &restore_no_validate, SOURCE_CMDLINE }, { 's', 28, "lsn", &target_lsn, SOURCE_CMDLINE }, + { 'b', 29, "skip-block-validation", &skip_block_validation, SOURCE_CMDLINE }, /* delete options */ { 'b', 130, "wal", &delete_wal, SOURCE_CMDLINE }, { 'b', 131, "expired", &delete_expired, SOURCE_CMDLINE }, diff --git a/src/pg_probackup.h b/src/pg_probackup.h index e337771e..ca45d604 100644 --- a/src/pg_probackup.h +++ b/src/pg_probackup.h @@ -3,7 +3,7 @@ * pg_probackup.h: Backup/Recovery manager for PostgreSQL. * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -57,6 +57,10 @@ #define XID_FMT "%u" #endif +/* Check if an XLogRecPtr value points to offset 0 */ +#define XRecOffIsNull(xlrp) \ + ((xlrp) % XLOG_BLCKSZ == 0) + typedef enum CompressAlg { NOT_DEFINED_COMPRESS = 0, @@ -65,6 +69,28 @@ typedef enum CompressAlg ZLIB_COMPRESS, } CompressAlg; +#define INIT_FILE_CRC32(use_crc32c, crc) \ +do { \ + if (use_crc32c) \ + INIT_CRC32C(crc); \ + else \ + INIT_TRADITIONAL_CRC32(crc); \ +} while (0) +#define COMP_FILE_CRC32(use_crc32c, crc, data, len) \ +do { \ + if (use_crc32c) \ + COMP_CRC32C((crc), (data), (len)); \ + else \ + COMP_TRADITIONAL_CRC32(crc, data, len); \ +} while (0) +#define FIN_FILE_CRC32(use_crc32c, crc) \ +do { \ + if (use_crc32c) \ + FIN_CRC32C(crc); \ + else \ + FIN_TRADITIONAL_CRC32(crc); \ +} while (0) + /* Information about single file (or dir) in backup */ typedef struct pgFile { @@ -339,6 +365,7 @@ extern bool exclusive_backup; /* restore options */ extern bool restore_as_replica; +extern bool skip_block_validation; /* delete options */ extern bool delete_wal; @@ -425,6 +452,7 @@ extern int do_show(time_t requested_backup_id); /* in delete.c */ extern void do_delete(time_t backup_id); +extern void delete_backup_files(pgBackup *backup); extern int do_retention_purge(void); extern int do_delete_instance(void); @@ -455,10 +483,11 @@ extern pgBackup *catalog_get_last_data_backup(parray *backup_list, TimeLineID tli); extern void catalog_lock(void); extern void pgBackupWriteControl(FILE *out, pgBackup *backup); -extern void pgBackupWriteFileList(pgBackup *backup, parray *files, +extern void write_backup_filelist(pgBackup *backup, parray *files, const char *root); -extern void pgBackupGetPath(const pgBackup *backup, char *path, size_t len, const char *subdir); +extern void pgBackupGetPath(const pgBackup 
*backup, char *path, size_t len, + const char *subdir); extern void pgBackupGetPath2(const pgBackup *backup, char *path, size_t len, const char *subdir1, const char *subdir2); extern int pgBackupCreateDir(pgBackup *backup); @@ -517,9 +546,9 @@ extern bool backup_data_file(backup_files_arg* arguments, CompressAlg calg, int clevel); extern void restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, - bool write_header); + bool write_header, + uint32 backup_version); extern bool copy_file(const char *from_root, const char *to_root, pgFile *file); -extern void move_file(const char *from_root, const char *to_root, pgFile *file); extern void push_wal_file(const char *from_path, const char *to_path, bool is_compress, bool overwrite); extern void get_wal_file(const char *from_path, const char *to_path); @@ -527,8 +556,8 @@ extern void get_wal_file(const char *from_path, const char *to_path); extern bool calc_file_checksum(pgFile *file); extern bool check_file_pages(pgFile* file, - XLogRecPtr stop_lsn, uint32 checksum_version); - + XLogRecPtr stop_lsn, + uint32 checksum_version, uint32 backup_version); /* parsexlog.c */ extern void extractPageMap(const char *archivedir, TimeLineID tli, uint32 seg_size, @@ -555,6 +584,7 @@ extern uint64 get_system_identifier(char *pgdata); extern uint64 get_remote_system_identifier(PGconn *conn); extern uint32 get_data_checksum_version(bool safe); extern uint32 get_xlog_seg_size(char *pgdata_path); +extern void set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_backup_lsn); extern void sanityChecks(void); extern void time2iso(char *buf, size_t len, time_t time); diff --git a/src/restore.c b/src/restore.c index 9c87cd39..439f3c4e 100644 --- a/src/restore.c +++ b/src/restore.c @@ -3,7 +3,7 @@ * restore.c: restore DB cluster and archived WAL. * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -621,7 +621,8 @@ restore_files(void *arg) file->path + strlen(from_root) + 1); restore_data_file(to_path, file, arguments->backup->backup_mode == BACKUP_MODE_DIFF_DELTA, - false); + false, + parse_program_version(arguments->backup->program_version)); } else copy_file(from_root, pgdata, file); diff --git a/src/util.c b/src/util.c index 27a0ca24..94a18a9a 100644 --- a/src/util.c +++ b/src/util.c @@ -3,7 +3,7 @@ * util.c: log messages to log file or stderr, and misc code. 
* * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -14,6 +14,8 @@ #include +#include + const char * base36enc(long unsigned int value) { @@ -101,6 +103,44 @@ digestControlFile(ControlFileData *ControlFile, char *src, size_t size) checkControlFile(ControlFile); } +/* + * Write ControlFile to pg_control + */ +static void +writeControlFile(ControlFileData *ControlFile, char *path) +{ + int fd; + char *buffer = NULL; + +#if PG_VERSION_NUM >= 100000 + int ControlFileSize = PG_CONTROL_FILE_SIZE; +#else + int ControlFileSize = PG_CONTROL_SIZE; +#endif + + /* copy ControlFileData into a zero-filled buffer, which pads pg_control to ControlFileSize */ + buffer = pg_malloc0(ControlFileSize); + memcpy(buffer, ControlFile, sizeof(ControlFileData)); + + /* Write pg_control */ + unlink(path); + fd = open(path, + O_RDWR | O_CREAT | O_EXCL | PG_BINARY, + S_IRUSR | S_IWUSR); + + if (fd < 0) + elog(ERROR, "Failed to open file: %s", path); + + if (write(fd, buffer, ControlFileSize) != ControlFileSize) + elog(ERROR, "Failed to overwrite file: %s", path); + + if (fsync(fd) != 0) + elog(ERROR, "Failed to fsync file: %s", path); + + close(fd); + pg_free(buffer); +} + /* * Utility shared by backup and restore to fetch the current timeline * used by a node. @@ -251,6 +291,52 @@ get_data_checksum_version(bool safe) return ControlFile.data_checksum_version; } +/* minRecoveryPoint stored 'as-is' in a backup from replica is not to be trusted, + * so overwrite it in the backup's copy of pg_control with stop_backup_lsn */ +void +set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_backup_lsn) +{ + ControlFileData ControlFile; + char *buffer; + size_t size; + char fullpath[MAXPGPATH]; + + /* First fetch file content */ + buffer = slurpFile(pgdata, XLOG_CONTROL_FILE, &size, false); + if (buffer == NULL) + elog(ERROR, "Failed to read %s", XLOG_CONTROL_FILE); + + digestControlFile(&ControlFile, buffer, size); + + elog(LOG, "Current minRecPoint %X/%X", + (uint32) (ControlFile.minRecoveryPoint >> 32), + (uint32) ControlFile.minRecoveryPoint); + + elog(LOG, "Setting minRecPoint to %X/%X", + (uint32) (stop_backup_lsn >> 32), + (uint32) stop_backup_lsn); + + ControlFile.minRecoveryPoint = stop_backup_lsn; + + /* Update checksum in pg_control header */ + INIT_CRC32C(ControlFile.crc); + COMP_CRC32C(ControlFile.crc, + (char *) &ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32C(ControlFile.crc); + + /* paranoia */ + checkControlFile(&ControlFile); + + /* overwrite pg_control */ + snprintf(fullpath, sizeof(fullpath), "%s/%s", backup_path, XLOG_CONTROL_FILE); + writeControlFile(&ControlFile, fullpath); + + /* Update pg_control checksum in backup_list */ + file->crc = pgFileGetCRC(fullpath, false); + + pg_free(buffer); +} + /* * Convert time_t value to ISO-8601 format string. Always set timezone offset. diff --git a/src/utils/logger.c b/src/utils/logger.c index 563d2027..4cdbf721 100644 --- a/src/utils/logger.c +++ b/src/utils/logger.c @@ -2,7 +2,7 @@ * * logger.c: - log events into log file or stderr. * - * Copyright (c) 2017-2017, Postgres Professional + * Copyright (c) 2017-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/utils/logger.h b/src/utils/logger.h index e1feb86c..15ec38f1 100644 --- a/src/utils/logger.h +++ b/src/utils/logger.h @@ -2,7 +2,7 @@ * * logger.h: - prototypes of logger functions. 
* - * Copyright (c) 2017-2017, Postgres Professional + * Copyright (c) 2017-2018, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/utils/pgut.c b/src/utils/pgut.c index ec3fd8bb..b789a326 100644 --- a/src/utils/pgut.c +++ b/src/utils/pgut.c @@ -3,7 +3,7 @@ * pgut.c * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2017-2017, Postgres Professional + * Portions Copyright (c) 2017-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -1655,8 +1655,8 @@ pgut_disconnect(PGconn *conn) PGresult * -pgut_execute_parallel(PGconn* conn, - PGcancel* thread_cancel_conn, const char *query, +pgut_execute_parallel(PGconn* conn, + PGcancel* thread_cancel_conn, const char *query, int nParams, const char **params, bool text_result) { diff --git a/src/utils/pgut.h b/src/utils/pgut.h index 9aac75ca..6e9a50fd 100644 --- a/src/utils/pgut.h +++ b/src/utils/pgut.h @@ -3,7 +3,7 @@ * pgut.h * * Portions Copyright (c) 2009-2013, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2017-2017, Postgres Professional + * Portions Copyright (c) 2017-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -115,7 +115,7 @@ extern PGresult *pgut_execute(PGconn* conn, const char *query, int nParams, const char **params); extern PGresult *pgut_execute_extended(PGconn* conn, const char *query, int nParams, const char **params, bool text_result, bool ok_error); -extern PGresult *pgut_execute_parallel(PGconn* conn, PGcancel* thread_cancel_conn, +extern PGresult *pgut_execute_parallel(PGconn* conn, PGcancel* thread_cancel_conn, const char *query, int nParams, const char **params, bool text_result); extern bool pgut_send(PGconn* conn, const char *query, int nParams, const char **params, int elevel); diff --git a/src/validate.c b/src/validate.c index f61bcb79..55ea5ebe 100644 --- a/src/validate.c +++ b/src/validate.c @@ -3,7 +3,7 @@ * validate.c: validate backup files. * * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION - * Portions Copyright (c) 2015-2017, Postgres Professional + * Portions Copyright (c) 2015-2018, Postgres Professional * *------------------------------------------------------------------------- */ @@ -19,8 +19,6 @@ static void *pgBackupValidateFiles(void *arg); static void do_validate_instance(void); static bool corrupted_backup_found = false; -/* Program version of a current backup */ -static uint32 validate_backup_version = 0; typedef struct { @@ -28,6 +26,7 @@ typedef struct bool corrupted; XLogRecPtr stop_lsn; uint32 checksum_version; + uint32 backup_version; /* * Return value from the thread. 
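Note on the validate.c changes below: the per-backup program version now travels with each worker thread (backup_version in validate_files_arg) and gates both the CRC flavour (`<= 20021`, i.e. 2.0.21 and older use CRC-32C) and the compressed-page heuristic (`>= 20023` in page_may_be_compressed()). A self-contained sketch of the version encoding these comparisons assume; `parse_version` is a hypothetical stand-in for pg_probackup's parse_program_version():

```c
#include <stdio.h>

/* Hypothetical stand-in for parse_program_version(): "2.0.21" -> 20021,
 * so plain integer comparisons such as "backup_version <= 20021" work. */
static unsigned int
parse_version(const char *version_str)
{
	unsigned int major = 0, minor = 0, patch = 0;

	sscanf(version_str, "%u.%u.%u", &major, &minor, &patch);
	return major * 10000 + minor * 100 + patch;
}

int main(void)
{
	printf("%u\n", parse_version("2.0.21"));	/* 20021: last version assumed to use CRC-32C */
	printf("%u\n", parse_version("2.0.23"));	/* 20023: first version without the compression bug */
	return 0;
}
```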
@@ -92,11 +91,6 @@ pgBackupValidate(pgBackup *backup)
 		pg_atomic_clear_flag(&file->lock);
 	}
 
-	/*
-	 * We use program version to calculate checksum in pgBackupValidateFiles()
-	 */
-	validate_backup_version = parse_program_version(backup->program_version);
-
 	/* init thread args with own file lists */
 	threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads);
 	threads_args = (validate_files_arg *)
@@ -111,6 +105,7 @@ pgBackupValidate(pgBackup *backup)
 		arg->corrupted = false;
 		arg->stop_lsn = backup->stop_lsn;
 		arg->checksum_version = backup->checksum_version;
+		arg->backup_version = parse_program_version(backup->program_version);
 
 		/* By default there are some error */
 		threads_args[i].ret = 1;
@@ -213,30 +208,42 @@ pgBackupValidateFiles(void *arg)
 		}
 
 		/*
-		 * Pre 2.0.22 we use CRC-32C, but in newer version of pg_probackup we
-		 * use CRC-32.
-		 *
-		 * pg_control stores its content and checksum of the content, calculated
-		 * using CRC-32C. If we calculate checksum of the whole pg_control using
-		 * CRC-32C we get same checksum constantly. It might be because of the
-		 * CRC-32C algorithm.
-		 * To avoid this problem we need to use different algorithm, CRC-32 in
-		 * this case.
+		 * If the skip-block-validation option is set, compute only the
+		 * file-level CRC for data files; otherwise check them block by block.
 		 */
-		crc = pgFileGetCRC(file->path, validate_backup_version <= 20021);
-		if (crc != file->crc)
+		if (!file->is_datafile || skip_block_validation)
 		{
-			elog(WARNING, "Invalid CRC of backup file \"%s\" : %X. Expected %X",
-				 file->path, file->crc, crc);
-			arguments->corrupted = true;
-
-			/* validate relation blocks */
-			if (file->is_datafile)
+			/*
+			 * Backups taken before 2.0.22 store CRC-32C file checksums;
+			 * newer versions of pg_probackup use CRC-32.
+			 *
+			 * pg_control stores its content together with a CRC-32C of
+			 * that content, so a CRC-32C taken over the whole file comes
+			 * out the same for any content: the CRC of data followed by
+			 * its own CRC is a fixed residue. Such a checksum would make
+			 * pg_control corruption undetectable, so we use a different
+			 * algorithm, CRC-32, for the file-level checksum.
+			 */
+			crc = pgFileGetCRC(file->path, arguments->backup_version <= 20021);
+			if (crc != file->crc)
 			{
-				if (!check_file_pages(file, arguments->stop_lsn, arguments->checksum_version))
-					arguments->corrupted = true;
+				elog(WARNING, "Invalid CRC of backup file \"%s\" : %X. 
Expected %X", + file->path, file->crc, crc); + arguments->corrupted = true; } } + else + { + /* + * validate relation block by block + * check page headers, checksums (if enabled) + * and compute checksum of the file + */ + if (!check_file_pages(file, arguments->stop_lsn, + arguments->checksum_version, + arguments->backup_version)) + arguments->corrupted = true; + } } /* Data files validation is successful */ diff --git a/tests/archive.py b/tests/archive.py index 8b8eb71a..4ed783d6 100644 --- a/tests/archive.py +++ b/tests/archive.py @@ -5,6 +5,7 @@ from datetime import datetime, timedelta import subprocess from sys import exit from time import sleep +from shutil import copyfile module_name = 'archive' @@ -39,8 +40,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): result = node.safe_psql("postgres", "SELECT * FROM t_heap") self.backup_node( - backup_dir, 'node', node, - options=["--log-level-file=verbose"]) + backup_dir, 'node', node) node.cleanup() self.restore_node( @@ -53,8 +53,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): # Make backup self.backup_node( - backup_dir, 'node', node, - options=["--log-level-file=verbose"]) + backup_dir, 'node', node) node.cleanup() # Restore Database @@ -253,7 +252,6 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): backup_dir, 'node', node, options=[ "--archive-timeout=60", - "--log-level-file=verbose", "--stream"] ) # we should die here because exception is what we expect to happen @@ -402,7 +400,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): self.del_test_dir(module_name, fname) # @unittest.expectedFailure - # @unittest.skip("skip") + @unittest.skip("skip") def test_replica_archive(self): """ make node without archiving, take stream backup and @@ -417,7 +415,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): initdb_params=['--data-checksums'], pg_options={ 'max_wal_senders': '2', - 'checkpoint_timeout': '30s', + 'archive_timeout': '10s', 'max_wal_size': '1GB'} ) self.init_pb(backup_dir) @@ -433,7 +431,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): "postgres", "create table t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,256) i") + "from generate_series(0,2560) i") self.backup_node(backup_dir, 'master', master, options=['--stream']) before = master.safe_psql("postgres", "SELECT * FROM t_heap") @@ -459,9 +457,6 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): "md5(repeat(i::text,10))::tsvector as tsvector " "from generate_series(256,512) i") before = master.safe_psql("postgres", "SELECT * FROM t_heap") - # ADD INSTANCE 'REPLICA' - - sleep(1) backup_id = self.backup_node( backup_dir, 'replica', replica, @@ -469,7 +464,9 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): '--archive-timeout=30', '--master-host=localhost', '--master-db=postgres', - '--master-port={0}'.format(master.port)]) + '--master-port={0}'.format(master.port), + '--stream']) + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) @@ -493,16 +490,28 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): "postgres", "insert into t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(512,768) i") + "from generate_series(512,20680) i") + before = master.safe_psql("postgres", "SELECT * FROM t_heap") + + master.safe_psql( + "postgres", + "CHECKPOINT") + +# copyfile( +# os.path.join(backup_dir, 
'wal/master/000000010000000000000002'), +# os.path.join(backup_dir, 'wal/replica/000000010000000000000002')) + backup_id = self.backup_node( backup_dir, 'replica', replica, backup_type='page', options=[ - '--archive-timeout=30', '--log-level-file=verbose', - '--master-host=localhost', '--master-db=postgres', - '--master-port={0}'.format(master.port)] - ) + '--archive-timeout=30', + '--master-db=postgres', + '--master-host=localhost', + '--master-port={0}'.format(master.port), + '--stream']) + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) @@ -511,8 +520,10 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): node.cleanup() self.restore_node( backup_dir, 'replica', data_dir=node.data_dir, backup_id=backup_id) + node.append_conf( 'postgresql.auto.conf', 'port = {0}'.format(node.port)) + node.slow_start() # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") @@ -537,7 +548,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'checkpoint_timeout': '30s'} + 'archive_timeout': '10s'} ) replica = self.make_simple_node( base_dir="{0}/{1}/replica".format(module_name, fname)) @@ -568,7 +579,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): pgdata_replica = self.pgdata_content(replica.data_dir) self.compare_pgdata(pgdata_master, pgdata_replica) - self.set_replica(master, replica, synchronous=True) + self.set_replica(master, replica) # ADD INSTANCE REPLICA self.add_instance(backup_dir, 'replica', replica) # SET ARCHIVING FOR REPLICA @@ -579,16 +590,26 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): after = replica.safe_psql("postgres", "SELECT * FROM t_heap") self.assertEqual(before, after) + master.psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0, 60000) i") + # TAKE FULL ARCHIVE BACKUP FROM REPLICA + copyfile( + os.path.join(backup_dir, 'wal/master/000000010000000000000001'), + os.path.join(backup_dir, 'wal/replica/000000010000000000000001')) + backup_id = self.backup_node( backup_dir, 'replica', replica, options=[ - '--archive-timeout=20', - '--log-level-file=verbose', + '--archive-timeout=30', '--master-host=localhost', '--master-db=postgres', - '--master-port={0}'.format(master.port)] - ) + '--master-port={0}'.format(master.port), + '--stream']) + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) @@ -618,7 +639,8 @@ class ArchiveTest(ProbackupTest, unittest.TestCase): set_replication=True, initdb_params=['--data-checksums'], pg_options={ - 'checkpoint_timeout': '30s'} + 'checkpoint_timeout': '30s', + 'archive_timeout': '10s'} ) replica = self.make_simple_node( base_dir="{0}/{1}/replica".format(module_name, fname)) diff --git a/tests/backup_test.py b/tests/backup_test.py index b21aab9b..9c8a0955 100644 --- a/tests/backup_test.py +++ b/tests/backup_test.py @@ -328,7 +328,7 @@ class BackupTest(ProbackupTest, unittest.TestCase): self.backup_node( backup_dir, 'node', node, backup_type="full", - options=["-j", "4", "--stream", '--log-level-file=verbose']) + options=["-j", "4", "--stream", "--log-level-file=verbose"]) # open log file and check with open(os.path.join(backup_dir, 'log', 'pg_probackup.log')) as f: diff --git a/tests/compatibility.py b/tests/compatibility.py index 3d67bf3e..39070b3f 100644 --- 
a/tests/compatibility.py +++ b/tests/compatibility.py @@ -94,8 +94,7 @@ class CompatibilityTest(ProbackupTest, unittest.TestCase): pgbench.stdout.close() self.backup_node( - backup_dir, 'node', node, backup_type='page', - options=['--log-level-file=verbose']) + backup_dir, 'node', node, backup_type='page') if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -195,8 +194,7 @@ class CompatibilityTest(ProbackupTest, unittest.TestCase): pgbench.stdout.close() self.backup_node( - backup_dir, 'node', node, backup_type='delta', - options=['--log-level-file=verbose']) + backup_dir, 'node', node, backup_type='delta') if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -296,8 +294,7 @@ class CompatibilityTest(ProbackupTest, unittest.TestCase): pgbench.stdout.close() self.backup_node( - backup_dir, 'node', node, backup_type='delta', - options=['--log-level-file=verbose']) + backup_dir, 'node', node, backup_type='delta') if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -311,3 +308,162 @@ class CompatibilityTest(ProbackupTest, unittest.TestCase): if self.paranoia: pgdata_restored = self.pgdata_content(node_restored.data_dir) self.compare_pgdata(pgdata, pgdata_restored) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_backward_compatibility_compression(self): + """Description in jira issue PGPRO-434""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir="{0}/{1}/node".format(module_name, fname), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.add_instance(backup_dir, 'node', node, old_binary=True) + + self.set_archiving(backup_dir, 'node', node, old_binary=True) + node.slow_start() + + node.pgbench_init(scale=10) + + # FULL backup with OLD binary + backup_id = self.backup_node( + backup_dir, 'node', node, + old_binary=True, + options=['--compress']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + # restore OLD FULL with new binary + node_restored = self.make_simple_node( + base_dir="{0}/{1}/node_restored".format(module_name, fname)) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # PAGE backup with OLD binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "10"]) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, + backup_type='page', + old_binary=True, + options=['--compress']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + self.restore_node( + backup_dir, 'node', node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # PAGE backup with new binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "10"]) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, + backup_type='page', + options=['--compress']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 
'node', node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Delta backup with old binary + self.delete_pb(backup_dir, 'node', backup_id) + + self.backup_node( + backup_dir, 'node', node, + old_binary=True, + options=['--compress']) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "10"]) + + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=['--compress'], + old_binary=True) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Delta backup with new binary + pgbench = node.pgbench( + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + options=["-c", "4", "-T", "10"]) + + pgbench.wait() + pgbench.stdout.close() + + self.backup_node( + backup_dir, 'node', node, + backup_type='delta', + options=['--compress']) + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + node_restored.cleanup() + + self.restore_node( + backup_dir, 'node', node_restored, + options=["-j", "4"]) + + if self.paranoia: + pgdata_restored = self.pgdata_content(node_restored.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) diff --git a/tests/compression.py b/tests/compression.py index aa275382..2e712a15 100644 --- a/tests/compression.py +++ b/tests/compression.py @@ -55,9 +55,7 @@ class CompressionTest(ProbackupTest, unittest.TestCase): page_backup_id = self.backup_node( backup_dir, 'node', node, backup_type='page', options=[ - '--stream', '--compress-algorithm=zlib', - '--log-level-console=verbose', - '--log-level-file=verbose']) + '--stream', '--compress-algorithm=zlib']) # PTRACK BACKUP node.safe_psql( @@ -494,3 +492,68 @@ class CompressionTest(ProbackupTest, unittest.TestCase): # Clean after yourself self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_uncompressable_pages(self): + """ + make archive node, create table with uncompressable toast pages, + take backup with compression, make sure that page was not compressed, + restore backup and check data correctness + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir="{0}/{1}/node".format(module_name, fname), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'wal_level': 'replica', + 'max_wal_senders': '2', + 'checkpoint_timeout': '30s'} + ) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + +# node.safe_psql( +# "postgres", +# "create table t_heap as select i, " +# "repeat('1234567890abcdefghiyklmn', 1)::bytea, " +# "point(0,0) from generate_series(0,1) i") + + node.safe_psql( + "postgres", + "create table t as select i, " + "repeat(md5(i::text),5006056) as fat_attr " + "from generate_series(0,10) i;") + + self.backup_node( + backup_dir, 'node', node, + backup_type='full', + options=[ + '--compress']) + + node.cleanup() + + self.restore_node(backup_dir, 'node', node) + node.slow_start() + + self.backup_node( + backup_dir, 'node', node, + backup_type='full', + options=[ + '--compress']) + 
+ # Clean after yourself + # self.del_test_dir(module_name, fname) + +# create table t as select i, repeat(md5('1234567890'), 1)::bytea, point(0,0) from generate_series(0,1) i; + + +# create table t_bytea_1(file oid); +# INSERT INTO t_bytea_1 (file) +# VALUES (lo_import('/home/gsmol/git/postgres/contrib/pg_probackup/tests/expected/sample.random', 24593)); +# insert into t_bytea select string_agg(data,'') from pg_largeobject where pageno > 0; +# \ No newline at end of file diff --git a/tests/delta.py b/tests/delta.py index bdbfac91..55cc03be 100644 --- a/tests/delta.py +++ b/tests/delta.py @@ -80,13 +80,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): pgdata = self.pgdata_content(node.data_dir) self.restore_node( - backup_dir, - 'node', - node_restored, - options=[ - "-j", "1", - "--log-level-file=verbose" - ] + backup_dir, 'node', node_restored ) # Physical comparison @@ -176,8 +170,6 @@ class DeltaTest(ProbackupTest, unittest.TestCase): 'node', node_restored, options=[ - "-j", "1", - "--log-level-file=verbose", "-T", "{0}={1}".format( old_tablespace, new_tablespace)] ) @@ -251,13 +243,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): pgdata = self.pgdata_content(node.data_dir) self.restore_node( - backup_dir, - 'node', - node_restored, - options=[ - "-j", "1", - "--log-level-file=verbose" - ] + backup_dir, 'node', node_restored ) # Physical comparison @@ -683,7 +669,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): node_restored, backup_id=backup_id, options=[ - "-j", "4", "--log-level-file=verbose", + "-j", "4", "--immediate", "--recovery-target-action=promote"]) @@ -717,7 +703,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): node_restored, backup_id=backup_id, options=[ - "-j", "4", "--log-level-file=verbose", + "-j", "4", "--immediate", "--recovery-target-action=promote"] ) @@ -815,7 +801,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): backup_id = self.backup_node( backup_dir, 'node', node, backup_type='delta', - options=["--stream", "--log-level-file=verbose"] + options=["--stream"] ) # if self.paranoia: # pgdata_delta = self.pgdata_content( @@ -844,7 +830,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): node_restored, backup_id=backup_id, options=[ - "-j", "4", "--log-level-file=verbose", + "-j", "4", "--immediate", "--recovery-target-action=promote"]) @@ -1135,7 +1121,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase): self.del_test_dir(module_name, fname) # @unittest.skip("skip") - def test_page_corruption_heal_via_ptrack_1(self): + def test_delta_corruption_heal_via_ptrack_1(self): """make node, corrupt some page, check that backup failed""" fname = self.id().split('.')[3] node = self.make_simple_node( @@ -1174,8 +1160,10 @@ class DeltaTest(ProbackupTest, unittest.TestCase): f.close self.backup_node( - backup_dir, 'node', node, backup_type="delta", - options=["-j", "4", "--stream", "--log-level-file=verbose"]) + backup_dir, 'node', node, + backup_type="delta", + options=["-j", "4", "--stream", '--log-level-file=verbose']) + # open log file and check with open(os.path.join(backup_dir, 'log', 'pg_probackup.log')) as f: diff --git a/tests/exclude.py b/tests/exclude.py index 48b7889c..3fd3341f 100644 --- a/tests/exclude.py +++ b/tests/exclude.py @@ -143,7 +143,7 @@ class ExcludeTest(ProbackupTest, unittest.TestCase): self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] + options=['--stream'] ) pgdata = self.pgdata_content(node.data_dir) diff --git 
a/tests/expected/option_help.out b/tests/expected/option_help.out index 228598ed..ecc59a89 100644 --- a/tests/expected/option_help.out +++ b/tests/expected/option_help.out @@ -50,6 +50,7 @@ pg_probackup - utility to manage backup/recovery of PostgreSQL database. [--master-db=db_name] [--master-host=host_name] [--master-port=port] [--master-user=user_name] [--replica-timeout=timeout] + [--skip-block-validation] pg_probackup restore -B backup-path --instance=instance_name [-D pgdata-path] [-i backup-id] [--progress] @@ -59,12 +60,14 @@ pg_probackup - utility to manage backup/recovery of PostgreSQL database. [--recovery-target-action=pause|promote|shutdown] [--restore-as-replica] [--no-validate] + [--skip-block-validation] pg_probackup validate -B backup-path [--instance=instance_name] [-i backup-id] [--progress] [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]] [--recovery-target-name=target-name] [--timeline=timeline] + [--skip-block-validation] pg_probackup show -B backup-path [--instance=instance_name [-i backup-id]] diff --git a/tests/expected/option_version.out b/tests/expected/option_version.out index cb0a30d4..5280a6f9 100644 --- a/tests/expected/option_version.out +++ b/tests/expected/option_version.out @@ -1 +1 @@ -pg_probackup 2.0.22 \ No newline at end of file +pg_probackup 2.0.24 \ No newline at end of file diff --git a/tests/false_positive.py b/tests/false_positive.py index 04062b79..df7b1334 100644 --- a/tests/false_positive.py +++ b/tests/false_positive.py @@ -143,7 +143,7 @@ class FalsePositive(ProbackupTest, unittest.TestCase): self.backup_node(backup_dir, 'node', node, options=['--stream']) gdb = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'], + options=['--stream'], gdb=True ) @@ -227,7 +227,7 @@ class FalsePositive(ProbackupTest, unittest.TestCase): self.backup_node(backup_dir, 'node', node, options=['--stream']) gdb = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'], + options=['--stream'], gdb=True ) diff --git a/tests/merge.py b/tests/merge.py index 0169b275..5f7ae7da 100644 --- a/tests/merge.py +++ b/tests/merge.py @@ -2,7 +2,7 @@ import unittest import os -from .helpers.ptrack_helpers import ProbackupTest +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException module_name = "merge" @@ -407,17 +407,17 @@ class MergeTest(ProbackupTest, unittest.TestCase): node.safe_psql( "postgres", "delete from t_heap where ctid >= '(11,0)'") + node.safe_psql( "postgres", "vacuum t_heap") - self.backup_node( + page_id = self.backup_node( backup_dir, 'node', node, backup_type='ptrack') if self.paranoia: pgdata = self.pgdata_content(node.data_dir) - page_id = self.show_pb(backup_dir, "node")[1]["id"] self.merge_backup(backup_dir, "node", page_id) self.validate_pb(backup_dir) @@ -602,7 +602,7 @@ class MergeTest(ProbackupTest, unittest.TestCase): gdb = self.merge_backup(backup_dir, "node", backup_id, gdb=True) - gdb.set_breakpoint('move_file') + gdb.set_breakpoint('copy_file') gdb.run_until_break() if gdb.continue_execution_until_break(20) != 'breakpoint-hit': @@ -615,3 +615,132 @@ class MergeTest(ProbackupTest, unittest.TestCase): # Try to continue failed MERGE self.merge_backup(backup_dir, "node", backup_id) + + # Drop node and restore it + node.cleanup() + self.restore_node(backup_dir, 'node', node) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def 
test_continue_failed_merge_with_corrupted_delta_backup(self):
+        """
+        Fail merge via gdb, corrupt DELTA backup, try to continue merge
+        """
+        fname = self.id().split('.')[3]
+        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
+        node = self.make_simple_node(
+            base_dir="{0}/{1}/node".format(module_name, fname),
+            set_replication=True, initdb_params=['--data-checksums'],
+            pg_options={
+                'wal_level': 'replica'
+            }
+        )
+
+        self.init_pb(backup_dir)
+        self.add_instance(backup_dir, 'node', node)
+        self.set_archiving(backup_dir, 'node', node)
+        node.start()
+
+        # FULL backup
+        self.backup_node(backup_dir, 'node', node)
+
+        node.safe_psql(
+            "postgres",
+            "create table t_heap as select i as id,"
+            " md5(i::text) as text, md5(i::text)::tsvector as tsvector"
+            " from generate_series(0,1000) i"
+        )
+
+        old_path = node.safe_psql(
+            "postgres",
+            "select pg_relation_filepath('t_heap')").rstrip()
+
+        # DELTA BACKUP
+        self.backup_node(
+            backup_dir, 'node', node, backup_type='delta'
+        )
+
+        node.safe_psql(
+            "postgres",
+            "update t_heap set id = 100500"
+        )
+
+        node.safe_psql(
+            "postgres",
+            "vacuum full t_heap"
+        )
+
+        new_path = node.safe_psql(
+            "postgres",
+            "select pg_relation_filepath('t_heap')").rstrip()
+
+        # DELTA BACKUP
+        backup_id_2 = self.backup_node(
+            backup_dir, 'node', node, backup_type='delta'
+        )
+
+        backup_id = self.show_pb(backup_dir, "node")[1]["id"]
+
+        # Failed MERGE
+        gdb = self.merge_backup(backup_dir, "node", backup_id, gdb=True)
+        gdb.set_breakpoint('copy_file')
+        gdb.run_until_break()
+
+        if gdb.continue_execution_until_break(2) != 'breakpoint-hit':
+            print('Failed to hit breakpoint')
+            exit(1)
+
+        gdb._execute('signal SIGKILL')
+
+        # CORRUPT incremental backup
+        # read block from future
+        # block_size + backup_header = 8200
+        file = os.path.join(
+            backup_dir, 'backups/node', backup_id_2, 'database', new_path)
+        with open(file, 'rb') as f:
+            f.seek(8200)
+            block_1 = f.read(8200)
+            f.close()
+
+        # write block from future
+        file = os.path.join(
+            backup_dir, 'backups/node', backup_id, 'database', old_path)
+        with open(file, 'r+b') as f:
+            f.seek(8200)
+            f.write(block_1)
+            f.close()
+
+        # Try to continue failed MERGE
+        try:
+            self.merge_backup(backup_dir, "node", backup_id)
+            self.assertEqual(
+                1, 0,
+                "Expecting Error because of incremental backup corruption.\n "
+                "Output: {0} \n CMD: {1}".format(
+                    repr(self.output), self.cmd))
+        except ProbackupException as e:
+            self.assertTrue(
+                "WARNING: Backup {0} data files are corrupted".format(
+                    backup_id) in e.message and
+                "ERROR: Merging of backup {0} failed".format(
+                    backup_id) in e.message,
+                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
+                    repr(e.message), self.cmd))
+
+        # Clean after yourself
+        self.del_test_dir(module_name, fname)
+
+# 1. always use parent link when merging (intermediates may be from different chain)
+# 2. page backup we are merging with may disappear after failed merge,
+# it should not be possible to continue merge after that
+# PAGE_A  MERGING (disappear)
+# FULL    MERGING
+
+# FULL    MERGING
+
+# PAGE_B  OK (new backup)
+# FULL    MERGING
+
+# 3. 
Need new test with corrupted FULL backup diff --git a/tests/page.py b/tests/page.py index 3d19a81d..d31b8f60 100644 --- a/tests/page.py +++ b/tests/page.py @@ -62,8 +62,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): "vacuum t_heap") self.backup_node( - backup_dir, 'node', node, backup_type='page', - options=['--log-level-file=verbose']) + backup_dir, 'node', node, backup_type='page') self.backup_node( backup_dir, 'node', node, backup_type='page') @@ -333,8 +332,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): result = node.safe_psql("postgres", "select * from pgbench_accounts") # PAGE BACKUP self.backup_node( - backup_dir, 'node', node, backup_type='page', - options=["--log-level-file=verbose"]) + backup_dir, 'node', node, backup_type='page') # GET PHYSICAL CONTENT FROM NODE pgdata = self.pgdata_content(node.data_dir) @@ -727,7 +725,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): self.backup_node( backup_dir, 'node', node, backup_type='page', - options=["-j", "4", '--log-level-file=verbose']) + options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of wal segment disappearance.\n " @@ -797,8 +795,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): # Single-thread PAGE backup try: self.backup_node( - backup_dir, 'node', node, - backup_type='page', options=['--log-level-file=verbose']) + backup_dir, 'node', node, backup_type='page') self.assertEqual( 1, 0, "Expecting Error because of wal segment disappearance.\n " @@ -936,6 +933,8 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): 'INFO: Wait for LSN' in e.message and 'in archived WAL segment' in e.message and 'could not read WAL record at' in e.message and + 'WAL file is from different database system: WAL file database system identifier is' in e.message and + 'pg_control database system identifier is' in e.message and 'Possible WAL corruption. Error has occured during reading WAL segment "{0}"'.format( file_destination) in e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format( @@ -961,6 +960,8 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): 'INFO: Wait for LSN' in e.message and 'in archived WAL segment' in e.message and 'could not read WAL record at' in e.message and + 'WAL file is from different database system: WAL file database system identifier is' in e.message and + 'pg_control database system identifier is' in e.message and 'Possible WAL corruption. 
Error has occured during reading WAL segment "{0}"'.format( file_destination) in e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format( diff --git a/tests/ptrack.py b/tests/ptrack.py index 72159318..5d01d882 100644 --- a/tests/ptrack.py +++ b/tests/ptrack.py @@ -157,13 +157,13 @@ class PtrackTest(ProbackupTest, unittest.TestCase): self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] + options=['--stream'] ) pgdata = self.pgdata_content(node.data_dir) self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] + options=['--stream'] ) self.restore_node( @@ -246,14 +246,11 @@ class PtrackTest(ProbackupTest, unittest.TestCase): exit(1) self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--log-level-file=verbose'] - ) + backup_dir, 'node', node, backup_type='ptrack') self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--log-level-file=verbose'] - ) + backup_dir, 'node', node, backup_type='ptrack') + if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -336,14 +333,10 @@ class PtrackTest(ProbackupTest, unittest.TestCase): ) self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--log-level-file=verbose'] - ) + backup_dir, 'node', node, backup_type='ptrack') self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=['--log-level-file=verbose'] - ) + backup_dir, 'node', node, backup_type='ptrack') if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -409,7 +402,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] + options=['--stream'] ) node.safe_psql( @@ -479,7 +472,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): self.backup_node(backup_dir, 'node', node, options=['--stream']) gdb = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'], + options=['--stream'], gdb=True ) @@ -566,7 +559,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): ptrack_backup_id = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['--stream', '--log-level-file=verbose'] + options=['--stream'] ) if self.paranoia: @@ -989,7 +982,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): node.safe_psql("postgres", "SELECT * FROM t_heap") self.backup_node( backup_dir, 'node', node, - options=["--stream", "--log-level-file=verbose"]) + options=["--stream"]) # CREATE DATABASE DB1 node.safe_psql("postgres", "create database db1") @@ -1002,7 +995,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): backup_id = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=["--stream", "--log-level-file=verbose"] + options=["--stream"] ) if self.paranoia: @@ -1133,7 +1126,8 @@ class PtrackTest(ProbackupTest, unittest.TestCase): '-j10', '--master-host=localhost', '--master-db=postgres', - '--master-port={0}'.format(node.port) + '--master-port={0}'.format(node.port), + '--stream' ] ) @@ -1229,7 +1223,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=["--stream", "--log-level-file=verbose"] + options=["--stream"] ) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -1315,7 +1309,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): # PTRACK BACKUP 
self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=["--stream", '--log-level-file=verbose']) + options=["--stream"]) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -1476,7 +1470,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): # FIRTS PTRACK BACKUP self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=["--stream", "--log-level-file=verbose"]) + options=["--stream"]) # GET PHYSICAL CONTENT FROM NODE if self.paranoia: @@ -1517,7 +1511,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase): # SECOND PTRACK BACKUP self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=["--stream", "--log-level-file=verbose"]) + options=["--stream"]) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) @@ -1612,9 +1606,8 @@ class PtrackTest(ProbackupTest, unittest.TestCase): #result = node.safe_psql("postgres", "select * from pgbench_accounts") # FIRTS PTRACK BACKUP self.backup_node( - backup_dir, 'node', node, backup_type='ptrack', - options=["--log-level-file=verbose"] - ) + backup_dir, 'node', node, backup_type='ptrack') + # GET PHYSICAL CONTENT FROM NODE pgdata = self.pgdata_content(node.data_dir) @@ -1683,9 +1676,8 @@ class PtrackTest(ProbackupTest, unittest.TestCase): self.backup_node( backup_dir, 'node', node, backup_type='ptrack', options=[ - "--stream", "-j 30", - "--log-level-file=verbose"] - ) + "--stream", "-j 30"]) + # we should die here because exception is what we expect to happen self.assertEqual( 1, 0, diff --git a/tests/ptrack_clean.py b/tests/ptrack_clean.py index ae16c662..076291a6 100644 --- a/tests/ptrack_clean.py +++ b/tests/ptrack_clean.py @@ -76,7 +76,7 @@ class SimpleTest(ProbackupTest, unittest.TestCase): # Take PTRACK backup to clean every ptrack backup_id = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['-j10', '--log-level-file=verbose']) + options=['-j10']) node.safe_psql('postgres', 'checkpoint') for i in idx_ptrack: diff --git a/tests/ptrack_empty.py b/tests/ptrack_empty.py index 750a7336..8656f941 100644 --- a/tests/ptrack_empty.py +++ b/tests/ptrack_empty.py @@ -67,7 +67,7 @@ class SimpleTest(ProbackupTest, unittest.TestCase): # Take PTRACK backup backup_id = self.backup_node( backup_dir, 'node', node, backup_type='ptrack', - options=['-j10', '--log-level-file=verbose']) + options=['-j10']) if self.paranoia: pgdata = self.pgdata_content(node.data_dir) diff --git a/tests/replica.py b/tests/replica.py index d74c375c..1ab8515e 100644 --- a/tests/replica.py +++ b/tests/replica.py @@ -5,6 +5,7 @@ from datetime import datetime, timedelta import subprocess from sys import exit import time +from shutil import copyfile module_name = 'replica' @@ -64,6 +65,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): "from generate_series(256,512) i") before = master.safe_psql("postgres", "SELECT * FROM t_heap") self.add_instance(backup_dir, 'replica', replica) + backup_id = self.backup_node( backup_dir, 'replica', replica, options=[ @@ -80,9 +82,11 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): base_dir="{0}/{1}/node".format(module_name, fname)) node.cleanup() self.restore_node(backup_dir, 'replica', data_dir=node.data_dir) + node.append_conf( 'postgresql.auto.conf', 'port = {0}'.format(node.port)) node.slow_start() + # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") self.assertEqual(before, after) @@ -95,7 +99,9 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): "insert into t_heap as select i as id, 
md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " "from generate_series(512,768) i") + before = master.safe_psql("postgres", "SELECT * FROM t_heap") + backup_id = self.backup_node( backup_dir, 'replica', replica, backup_type='ptrack', options=[ @@ -111,9 +117,11 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): node.cleanup() self.restore_node( backup_dir, 'replica', data_dir=node.data_dir, backup_id=backup_id) + node.append_conf( 'postgresql.auto.conf', 'port = {0}'.format(node.port)) node.slow_start() + # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") self.assertEqual(before, after) @@ -136,13 +144,12 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): pg_options={ 'wal_level': 'replica', 'max_wal_senders': '2', - 'checkpoint_timeout': '30s'} + 'checkpoint_timeout': '30s', + 'archive_timeout': '10s'} ) self.init_pb(backup_dir) self.add_instance(backup_dir, 'master', master) self.set_archiving(backup_dir, 'master', master) - # force more frequent wal switch - master.append_conf('postgresql.auto.conf', 'archive_timeout = 10') master.slow_start() replica = self.make_simple_node( @@ -155,7 +162,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): "postgres", "create table t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,256) i") + "from generate_series(0,2560) i") before = master.safe_psql("postgres", "SELECT * FROM t_heap") @@ -166,6 +173,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): # Settings for Replica self.set_replica(master, replica) self.set_archiving(backup_dir, 'replica', replica, replica=True) + replica.slow_start(replica=True) # Check data correctness on replica @@ -179,16 +187,32 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): "postgres", "insert into t_heap as select i as id, md5(i::text) as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(256,512) i") + "from generate_series(256,5120) i") + before = master.safe_psql("postgres", "SELECT * FROM t_heap") self.add_instance(backup_dir, 'replica', replica) + + copyfile( + os.path.join(backup_dir, 'wal/master/000000010000000000000003'), + os.path.join(backup_dir, 'wal/replica/000000010000000000000003')) + + copyfile( + os.path.join(backup_dir, 'wal/master/000000010000000000000004'), + os.path.join(backup_dir, 'wal/replica/000000010000000000000004')) + + copyfile( + os.path.join(backup_dir, 'wal/master/000000010000000000000005'), + os.path.join(backup_dir, 'wal/replica/000000010000000000000005')) + backup_id = self.backup_node( backup_dir, 'replica', replica, options=[ - '--archive-timeout=300', + '--archive-timeout=30', '--master-host=localhost', '--master-db=postgres', - '--master-port={0}'.format(master.port)]) + '--master-port={0}'.format(master.port), + '--stream']) + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) @@ -201,9 +225,11 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): node.append_conf( 'postgresql.auto.conf', 'port = {0}'.format(node.port)) node.slow_start() + # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") self.assertEqual(before, after) + node.cleanup() # Change data on master, make PAGE backup from replica, # restore taken backup and check that restored data equal @@ -212,30 +238,42 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): "postgres", "insert into t_heap as select i as id, md5(i::text) 
as text, " "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(512,768) i") + "from generate_series(512,22680) i") + before = master.safe_psql("postgres", "SELECT * FROM t_heap") + backup_id = self.backup_node( - backup_dir, 'replica', replica, backup_type='page', + backup_dir, 'replica', + replica, backup_type='page', options=[ - '--archive-timeout=300', + '--archive-timeout=30', '--master-host=localhost', '--master-db=postgres', - '--master-port={0}'.format(master.port)]) + '--master-port={0}'.format(master.port), + '--stream']) + self.validate_pb(backup_dir, 'replica') self.assertEqual( 'OK', self.show_pb(backup_dir, 'replica', backup_id)['status']) # RESTORE PAGE BACKUP TAKEN FROM replica - node.cleanup() self.restore_node( backup_dir, 'replica', data_dir=node.data_dir, backup_id=backup_id) + node.append_conf( 'postgresql.auto.conf', 'port = {0}'.format(node.port)) + node.append_conf( + 'postgresql.auto.conf', 'archive_mode = off') node.slow_start() + # CHECK DATA CORRECTNESS after = node.safe_psql("postgres", "SELECT * FROM t_heap") self.assertEqual(before, after) + self.add_instance(backup_dir, 'node', node) + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + # Clean after yourself self.del_test_dir(module_name, fname) @@ -279,15 +317,217 @@ class ReplicaTest(ProbackupTest, unittest.TestCase): backup_id = self.backup_node( backup_dir, 'master', master, backup_type='page') self.restore_node( - backup_dir, 'master', replica, - options=['-R', '--recovery-target-action=promote']) + backup_dir, 'master', replica, options=['-R']) # Settings for Replica - # self.set_replica(master, replica) self.set_archiving(backup_dir, 'replica', replica, replica=True) replica.append_conf( 'postgresql.auto.conf', 'port = {0}'.format(replica.port)) - replica.start() + replica.append_conf( + 'postgresql.auto.conf', 'hot_standby = on') + + replica.slow_start(replica=True) + + self.add_instance(backup_dir, 'replica', replica) + + copyfile( + os.path.join(backup_dir, 'wal/master/000000010000000000000003'), + os.path.join(backup_dir, 'wal/replica/000000010000000000000003')) + + self.backup_node(backup_dir, 'replica', replica) # Clean after yourself self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_take_backup_from_delayed_replica(self): + """ + make archive master, take full backups from master, + restore full backup as delayed replica, launch pgbench, + take FULL, PAGE and DELTA backups from replica + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir="{0}/{1}/master".format(module_name, fname), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'wal_level': 'replica', 'max_wal_senders': '2', + 'checkpoint_timeout': '30s'} + ) + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + self.set_archiving(backup_dir, 'master', master) + # force more frequent wal switch + #master.append_conf('postgresql.auto.conf', 'archive_timeout = 10') + master.slow_start() + + replica = self.make_simple_node( + base_dir="{0}/{1}/replica".format(module_name, fname)) + replica.cleanup() + + self.backup_node(backup_dir, 'master', master) + + self.restore_node( + backup_dir, 'master', replica, options=['-R']) + + # Settings for Replica + self.add_instance(backup_dir, 'replica', replica) + self.set_archiving(backup_dir, 'replica', replica, replica=True) + + # stupid hack + copyfile( + os.path.join(backup_dir, 
'wal/master/000000010000000000000001'), + os.path.join(backup_dir, 'wal/replica/000000010000000000000001')) + + replica.append_conf( + 'postgresql.auto.conf', 'port = {0}'.format(replica.port)) + + replica.append_conf( + 'postgresql.auto.conf', 'hot_standby = on') + + replica.append_conf( + 'recovery.conf', "recovery_min_apply_delay = '300s'") + + replica.slow_start(replica=True) + + master.pgbench_init(scale=10) + + pgbench = master.pgbench( + options=['-T', '30', '-c', '2', '--no-vacuum']) + + self.backup_node( + backup_dir, 'replica', replica) + + self.backup_node( + backup_dir, 'replica', replica, + data_dir=replica.data_dir, backup_type='page') + + self.backup_node( + backup_dir, 'replica', replica, backup_type='delta') + + pgbench.wait() + + pgbench = master.pgbench( + options=['-T', '30', '-c', '2', '--no-vacuum']) + + self.backup_node( + backup_dir, 'replica', replica, + options=['--stream']) + + self.backup_node( + backup_dir, 'replica', replica, + backup_type='page', options=['--stream']) + + self.backup_node( + backup_dir, 'replica', replica, + backup_type='delta', options=['--stream']) + + pgbench.wait() + + # Clean after yourself + self.del_test_dir(module_name, fname) + + @unittest.skip("skip") + def test_make_block_from_future(self): + """ + make archive master, take full backups from master, + restore full backup as replica, launch pgbench, + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + master = self.make_simple_node( + base_dir="{0}/{1}/master".format(module_name, fname), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'wal_level': 'replica', 'max_wal_senders': '2', + 'checkpoint_timeout': '30s'} + ) + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'master', master) + self.set_archiving(backup_dir, 'master', master) + # force more frequent wal switch + #master.append_conf('postgresql.auto.conf', 'archive_timeout = 10') + master.slow_start() + + replica = self.make_simple_node( + base_dir="{0}/{1}/replica".format(module_name, fname)) + replica.cleanup() + + self.backup_node(backup_dir, 'master', master) + + self.restore_node( + backup_dir, 'master', replica, options=['-R']) + + # Settings for Replica + self.set_archiving(backup_dir, 'replica', replica, replica=True) + replica.append_conf( + 'postgresql.auto.conf', 'port = {0}'.format(replica.port)) + replica.append_conf( + 'postgresql.auto.conf', 'hot_standby = on') + + replica.slow_start(replica=True) + + self.add_instance(backup_dir, 'replica', replica) + + replica.safe_psql( + 'postgres', + 'checkpoint') + + master.pgbench_init(scale=10) + + self.wait_until_replica_catch_with_master(master, replica) + + +# print(replica.safe_psql( +# 'postgres', +# 'select * from pg_catalog.pg_last_xlog_receive_location()')) +# +# print(replica.safe_psql( +# 'postgres', +# 'select * from pg_catalog.pg_last_xlog_replay_location()')) +# +# print(replica.safe_psql( +# 'postgres', +# 'select * from pg_catalog.pg_control_checkpoint()')) +# +# replica.safe_psql( +# 'postgres', +# 'checkpoint') + + pgbench = master.pgbench(options=['-T', '30', '-c', '2', '--no-vacuum']) + + time.sleep(5) + + #self.backup_node(backup_dir, 'replica', replica, options=['--stream']) + exit(1) + self.backup_node(backup_dir, 'replica', replica) + pgbench.wait() + + # pgbench + master.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, " + "md5(repeat(i::text,10))::tsvector as tsvector " + "from generate_series(0,256000) i") 
+ + + master.safe_psql( + 'postgres', + 'checkpoint') + + replica.safe_psql( + 'postgres', + 'checkpoint') + + replica.safe_psql( + 'postgres', + 'select * from pg_') + + self.backup_node(backup_dir, 'replica', replica) + exit(1) + + # Clean after yourself + self.del_test_dir(module_name, fname) \ No newline at end of file diff --git a/tests/validate_test.py b/tests/validate_test.py index b3590de3..c0fd4943 100644 --- a/tests/validate_test.py +++ b/tests/validate_test.py @@ -50,7 +50,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): f.close self.backup_node( - backup_dir, 'node', node, options=["--log-level-file=verbose"]) + backup_dir, 'node', node, options=['--log-level-file=verbose']) log_file_path = os.path.join(backup_dir, "log", "pg_probackup.log") @@ -259,8 +259,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Simple validate try: self.validate_pb( - backup_dir, 'node', backup_id=backup_id_2, - options=['--log-level-file=verbose']) + backup_dir, 'node', backup_id=backup_id_2) self.assertEqual( 1, 0, "Expecting Error because of data files corruption.\n " @@ -364,8 +363,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate PAGE1 try: self.validate_pb( - backup_dir, 'node', backup_id=backup_id_2, - options=['--log-level-file=verbose']) + backup_dir, 'node', backup_id=backup_id_2) self.assertEqual( 1, 0, "Expecting Error because of data files corruption.\n " @@ -520,8 +518,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): try: self.validate_pb( backup_dir, 'node', - backup_id=backup_id_4, - options=['--log-level-file=verbose']) + backup_id=backup_id_4) self.assertEqual( 1, 0, "Expecting Error because of data files corruption.\n" @@ -721,7 +718,6 @@ class ValidateTest(ProbackupTest, unittest.TestCase): self.validate_pb( backup_dir, 'node', options=[ - '--log-level-file=verbose', '-i', backup_id_4, '--xid={0}'.format(target_xid)]) self.assertEqual( 1, 0, @@ -866,7 +862,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate Instance try: self.validate_pb( - backup_dir, 'node', options=['--log-level-file=verbose']) + backup_dir, 'node') self.assertEqual( 1, 0, "Expecting Error because of data files corruption.\n " @@ -1006,7 +1002,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate Instance try: - self.validate_pb(backup_dir, 'node', options=['--log-level-file=verbose']) + self.validate_pb(backup_dir, 'node') self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: @@ -1092,7 +1088,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate Instance try: - self.validate_pb(backup_dir, 'node', options=['--log-level-file=verbose']) + self.validate_pb(backup_dir, 'node') self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: @@ -1219,7 +1215,6 @@ class ValidateTest(ProbackupTest, unittest.TestCase): 'node', backup_id, options=[ - "--log-level-console=verbose", "--xid={0}".format(target_xid)]) self.assertEqual( 1, 0, @@ -1388,7 +1383,6 @@ class ValidateTest(ProbackupTest, unittest.TestCase): 'node', backup_id, options=[ - "--log-level-console=verbose", "--xid={0}".format(target_xid)]) self.assertEqual( 1, 0, @@ -1671,7 +1665,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(file_new, file) try: - self.validate_pb(backup_dir, options=['--log-level-file=verbose']) 
+            self.validate_pb(backup_dir)
         except ProbackupException as e:
             self.assertIn(
                 'WARNING: Some backups are not valid'.format(
@@ -1776,7 +1770,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
         os.rename(file, file_new)
 
         try:
-            self.validate_pb(backup_dir, options=['--log-level-file=verbose'])
+            self.validate_pb(backup_dir)
         except ProbackupException as e:
             self.assertIn(
                 'WARNING: Some backups are not valid'.format(
@@ -3136,3 +3130,14 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
         self.del_test_dir(module_name, fname)
 
 # validate empty backup list
+# page from future during validate
+# page from future during backup
+
+# corrupt a block so that the file becomes unaligned:
+# 712            Assert(header.compressed_size <= BLCKSZ);
+# 713
+# 714            read_len = fread(compressed_page.data, 1,
+# 715                    MAXALIGN(header.compressed_size), in);
+# 716            if (read_len != MAXALIGN(header.compressed_size))
+# -> 717            elog(ERROR, "cannot read block %u of \"%s\" read %lu of %d",
+# 718                    blknum, file->path, read_len, header.compressed_size);
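
A note on the new pg_control handling in src/util.c above: set_min_recovery_point() patches a single field of ControlFileData and must then refresh the checksum embedded in pg_control, because checkControlFile() (and the server itself) verify a CRC-32C computed over every byte that precedes the crc member. The following self-contained sketch reproduces that update/recompute/verify cycle with a toy struct and a local bitwise CRC-32C; the real code uses ControlFileData and PostgreSQL's INIT/COMP/FIN_CRC32C macros, so the toy names here are illustrative only.

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Bitwise CRC-32C (Castagnoli, reflected), same convention as the
     * INIT_CRC32C/COMP_CRC32C/FIN_CRC32C sequence in the patch. */
    static uint32_t
    crc32c(const void *data, size_t len)
    {
        const uint8_t *p = (const uint8_t *) data;
        uint32_t    crc = 0xFFFFFFFF;              /* INIT_CRC32C */

        while (len--)
        {
            crc ^= *p++;
            for (int k = 0; k < 8; k++)
                crc = (crc >> 1) ^ (0x82F63B78 & (0 - (crc & 1)));
        }
        return crc ^ 0xFFFFFFFF;                   /* FIN_CRC32C */
    }

    /* Toy stand-in for ControlFileData: crc must stay the last member,
     * since the checksum covers offsetof(..., crc) bytes. */
    typedef struct ToyControlFile
    {
        uint64_t    minRecoveryPoint;
        uint64_t    restOfState;                   /* everything else in pg_control */
        uint32_t    crc;
    } ToyControlFile;

    int
    main(void)
    {
        ToyControlFile cf = {0, 42, 0};

        /* what set_min_recovery_point() does: patch the field... */
        cf.minRecoveryPoint = 0x12345678;
        /* ...then recompute the embedded checksum over the preceding bytes */
        cf.crc = crc32c(&cf, offsetof(ToyControlFile, crc));

        /* paranoia check, analogous to checkControlFile() */
        if (cf.crc != crc32c(&cf, offsetof(ToyControlFile, crc)))
            puts("pg_control CRC mismatch");
        else
            printf("minRecoveryPoint set, crc %08X\n", cf.crc);
        return 0;
    }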
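
The rewritten comment in validate.c states that a whole-file CRC-32C of pg_control is a constant. That is the classic CRC residue property: running the CRC over a message followed by that message's own (little-endian) CRC always lands on the same fixed value, and pg_control is laid out as exactly that, the struct content, then its CRC-32C, then zero padding. A minimal demonstration, assuming a little-endian machine (where the constant checksum would have been observed); it reuses the crc32c() helper from the previous sketch:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* crc32c() as defined in the previous sketch */
    uint32_t crc32c(const void *data, size_t len);

    int
    main(void)
    {
        /* two "pg_control files": 32 bytes of content, 4 bytes of
         * embedded CRC, zero padding to 64 bytes */
        unsigned char file_a[64] = "first control file image";
        unsigned char file_b[64] = "totally different image!";
        uint32_t    crc_a = crc32c(file_a, 32);
        uint32_t    crc_b = crc32c(file_b, 32);

        memcpy(file_a + 32, &crc_a, sizeof(crc_a));   /* native LE byte order */
        memcpy(file_b + 32, &crc_b, sizeof(crc_b));

        /* different contents, identical whole-file CRC-32C */
        printf("%08X == %08X\n",
               crc32c(file_a, sizeof(file_a)),
               crc32c(file_b, sizeof(file_b)));
        return 0;
    }

The two printed values are identical, which is why the file-level checksum of pg_control had to move to plain CRC-32; in the patch, pgFileGetCRC's second argument selects CRC-32C only for backups of version 2.0.21 and older.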
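
validate.c now passes the backup's program version into every worker thread and compares it with the literal 20021. pg_probackup's parse_program_version() (not shown in this patch) reduces a "major.minor.patch" string to one integer; the 2.0.22 boundary in the comment matching "<= 20021" implies a major*10000 + minor*100 + patch encoding. A sketch of that assumed scheme (the function name below is hypothetical):

    #include <stdio.h>

    /* Assumed encoding: "2.0.22" -> 20022, so "<= 20021" means
     * "older than 2.0.22". Returns 0 if the string does not parse. */
    static unsigned int
    version_to_number(const char *version)
    {
        unsigned int major, minor, patch;

        if (sscanf(version, "%u.%u.%u", &major, &minor, &patch) != 3)
            return 0;
        return major * 10000 + minor * 100 + patch;
    }

    int
    main(void)
    {
        printf("%u\n", version_to_number("2.0.21"));  /* 20021: last CRC-32C version */
        printf("%u\n", version_to_number("2.0.22"));  /* 20022: first CRC-32 version */
        return 0;
    }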
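
The corrupted-delta merge test in tests/merge.py pokes the backup file at byte offset 8200 and copies 8200 bytes, relying on its own comment that block_size + backup_header = 8200. Assuming that layout holds for uncompressed data files (an 8-byte per-block header in front of every 8192-byte page), the offset of any block follows directly; the helper name below is hypothetical:

    #include <stdio.h>

    #define BLCKSZ                 8192    /* PostgreSQL page size */
    #define BACKUP_PAGE_HEADER        8    /* per-block header size implied by the test */

    /* Start offset of block blknum inside an uncompressed backup data file,
     * assuming header + page are stored back to back for every block. */
    static long
    backup_block_offset(unsigned int blknum)
    {
        return (long) blknum * (BACKUP_PAGE_HEADER + BLCKSZ);
    }

    int
    main(void)
    {
        /* f.seek(8200) in the test lands exactly on block 1 */
        printf("block 1 starts at offset %ld\n", backup_block_offset(1));
        return 0;
    }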
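
Finally, the checklist comment at the end of validate_test.py quotes the block reader in data.c: it trusts header.compressed_size and fread()s MAXALIGN(compressed_size) bytes, so a single corrupted length field shifts every subsequent read, which is what the "unaligned file" note refers to. MAXALIGN in PostgreSQL rounds a length up to the platform's maximum alignment, commonly 8 bytes; a sketch of the rounding and of the failure mode, with made-up sizes:

    #include <stdio.h>

    /* 8-byte variant of PostgreSQL's MAXALIGN, as on common 64-bit platforms */
    #define MAXALIGN(LEN)   (((long) (LEN) + 7) & ~(long) 7)

    int
    main(void)
    {
        long stored  = 2500;           /* honest compressed_size of a block */
        long corrupt = 2500 + 4096;    /* same field after on-disk corruption */

        /* the reader consumes MAXALIGN(compressed_size) bytes per block, so a
         * corrupted size makes every following block start at a wrong offset */
        printf("consume %ld bytes instead of %ld: stream off by %ld\n",
               MAXALIGN(corrupt), MAXALIGN(stored),
               MAXALIGN(corrupt) - MAXALIGN(stored));
        return 0;
    }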