diff --git a/README.md b/README.md index a356b81b..b70ae8ad 100644 --- a/README.md +++ b/README.md @@ -10,11 +10,13 @@ As compared to other backup solutions, `pg_probackup` offers the following benef * Implementing a single backup strategy for multi-server PostgreSQL clusters * Automatic data consistency checks and on-demand backup validation without actual data recovery * Managing backups in accordance with retention policy -* Running backup, restore, and validation processes on multiple parallel threads +* Merging incremental backups into full backups without actual data recovery +* Running backup, restore, merge, and validation processes on multiple parallel threads * Storing backup data in a compressed state to save disk space * Taking backups from a standby server to avoid extra load on the master server * Extended logging settings * Custom commands to simplify WAL log archiving +* Backing up files and directories located outside of PGDATA, such as directories with configuration files and scripts To manage backup data, `pg_probackup` creates a backup catalog. This directory stores all backup files with additional meta information, as well as WAL archives required for [point-in-time recovery](https://postgrespro.com/docs/postgresql/current/continuous-archiving.html). You can store backups for different instances in separate subdirectories of a single backup catalog. @@ -40,7 +42,6 @@ Regardless of the chosen backup type, all backups taken with `pg_probackup` supp * Creating backups from a remote server is currently not supported. * The server from which the backup was taken and the restored server must be compatible by the [block_size](https://postgrespro.com/docs/postgresql/current/runtime-config-preset#GUC-BLOCK-SIZE) and [wal_block_size](https://postgrespro.com/docs/postgresql/current/runtime-config-preset#GUC-WAL-BLOCK-SIZE) parameters and have the same major release number. * Microsoft Windows operating system support is in beta stage. -* Configuration files outside of PostgreSQL data directory are not included into the backup and should be backed up separately. ## Installation and Setup ### Linux Installation diff --git a/src/backup.c b/src/backup.c index 06baf08b..01b5d7be 100644 --- a/src/backup.c +++ b/src/backup.c @@ -117,6 +117,7 @@ static void *StreamLog(void *arg); static void get_remote_pgdata_filelist(parray *files); static void ReceiveFileList(parray* files, PGconn *conn, PGresult *res, int rownum); static void remote_copy_file(PGconn *conn, pgFile* file); +static void check_external_for_tablespaces(parray *external_list); /* Ptrack functions */ static void pg_ptrack_clear(void); @@ -407,7 +408,7 @@ remote_backup_files(void *arg) instance_config.pguser); /* check for interrupt */ - if (interrupted) + if (interrupted || thread_interrupted) elog(ERROR, "interrupted during backup"); query_str = psprintf("FILE_BACKUP FILEPATH '%s'",file->path); @@ -470,6 +471,7 @@ do_backup_instance(void) { int i; char database_path[MAXPGPATH]; + char external_prefix[MAXPGPATH]; /* Temp value.
Used as template */ char dst_backup_path[MAXPGPATH]; char label[1024]; XLogRecPtr prev_backup_start_lsn = InvalidXLogRecPtr; @@ -482,9 +484,16 @@ do_backup_instance(void) pgBackup *prev_backup = NULL; parray *prev_backup_filelist = NULL; parray *backup_list = NULL; + parray *external_dirs = NULL; + pgFile *pg_control = NULL; elog(LOG, "Database backup start"); + if(current.external_dir_str) + { + external_dirs = make_external_directory_list(current.external_dir_str); + check_external_for_tablespaces(external_dirs); + } /* Initialize size summary */ current.data_bytes = 0; @@ -538,7 +547,7 @@ do_backup_instance(void) pgBackupGetPath(prev_backup, prev_backup_filelist_path, lengthof(prev_backup_filelist_path), DATABASE_FILE_LIST); /* Files of previous backup needed by DELTA backup */ - prev_backup_filelist = dir_read_file_list(NULL, prev_backup_filelist_path, FIO_BACKUP_HOST); + prev_backup_filelist = dir_read_file_list(NULL, NULL, prev_backup_filelist_path, FIO_BACKUP_HOST); /* If lsn is not NULL, only pages with higher lsn will be copied. */ prev_backup_start_lsn = prev_backup->start_lsn; @@ -580,6 +589,8 @@ do_backup_instance(void) pgBackupGetPath(¤t, database_path, lengthof(database_path), DATABASE_DIR); + pgBackupGetPath(¤t, external_prefix, lengthof(external_prefix), + EXTERNAL_DIR); /* start stream replication */ if (stream_wal) @@ -622,6 +633,7 @@ do_backup_instance(void) /* By default there are some error */ stream_thread_arg.ret = 1; + thread_interrupted = false; pthread_create(&stream_thread, NULL, StreamLog, &stream_thread_arg); } @@ -634,7 +646,18 @@ do_backup_instance(void) get_remote_pgdata_filelist(backup_files_list); else dir_list_file(backup_files_list, instance_config.pgdata, - true, true, false, FIO_DB_HOST); + true, true, false, 0, FIO_DB_HOST); + + /* + * Append to backup list all files and directories + * from external directory option + */ + if (external_dirs) + for (i = 0; i < parray_num(external_dirs); i++) + /* External dirs numeration starts with 1. + * 0 value is not external dir */ + dir_list_file(backup_files_list, parray_get(external_dirs, i), + false, true, false, i+1, FIO_DB_HOST); /* Sanity check for backup_files_list, thank you, Windows: * https://github.com/postgrespro/pg_probackup/issues/48 @@ -680,8 +703,7 @@ do_backup_instance(void) * where this backup has started. 
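A note on the bookkeeping introduced in do_backup_instance() above: each file carries an external_dir_num, where 0 means it comes from PGDATA and values starting at 1 index the directories given via -E / --external-dirs; the same number also selects the per-directory container inside the backup (the "%s%d" formatting above). A minimal sketch of that naming, with an illustrative prefix string since the real EXTERNAL_DIR value comes from pgBackupGetPath() elsewhere in the tree:

```c
/* Sketch, not project code: derive a container path from the 1-based
 * external directory number, mirroring the "%s%d" formatting above. */
#include <stdio.h>

#define MAXPGPATH 1024

static void
make_external_dir_path_by_num(char *dst, const char *prefix, int num)
{
    /* external_dir_num starts with 1; 0 is reserved for PGDATA files */
    snprintf(dst, MAXPGPATH, "%s%d", prefix, num);
}

int
main(void)
{
    char path[MAXPGPATH];

    /* illustrative prefix; the real one is built from EXTERNAL_DIR */
    make_external_dir_path_by_num(path, "external_directories/externaldir", 2);
    printf("%s\n", path);   /* external_directories/externaldir2 */
    return 0;
}
```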
*/ extractPageMap(arclog_path, current.tli, instance_config.xlog_seg_size, - prev_backup->start_lsn, current.start_lsn, - backup_files_list); + prev_backup->start_lsn, current.start_lsn); } else if (current.backup_mode == BACKUP_MODE_DIFF_PTRACK) { @@ -703,18 +725,28 @@ do_backup_instance(void) { char dirpath[MAXPGPATH]; char *dir_name; - char database_path[MAXPGPATH]; if (!IsReplicationProtocol()) - dir_name = GetRelativePath(file->path, instance_config.pgdata); + if (file->external_dir_num) + dir_name = GetRelativePath(file->path, + parray_get(external_dirs, + file->external_dir_num - 1)); + else + dir_name = GetRelativePath(file->path, instance_config.pgdata); else dir_name = file->path; elog(VERBOSE, "Create directory \"%s\"", dir_name); - pgBackupGetPath(¤t, database_path, lengthof(database_path), - DATABASE_DIR); - join_path_components(dirpath, database_path, dir_name); + if (file->external_dir_num) + { + char temp[MAXPGPATH]; + snprintf(temp, MAXPGPATH, "%s%d", external_prefix, + file->external_dir_num); + join_path_components(dirpath, temp, dir_name); + } + else + join_path_components(dirpath, database_path, dir_name); fio_mkdir(dirpath, DIR_PERMISSION, FIO_BACKUP_HOST); } @@ -726,7 +758,7 @@ do_backup_instance(void) parray_qsort(backup_files_list, pgFileCompareSize); /* Sort the array for binary search */ if (prev_backup_filelist) - parray_qsort(prev_backup_filelist, pgFileComparePath); + parray_qsort(prev_backup_filelist, pgFileComparePathWithExternal); /* init thread args with own file lists */ threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); @@ -738,6 +770,8 @@ do_backup_instance(void) arg->from_root = instance_config.pgdata; arg->to_root = database_path; + arg->external_prefix = external_prefix; + arg->external_dirs = external_dirs; arg->files_list = backup_files_list; arg->prev_filelist = prev_backup_filelist; arg->prev_start_lsn = prev_backup_start_lsn; @@ -748,6 +782,7 @@ do_backup_instance(void) } /* Run threads */ + thread_interrupted = false; elog(INFO, "Start transfering data files"); for (i = 0; i < num_threads; i++) { @@ -831,7 +866,7 @@ do_backup_instance(void) /* Scan backup PG_XLOG_DIR */ xlog_files_list = parray_new(); join_path_components(pg_xlog_path, database_path, PG_XLOG_DIR); - dir_list_file(xlog_files_list, pg_xlog_path, false, true, false, FIO_BACKUP_HOST); + dir_list_file(xlog_files_list, pg_xlog_path, false, true, false, 0, FIO_BACKUP_HOST); for (i = 0; i < parray_num(xlog_files_list); i++) { @@ -853,7 +888,12 @@ do_backup_instance(void) } /* Print the list of files to backup catalog */ - write_backup_filelist(¤t, backup_files_list, instance_config.pgdata); + write_backup_filelist(¤t, backup_files_list, instance_config.pgdata, + NULL, external_dirs); + + /* clean external directories list */ + if (external_dirs) + free_dir_list(external_dirs); /* Compute summary of size of regular files in the backup */ for (i = 0; i < parray_num(backup_files_list); i++) @@ -982,6 +1022,13 @@ do_backup(time_t start_time) StrNCpy(current.program_version, PROGRAM_VERSION, sizeof(current.program_version)); + /* Save list of external directories */ + if (instance_config.external_dir_str && + pg_strcasecmp(instance_config.external_dir_str, "none") != 0) + { + current.external_dir_str = instance_config.external_dir_str; + } + /* Create backup directory and BACKUP_CONTROL_FILE */ if (pgBackupCreateDir(¤t)) elog(ERROR, "Cannot create backup directory"); @@ -1023,8 +1070,8 @@ do_backup(time_t start_time) * After successfil backup completion remove backups * which 
are expired according to retention policies */ - if (delete_expired || delete_wal) - do_retention_purge(); + if (delete_expired || merge_expired || delete_wal) + do_retention(); return 0; } @@ -2034,7 +2081,7 @@ pg_stop_backup(pgBackup *backup) */ if (backup_files_list) { - file = pgFileNew(backup_label, true, FIO_BACKUP_HOST); + file = pgFileNew(backup_label, true, 0, FIO_BACKUP_HOST); calc_file_checksum(file, FIO_BACKUP_HOST); free(file->path); file->path = strdup(PG_BACKUP_LABEL_FILE); @@ -2077,7 +2124,7 @@ pg_stop_backup(pgBackup *backup) if (backup_files_list) { - file = pgFileNew(tablespace_map, true, FIO_BACKUP_HOST); + file = pgFileNew(tablespace_map, true, 0, FIO_BACKUP_HOST); if (S_ISREG(file->mode)) calc_file_checksum(file, FIO_BACKUP_HOST); free(file->path); @@ -2242,7 +2289,7 @@ backup_files(void *arg) elog(VERBOSE, "Copying file: \"%s\" ", file->path); /* check for interrupt */ - if (interrupted) + if (interrupted || thread_interrupted) elog(ERROR, "interrupted during backup"); if (progress) @@ -2278,6 +2325,11 @@ backup_files(void *arg) if (S_ISREG(buf.st_mode)) { pgFile **prev_file = NULL; + char *external_path = NULL; + + if (file->external_dir_num) + external_path = parray_get(arguments->external_dirs, + file->external_dir_num - 1); /* Check that file exist in previous backup */ if (current.backup_mode != BACKUP_MODE_FULL) @@ -2285,11 +2337,13 @@ backup_files(void *arg) char *relative; pgFile key; - relative = GetRelativePath(file->path, arguments->from_root); + relative = GetRelativePath(file->path, file->external_dir_num ? + external_path : arguments->from_root); key.path = relative; + key.external_dir_num = file->external_dir_num; prev_file = (pgFile **) parray_bsearch(arguments->prev_filelist, - &key, pgFileComparePath); + &key, pgFileComparePathWithExternal); if (prev_file) /* File exists in previous backup */ file->exists_in_prev = true; @@ -2316,13 +2370,17 @@ backup_files(void *arg) continue; } } - else if (strcmp(file->name, "pg_control") == 0) + else if (!file->external_dir_num && + strcmp(file->name, "pg_control") == 0) copy_pgcontrol_file(arguments->from_root, FIO_DB_HOST, arguments->to_root, FIO_BACKUP_HOST, file); else { + const char *src; + const char *dst; bool skip = false; + char external_dst[MAXPGPATH]; /* If non-data file has not changed since last backup... */ if (prev_file && file->exists_in_prev && @@ -2333,8 +2391,22 @@ backup_files(void *arg) if (EQ_TRADITIONAL_CRC32(file->crc, (*prev_file)->crc)) skip = true; /* ...skip copying file. 
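The previous-backup lookup above now keys the binary search on both the relative path and external_dir_num, because a file in PGDATA and a file in an external directory may share the same relative path. A self-contained sketch of that comparator pattern (simplified types, not the project's pgFile):

```c
/* Sketch: compare by path first, break ties on the directory number,
 * exactly like pgFileComparePathWithExternal above. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct
{
    const char *path;               /* path relative to its root */
    int         external_dir_num;   /* 0 = PGDATA, >0 = external dir */
} FileKey;

static int
compare_path_with_external(const void *a, const void *b)
{
    const FileKey *f1 = *(const FileKey *const *) a;
    const FileKey *f2 = *(const FileKey *const *) b;
    int res = strcmp(f1->path, f2->path);

    if (res != 0)
        return res;
    return (f1->external_dir_num > f2->external_dir_num) -
           (f1->external_dir_num < f2->external_dir_num);
}

int
main(void)
{
    FileKey a = {"postgresql.conf", 0};
    FileKey b = {"postgresql.conf", 1};
    FileKey *list[] = {&a, &b};
    FileKey key = {"postgresql.conf", 1};
    FileKey *key_p = &key;
    FileKey **hit;

    qsort(list, 2, sizeof(FileKey *), compare_path_with_external);
    hit = bsearch(&key_p, list, 2, sizeof(FileKey *),
                  compare_path_with_external);
    printf("%s in dir %d\n", (*hit)->path, (*hit)->external_dir_num);
    return 0;
}
```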
*/ } + /* Set file paths */ + if (file->external_dir_num) + { + makeExternalDirPathByNum(external_dst, + arguments->external_prefix, + file->external_dir_num); + src = external_path; + dst = external_dst; + } + else + { + src = arguments->from_root; + dst = arguments->to_root; + } if (skip || - !copy_file(arguments->from_root, FIO_DB_HOST, arguments->to_root, FIO_BACKUP_HOST, file)) + !copy_file(src, FIO_DB_HOST, dst, FIO_BACKUP_HOST, file)) { /* disappeared file not to be confused with 'not changed' */ if (file->write_size != FILE_NOT_FOUND) @@ -2688,7 +2760,7 @@ stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished) static XLogRecPtr prevpos = InvalidXLogRecPtr; /* check for interrupt */ - if (interrupted) + if (interrupted || thread_interrupted) elog(ERROR, "Interrupted during backup"); /* we assume that we get called once at the end of each segment */ @@ -2931,3 +3003,42 @@ pg_ptrack_get_block(backup_files_arg *arguments, return result; } + +static void +check_external_for_tablespaces(parray *external_list) +{ + PGconn *conn; + PGresult *res; + int i = 0; + int j = 0; + char *tablespace_path = NULL; + char *query = "SELECT pg_catalog.pg_tablespace_location(oid)\n" + "FROM pg_tablespace\n" + "WHERE pg_catalog.pg_tablespace_location(oid) <> '';"; + + conn = backup_conn; + res = pgut_execute(conn, query, 0, NULL); + + /* Check successfull execution of query */ + if (!res) + elog(ERROR, "Failed to get list of tablespaces"); + + for (i = 0; i < res->ntups; i++) + { + tablespace_path = PQgetvalue(res, i, 0); + Assert (strlen(tablespace_path) > 0); + for (j = 0; j < parray_num(external_list); j++) + { + char *external_path = parray_get(external_list, j); + if (path_is_prefix_of_path(external_path, tablespace_path)) + elog(ERROR, "External directory path (-E option) \"%s\" " + "contains tablespace \"%s\"", + external_path, tablespace_path); + if (path_is_prefix_of_path(tablespace_path, external_path)) + elog(WARNING, "External directory path (-E option) \"%s\" " + "is in tablespace directory \"%s\"", + tablespace_path, external_path); + } + } + PQclear(res); +} diff --git a/src/catalog.c b/src/catalog.c index 31007d67..8b64f55d 100644 --- a/src/catalog.c +++ b/src/catalog.c @@ -74,6 +74,14 @@ write_backup_status(pgBackup *backup, BackupStatus status) pgBackup *tmp; tmp = read_backup(backup->start_time); + if (!tmp) + { + /* + * Silently exit the function, since read_backup already logged the + * warning message. 
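check_external_for_tablespaces() above rejects external directories that contain (or are contained in) a tablespace by comparing path prefixes. The real predicate, path_is_prefix_of_path(), lives elsewhere in the tree; an illustrative stand-in shows why such a check has to respect path component boundaries:

```c
/* Illustrative sketch, not the project's implementation: a naive strncmp
 * would wrongly treat "/mnt/ts1" as a parent of "/mnt/ts10". */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool
path_is_prefix_of(const char *prefix, const char *path)
{
    size_t len = strlen(prefix);

    if (strncmp(prefix, path, len) != 0)
        return false;
    /* exact match, or the next character starts a new component */
    return path[len] == '\0' || path[len] == '/';
}

int
main(void)
{
    printf("%d\n", path_is_prefix_of("/mnt/external", "/mnt/external/scripts")); /* 1 */
    printf("%d\n", path_is_prefix_of("/mnt/ts1", "/mnt/ts10"));                  /* 0 */
    return 0;
}
```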
+ */ + return; + } backup->status = status; tmp->status = backup->status; @@ -301,11 +309,10 @@ IsDir(const char *dirpath, const char *entry, fio_location location) parray * catalog_get_backup_list(time_t requested_backup_id) { - DIR *data_dir = NULL; - struct dirent *data_ent = NULL; - parray *backups = NULL; - pgBackup *backup = NULL; - int i; + DIR *data_dir = NULL; + struct dirent *data_ent = NULL; + parray *backups = NULL; + int i; /* open backup instance backups directory */ data_dir = fio_opendir(backup_instance_path, FIO_BACKUP_HOST); @@ -320,8 +327,9 @@ catalog_get_backup_list(time_t requested_backup_id) backups = parray_new(); for (; (data_ent = fio_readdir(data_dir)) != NULL; errno = 0) { - char backup_conf_path[MAXPGPATH]; - char data_path[MAXPGPATH]; + char backup_conf_path[MAXPGPATH]; + char data_path[MAXPGPATH]; + pgBackup *backup = NULL; /* skip not-directory entries and hidden entries */ if (!IsDir(backup_instance_path, data_ent->d_name, FIO_BACKUP_HOST) @@ -355,7 +363,6 @@ catalog_get_backup_list(time_t requested_backup_id) continue; } parray_append(backups, backup); - backup = NULL; if (errno && errno != ENOENT) { @@ -379,25 +386,18 @@ catalog_get_backup_list(time_t requested_backup_id) /* Link incremental backups with their ancestors.*/ for (i = 0; i < parray_num(backups); i++) { - pgBackup *curr = parray_get(backups, i); - - int j; + pgBackup *curr = parray_get(backups, i); + pgBackup **ancestor; + pgBackup key; if (curr->backup_mode == BACKUP_MODE_FULL) continue; - for (j = i+1; j < parray_num(backups); j++) - { - pgBackup *ancestor = parray_get(backups, j); - - if (ancestor->start_time == curr->parent_backup) - { - curr->parent_backup_link = ancestor; - /* elog(INFO, "curr %s, ancestor %s j=%d", base36enc_dup(curr->start_time), - base36enc_dup(ancestor->start_time), j); */ - break; - } - } + key.start_time = curr->parent_backup; + ancestor = (pgBackup **) parray_bsearch(backups, &key, + pgBackupCompareIdDesc); + if (ancestor) + curr->parent_backup_link = *ancestor; } return backups; @@ -405,8 +405,6 @@ catalog_get_backup_list(time_t requested_backup_id) err_proc: if (data_dir) fio_closedir(data_dir); - if (backup) - pgBackupFree(backup); if (backups) parray_walk(backups, pgBackupFree); parray_free(backups); @@ -468,7 +466,25 @@ pgBackupCreateDir(pgBackup *backup) { int i; char path[MAXPGPATH]; - char *subdirs[] = { DATABASE_DIR, NULL }; + parray *subdirs = parray_new(); + + parray_append(subdirs, pg_strdup(DATABASE_DIR)); + + /* Add external dirs containers */ + if (backup->external_dir_str) + { + parray *external_list; + + external_list = make_external_directory_list(backup->external_dir_str); + for (int i = 0; i < parray_num(external_list); i++) + { + char temp[MAXPGPATH]; + /* Numeration of externaldirs starts with 1 */ + makeExternalDirPathByNum(temp, EXTERNAL_DIR, i+1); + parray_append(subdirs, pg_strdup(temp)); + } + free_dir_list(external_list); + } pgBackupGetPath(backup, path, lengthof(path), NULL); @@ -478,12 +494,13 @@ pgBackupCreateDir(pgBackup *backup) fio_mkdir(path, DIR_PERMISSION, FIO_BACKUP_HOST); /* create directories for actual backup files */ - for (i = 0; subdirs[i]; i++) + for (i = 0; i < parray_num(subdirs); i++) { - pgBackupGetPath(backup, path, lengthof(path), subdirs[i]); + pgBackupGetPath(backup, path, lengthof(path), parray_get(subdirs, i)); fio_mkdir(path, DIR_PERMISSION, FIO_BACKUP_HOST); } + free_dir_list(subdirs); return 0; } @@ -561,6 +578,10 @@ pgBackupWriteControl(FILE *out, pgBackup *backup) /* print connection info except password 
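The parent-link pass in catalog_get_backup_list() above replaces the nested scan with a keyed binary search, which only works because the catalog list is kept sorted by backup ID in descending order. A simplified, self-contained sketch of that lookup (illustrative SimpleBackup type, not the project's pgBackup):

```c
/* Sketch: find a parent by start_time in a descending-sorted list,
 * mirroring the parray_bsearch(..., pgBackupCompareIdDesc) call above. */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

typedef struct SimpleBackup
{
    time_t start_time;                  /* doubles as the backup ID */
    time_t parent_backup;               /* 0 for FULL backups */
    struct SimpleBackup *parent_link;
} SimpleBackup;

/* descending by start_time */
static int
compare_id_desc(const void *a, const void *b)
{
    const SimpleBackup *b1 = *(const SimpleBackup *const *) a;
    const SimpleBackup *b2 = *(const SimpleBackup *const *) b;

    return (b1->start_time < b2->start_time) - (b1->start_time > b2->start_time);
}

int
main(void)
{
    SimpleBackup full = {1000, 0, NULL};
    SimpleBackup page = {2000, 1000, NULL};
    SimpleBackup *list[] = {&page, &full};      /* sorted descending */
    SimpleBackup key = {page.parent_backup, 0, NULL};
    SimpleBackup *key_p = &key;
    SimpleBackup **hit;

    hit = bsearch(&key_p, list, 2, sizeof(SimpleBackup *), compare_id_desc);
    if (hit)
        page.parent_link = *hit;
    printf("parent of %ld is %ld\n", (long) page.start_time,
           (long) page.parent_link->start_time);
    return 0;
}
```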
*/ if (backup->primary_conninfo) fio_fprintf(out, "primary_conninfo = '%s'\n", backup->primary_conninfo); + + /* print external directories list */ + if (backup->external_dir_str) + fio_fprintf(out, "external-dirs = '%s'\n", backup->external_dir_str); } /* @@ -569,42 +590,75 @@ pgBackupWriteControl(FILE *out, pgBackup *backup) void write_backup(pgBackup *backup) { - FILE *fp = NULL; - char conf_path[MAXPGPATH]; + FILE *fp = NULL; + char path[MAXPGPATH]; + char path_temp[MAXPGPATH]; + int errno_temp; - pgBackupGetPath(backup, conf_path, lengthof(conf_path), BACKUP_CONTROL_FILE); - fp = fio_fopen(conf_path, PG_BINARY_W, FIO_BACKUP_HOST); + pgBackupGetPath(backup, path, lengthof(path), BACKUP_CONTROL_FILE); + snprintf(path_temp, sizeof(path_temp), "%s.tmp", path); + + fp = fio_fopen(path_temp, PG_BINARY_W, FIO_BACKUP_HOST); if (fp == NULL) - elog(ERROR, "Cannot open configuration file \"%s\": %s", conf_path, - strerror(errno)); + elog(ERROR, "Cannot open configuration file \"%s\": %s", + path_temp, strerror(errno)); pgBackupWriteControl(fp, backup); if (fio_fflush(fp) || fio_fclose(fp)) + { + errno_temp = errno; + fio_unlink(path_temp, FIO_BACKUP_HOST); elog(ERROR, "Cannot write configuration file \"%s\": %s", - conf_path, strerror(errno)); + path_temp, strerror(errno_temp)); + } + + if (fio_rename(path_temp, path, FIO_BACKUP_HOST) < 0) + { + errno_temp = errno; + fio_unlink(path_temp, FIO_BACKUP_HOST); + elog(ERROR, "Cannot rename configuration file \"%s\" to \"%s\": %s", + path_temp, path, strerror(errno_temp)); + } } /* * Output the list of files to backup catalog DATABASE_FILE_LIST */ void -write_backup_filelist(pgBackup *backup, parray *files, const char *root) +write_backup_filelist(pgBackup *backup, parray *files, const char *root, + const char *external_prefix, parray *external_list) { FILE *fp; char path[MAXPGPATH]; + char path_temp[MAXPGPATH]; + int errno_temp; pgBackupGetPath(backup, path, lengthof(path), DATABASE_FILE_LIST); + snprintf(path_temp, sizeof(path_temp), "%s.tmp", path); - fp = fio_fopen(path, PG_BINARY_W, FIO_BACKUP_HOST); + fp = fio_fopen(path_temp, PG_BINARY_W, FIO_BACKUP_HOST); if (fp == NULL) - elog(ERROR, "Cannot open file list \"%s\": %s", path, - strerror(errno)); + elog(ERROR, "Cannot open file list \"%s\": %s", path_temp, + strerror(errno)); - print_file_list(fp, files, root); + print_file_list(fp, files, root, external_prefix, external_list); if (fio_fflush(fp) || fio_fclose(fp)) - elog(ERROR, "cannot write file list \"%s\": %s", path, strerror(errno)); + { + errno_temp = errno; + fio_unlink(path_temp, FIO_BACKUP_HOST); + elog(ERROR, "Cannot write file list \"%s\": %s", + path_temp, strerror(errno)); + } + + if (fio_rename(path_temp, path, FIO_BACKUP_HOST) < 0) + { + errno_temp = errno; + fio_unlink(path_temp, FIO_BACKUP_HOST); + elog(ERROR, "Cannot rename configuration file \"%s\" to \"%s\": %s", + path_temp, path, strerror(errno_temp)); + } } /* @@ -651,6 +705,7 @@ readBackupControlFile(const char *path) {'u', 0, "compress-level", &backup->compress_level, SOURCE_FILE_STRICT}, {'b', 0, "from-replica", &backup->from_replica, SOURCE_FILE_STRICT}, {'s', 0, "primary-conninfo", &backup->primary_conninfo, SOURCE_FILE_STRICT}, + {'s', 0, "external-dirs", &backup->external_dir_str, SOURCE_FILE_STRICT}, {0} }; @@ -662,7 +717,7 @@ readBackupControlFile(const char *path) return NULL; } - parsed_options = config_read_opt(path, options, WARNING, true); + parsed_options = config_read_opt(path, options, WARNING, true, true); if (parsed_options == 0) { @@ -881,6 +936,7 @@ 
pgBackupInit(pgBackup *backup) backup->primary_conninfo = NULL; backup->program_version[0] = '\0'; backup->server_version[0] = '\0'; + backup->external_dir_str = NULL; } /* free pgBackup object */ @@ -890,6 +946,7 @@ pgBackupFree(void *backup) pgBackup *b = (pgBackup *) backup; pfree(b->primary_conninfo); + pfree(b->external_dir_str); pfree(backup); } @@ -948,6 +1005,32 @@ pgBackupGetPath2(const pgBackup *backup, char *path, size_t len, make_native_path(path); } +/* + * Check if multiple backups consider target backup to be their direct parent + */ +bool +is_prolific(parray *backup_list, pgBackup *target_backup) +{ + int i; + int child_counter = 0; + + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *tmp_backup = (pgBackup *) parray_get(backup_list, i); + + /* consider only OK and DONE backups */ + if (tmp_backup->parent_backup == target_backup->start_time && + (tmp_backup->status == BACKUP_STATUS_OK || + tmp_backup->status == BACKUP_STATUS_DONE)) + child_counter++; + } + + if (child_counter > 1) + return true; + else + return false; +} + /* * Find parent base FULL backup for current backup using parent_backup_link */ @@ -957,6 +1040,7 @@ find_parent_full_backup(pgBackup *current_backup) pgBackup *base_full_backup = NULL; base_full_backup = current_backup; + /* sanity */ if (!current_backup) elog(ERROR, "Target backup cannot be NULL"); @@ -966,12 +1050,48 @@ find_parent_full_backup(pgBackup *current_backup) } if (base_full_backup->backup_mode != BACKUP_MODE_FULL) - elog(ERROR, "Failed to find FULL backup parent for %s", - base36enc(current_backup->start_time)); + { + if (base_full_backup->parent_backup) + elog(WARNING, "Backup %s is missing", + base36enc(base_full_backup->parent_backup)); + else + elog(WARNING, "Failed to find parent FULL backup for %s", + base36enc(current_backup->start_time)); + return NULL; + } return base_full_backup; } +/* + * Find closest child of target_backup. If there are several direct + * offsprings in backup_list, then first win. + */ +pgBackup* +find_direct_child(parray *backup_list, pgBackup *target_backup) +{ + int i; + + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *tmp_backup = (pgBackup *) parray_get(backup_list, i); + + if (tmp_backup->backup_mode == BACKUP_MODE_FULL) + continue; + + /* Consider only OK and DONE children */ + if (tmp_backup->parent_backup == target_backup->start_time && + (tmp_backup->status == BACKUP_STATUS_OK || + tmp_backup->status == BACKUP_STATUS_DONE)) + { + return tmp_backup; + } + } + elog(WARNING, "Failed to find a direct child for backup %s", + base36enc(target_backup->start_time)); + return NULL; +} + /* * Interate over parent chain and look for any problems. * Return 0 if chain is broken. 
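write_backup() and write_backup_filelist() above now write to a *.tmp file and rename it into place, so a crash mid-write can no longer leave a truncated backup.control or file list behind. A minimal sketch of the same pattern using plain stdio instead of the fio_* wrappers (error handling trimmed to the essentials):

```c
/* Sketch of the write-temp-then-rename pattern adopted above. */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

static int
write_file_atomically(const char *path, const char *contents)
{
    char  path_temp[4096];
    FILE *fp;

    snprintf(path_temp, sizeof(path_temp), "%s.tmp", path);

    fp = fopen(path_temp, "w");
    if (fp == NULL)
        return -1;

    if (fputs(contents, fp) == EOF || fflush(fp) != 0 || fclose(fp) != 0)
    {
        int errno_temp = errno;

        unlink(path_temp);      /* never leave a half-written temp file */
        errno = errno_temp;
        return -1;
    }

    /* rename() is atomic on POSIX: readers see either the old or the new file */
    if (rename(path_temp, path) != 0)
    {
        int errno_temp = errno;

        unlink(path_temp);
        errno = errno_temp;
        return -1;
    }
    return 0;
}

int
main(void)
{
    if (write_file_atomically("backup.control", "status = OK\n") != 0)
        perror("write_file_atomically");
    return 0;
}
```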
@@ -1078,6 +1198,6 @@ get_backup_index_number(parray *backup_list, pgBackup *backup) if (tmp_backup->start_time == backup->start_time) return i; } - elog(ERROR, "Failed to find backup %s", base36enc(backup->start_time)); - return 0; + elog(WARNING, "Failed to find backup %s", base36enc(backup->start_time)); + return -1; } diff --git a/src/configure.c b/src/configure.c index de245f40..ace6fb1b 100644 --- a/src/configure.c +++ b/src/configure.c @@ -9,6 +9,8 @@ #include "pg_probackup.h" +#include + #include "utils/configuration.h" #include "utils/json.h" @@ -62,6 +64,11 @@ ConfigOption instance_options[] = OPTION_INSTANCE_GROUP, 0, option_get_value }, #endif + { + 's', 'E', "external-dirs", + &instance_config.external_dir_str, SOURCE_CMD, 0, + OPTION_INSTANCE_GROUP, 0, option_get_value + }, /* Connection options */ { 's', 'd', "pgdatabase", @@ -245,16 +252,22 @@ do_show_config(void) * values into the file. */ void -do_set_config(void) +do_set_config(bool missing_ok) { char path[MAXPGPATH]; + char path_temp[MAXPGPATH]; FILE *fp; int i; join_path_components(path, backup_instance_path, BACKUP_CATALOG_CONF_FILE); - fp = fopen(path, "wt"); + snprintf(path_temp, sizeof(path_temp), "%s.tmp", path); + + if (!missing_ok && !fileExists(path, FIO_LOCAL_HOST)) + elog(ERROR, "Configuration file \"%s\" doesn't exist", path); + + fp = fopen(path_temp, "wt"); if (fp == NULL) - elog(ERROR, "cannot create %s: %s", + elog(ERROR, "Cannot create configuration file \"%s\": %s", BACKUP_CATALOG_CONF_FILE, strerror(errno)); current_group = NULL; @@ -288,6 +301,14 @@ do_set_config(void) } fclose(fp); + + if (rename(path_temp, path) < 0) + { + int errno_temp = errno; + unlink(path_temp); + elog(ERROR, "Cannot rename configuration file \"%s\" to \"%s\": %s", + path_temp, path, strerror(errno_temp)); + } } void diff --git a/src/data.c b/src/data.c index 36c04043..ee7604f4 100644 --- a/src/data.c +++ b/src/data.c @@ -22,6 +22,8 @@ #include #endif +#include "utils/thread.h" + /* Union to ease operations on relation pages */ typedef union DataPage { @@ -303,7 +305,7 @@ prepare_page(backup_files_arg *arguments, BlockNumber absolute_blknum = file->segno * RELSEG_SIZE + blknum; /* check for interrupt */ - if (interrupted) + if (interrupted || thread_interrupted) elog(ERROR, "Interrupted during backup"); /* @@ -702,7 +704,7 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, in = fopen(file->path, PG_BINARY_R); if (in == NULL) { - elog(ERROR, "cannot open backup file \"%s\": %s", file->path, + elog(ERROR, "Cannot open backup file \"%s\": %s", file->path, strerror(errno)); } } @@ -717,7 +719,7 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, { int errno_tmp = errno; fclose(in); - elog(ERROR, "cannot open restore target file \"%s\": %s", + elog(ERROR, "Cannot open restore target file \"%s\": %s", to_path, strerror(errno_tmp)); } @@ -757,16 +759,22 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, break; /* EOF found */ else if (read_len != 0 && feof(in)) elog(ERROR, - "odd size page found at block %u of \"%s\"", + "Odd size page found at block %u of \"%s\"", blknum, file->path); else - elog(ERROR, "cannot read header of block %u of \"%s\": %s", + elog(ERROR, "Cannot read header of block %u of \"%s\": %s", blknum, file->path, strerror(errno_tmp)); } + if (header.block == 0 && header.compressed_size == 0) + { + elog(VERBOSE, "Skip empty block of \"%s\"", file->path); + continue; + } + if (header.block < blknum) - elog(ERROR, "backup is broken at 
file->path %s block %u", - file->path, blknum); + elog(ERROR, "Backup is broken at block %u of \"%s\"", + blknum, file->path); blknum = header.block; @@ -787,7 +795,7 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, read_len = fread(compressed_page.data, 1, MAXALIGN(header.compressed_size), in); if (read_len != MAXALIGN(header.compressed_size)) - elog(ERROR, "cannot read block %u of \"%s\" read %zu of %d", + elog(ERROR, "Cannot read block %u of \"%s\" read %zu of %d", blknum, file->path, read_len, header.compressed_size); /* @@ -811,7 +819,7 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, blknum, file->path, errormsg); if (uncompressed_size != BLCKSZ) - elog(ERROR, "page of file \"%s\" uncompressed to %d bytes. != BLCKSZ", + elog(ERROR, "Page of file \"%s\" uncompressed to %d bytes. != BLCKSZ", file->path, uncompressed_size); } @@ -822,7 +830,7 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, * Seek and write the restored page. */ if (fio_fseek(out, write_pos) < 0) - elog(ERROR, "cannot seek block %u of \"%s\": %s", + elog(ERROR, "Cannot seek block %u of \"%s\": %s", blknum, to_path, strerror(errno)); if (write_header) @@ -830,7 +838,7 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, /* We uncompressed the page, so its size is BLCKSZ */ header.compressed_size = BLCKSZ; if (fio_fwrite(out, &header, sizeof(header)) != sizeof(header)) - elog(ERROR, "cannot write header of block %u of \"%s\": %s", + elog(ERROR, "Cannot write header of block %u of \"%s\": %s", blknum, file->path, strerror(errno)); } @@ -841,14 +849,13 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, if (uncompressed_size == BLCKSZ) { if (fio_fwrite(out, page.data, BLCKSZ) != BLCKSZ) - elog(ERROR, "cannot write block %u of \"%s\": %s", + elog(ERROR, "Cannot write block %u of \"%s\": %s", blknum, file->path, strerror(errno)); } else { - /* if page wasn't compressed, we've read full block */ if (fio_fwrite(out, compressed_page.data, BLCKSZ) != BLCKSZ) - elog(ERROR, "cannot write block %u of \"%s\": %s", + elog(ERROR, "Cannot write block %u of \"%s\": %s", blknum, file->path, strerror(errno)); } } @@ -881,7 +888,7 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, * Truncate file to this length. 
*/ if (fio_ftruncate(out, write_pos) != 0) - elog(ERROR, "cannot truncate \"%s\": %s", + elog(ERROR, "Cannot truncate \"%s\": %s", file->path, strerror(errno)); elog(VERBOSE, "Delta truncate file %s to block %u", file->path, truncate_from); @@ -895,13 +902,14 @@ restore_data_file(const char *to_path, pgFile *file, bool allow_truncate, if (in) fclose(in); fio_fclose(out); - elog(ERROR, "cannot change mode of \"%s\": %s", to_path, + elog(ERROR, "Cannot change mode of \"%s\": %s", to_path, strerror(errno_tmp)); } if (fio_fflush(out) != 0 || fio_fclose(out)) - elog(ERROR, "cannot write \"%s\": %s", to_path, strerror(errno)); + elog(ERROR, "Cannot write \"%s\": %s", to_path, strerror(errno)); + if (in) fclose(in); } @@ -1093,7 +1101,7 @@ push_wal_file(const char *from_path, const char *to_path, bool is_compress, bool overwrite) { FILE *in = NULL; - int out; + int out = -1; char buf[XLOG_BLCKSZ]; const char *to_path_p; char to_path_temp[MAXPGPATH]; @@ -1558,7 +1566,7 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version, pg_crc32 crc; bool use_crc32c = backup_version <= 20021 || backup_version >= 20025; - elog(VERBOSE, "validate relation blocks for file %s", file->path); + elog(VERBOSE, "Validate relation blocks for file %s", file->path); in = fopen(file->path, PG_BINARY_R); if (in == NULL) @@ -1569,7 +1577,7 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version, return false; } - elog(ERROR, "cannot open file \"%s\": %s", + elog(ERROR, "Cannot open file \"%s\": %s", file->path, strerror(errno)); } @@ -1593,20 +1601,26 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version, break; /* EOF found */ else if (read_len != 0 && feof(in)) elog(WARNING, - "odd size page found at block %u of \"%s\"", + "Odd size page found at block %u of \"%s\"", blknum, file->path); else - elog(WARNING, "cannot read header of block %u of \"%s\": %s", + elog(WARNING, "Cannot read header of block %u of \"%s\": %s", blknum, file->path, strerror(errno_tmp)); return false; } COMP_FILE_CRC32(use_crc32c, crc, &header, read_len); + if (header.block == 0 && header.compressed_size == 0) + { + elog(VERBOSE, "Skip empty block of \"%s\"", file->path); + continue; + } + if (header.block < blknum) { - elog(WARNING, "backup is broken at file->path %s block %u", - file->path, blknum); + elog(WARNING, "Backup is broken at block %u of \"%s\"", + blknum, file->path); return false; } @@ -1614,8 +1628,8 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version, if (header.compressed_size == PageIsTruncated) { - elog(LOG, "File %s, block %u is truncated", - file->path, blknum); + elog(LOG, "Block %u of \"%s\" is truncated", + blknum, file->path); continue; } @@ -1625,7 +1639,7 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version, MAXALIGN(header.compressed_size), in); if (read_len != MAXALIGN(header.compressed_size)) { - elog(WARNING, "cannot read block %u of \"%s\" read %zu of %d", + elog(WARNING, "Cannot read block %u of \"%s\" read %zu of %d", blknum, file->path, read_len, header.compressed_size); return false; } @@ -1655,7 +1669,7 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version, is_valid = false; continue; } - elog(WARNING, "page of file \"%s\" uncompressed to %d bytes. != BLCKSZ", + elog(WARNING, "Page of file \"%s\" uncompressed to %d bytes. 
!= BLCKSZ", file->path, uncompressed_size); return false; } @@ -1677,7 +1691,7 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version, if (crc != file->crc) { - elog(WARNING, "Invalid CRC of backup file \"%s\" : %X. Expected %X", + elog(WARNING, "Invalid CRC of backup file \"%s\": %X. Expected %X", file->path, file->crc, crc); is_valid = false; } diff --git a/src/delete.c b/src/delete.c index 5d4176f2..1ce54629 100644 --- a/src/delete.c +++ b/src/delete.c @@ -16,6 +16,15 @@ static void delete_walfiles(XLogRecPtr oldest_lsn, TimeLineID oldest_tli, uint32 xlog_seg_size); +static void do_retention_internal(parray *backup_list, parray *to_keep_list, + parray *to_purge_list); +static void do_retention_merge(parray *backup_list, parray *to_keep_list, + parray *to_purge_list); +static void do_retention_purge(parray *to_keep_list, parray *to_purge_list); +static void do_retention_wal(void); + +static bool backup_deleted = false; /* At least one backup was deleted */ +static bool backup_merged = false; /* At least one merge was enacted */ void do_delete(time_t backup_id) @@ -24,71 +33,57 @@ do_delete(time_t backup_id) parray *backup_list, *delete_list; pgBackup *target_backup = NULL; - time_t parent_id = 0; XLogRecPtr oldest_lsn = InvalidXLogRecPtr; TimeLineID oldest_tli = 0; /* Get complete list of backups */ backup_list = catalog_get_backup_list(INVALID_BACKUP_ID); - if (backup_id != 0) + delete_list = parray_new(); + + /* Find backup to be deleted and make increment backups array to be deleted */ + for (i = 0; i < parray_num(backup_list); i++) { - delete_list = parray_new(); + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); - /* Find backup to be deleted and make increment backups array to be deleted */ - for (i = (int) parray_num(backup_list) - 1; i >= 0; i--) + if (backup->start_time == backup_id) { - pgBackup *backup = (pgBackup *) parray_get(backup_list, (size_t) i); - - if (backup->start_time == backup_id) - { - parray_append(delete_list, backup); - - /* - * Do not remove next backups, if target backup was finished - * incorrectly. 
- */ - if (backup->status == BACKUP_STATUS_ERROR) - break; - - /* Save backup id to retreive increment backups */ - parent_id = backup->start_time; - target_backup = backup; - } - else if (target_backup) - { - if (backup->backup_mode != BACKUP_MODE_FULL && - backup->parent_backup == parent_id) - { - /* Append to delete list increment backup */ - parray_append(delete_list, backup); - /* Save backup id to retreive increment backups */ - parent_id = backup->start_time; - } - else - break; - } + target_backup = backup; + break; } - - if (parray_num(delete_list) == 0) - elog(ERROR, "no backup found, cannot delete"); - - catalog_lock_backup_list(delete_list, parray_num(delete_list) - 1, 0); - - /* Delete backups from the end of list */ - for (i = (int) parray_num(delete_list) - 1; i >= 0; i--) - { - pgBackup *backup = (pgBackup *) parray_get(delete_list, (size_t) i); - - if (interrupted) - elog(ERROR, "interrupted during delete backup"); - - delete_backup_files(backup); - } - - parray_free(delete_list); } + /* sanity */ + if (!target_backup) + elog(ERROR, "Failed to find backup %s, cannot delete", base36enc(backup_id)); + + /* form delete list */ + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + /* check if backup is descendant of delete target */ + if (is_parent(target_backup->start_time, backup, false)) + parray_append(delete_list, backup); + } + parray_append(delete_list, target_backup); + + /* Lock marked for delete backups */ + catalog_lock_backup_list(delete_list, parray_num(delete_list) - 1, 0); + + /* Delete backups from the end of list */ + for (i = (int) parray_num(delete_list) - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(delete_list, (size_t) i); + + if (interrupted) + elog(ERROR, "interrupted during delete backup"); + + delete_backup_files(backup); + } + + parray_free(delete_list); + /* Clean WAL segments */ if (delete_wal) { @@ -116,140 +111,511 @@ do_delete(time_t backup_id) } /* - * Remove backups by retention policy. Retention policy is configured by + * Merge and purge backups by retention policy. Retention policy is configured by * retention_redundancy and retention_window variables. + * + * Invalid backups handled in Oracle style, so invalid backups are ignored + * for the purpose of retention fulfillment, + * i.e. CORRUPT full backup do not taken in account when deteremine + * which FULL backup should be keeped for redundancy obligation(only valid do), + * but if invalid backup is not guarded by retention - it is removed */ -int -do_retention_purge(void) +int do_retention(void) { - parray *backup_list; - size_t i; - XLogRecPtr oldest_lsn = InvalidXLogRecPtr; - TimeLineID oldest_tli = 0; - bool keep_next_backup = true; /* Do not delete first full backup */ - bool backup_deleted = false; /* At least one backup was deleted */ + parray *backup_list = NULL; + parray *to_keep_list = parray_new(); + parray *to_purge_list = parray_new(); - if (delete_expired) + bool retention_is_set = false; /* At least one retention policy is set */ + bool backup_list_is_empty = false; + + /* Get a complete list of backups. 
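The retention pass added below in do_retention_internal() boils down to a per-backup expiry test: a backup is marked for purge only when the time window does not protect it and enough newer valid FULL backups exist to satisfy the redundancy setting. A simplified sketch with concrete numbers (illustrative helper, not project code):

```c
/* Sketch of the expiry test applied in do_retention_internal() below. */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static bool
backup_is_expired(time_t recovery_time, time_t days_threshold,
                  unsigned redundancy, unsigned newer_full_backups)
{
    /* days_threshold == 0 means the window policy is disabled */
    bool not_kept_by_window = (days_threshold == 0 ||
                               days_threshold > recovery_time);
    bool redundancy_satisfied = (redundancy <= newer_full_backups);

    return not_kept_by_window && redundancy_satisfied;
}

int
main(void)
{
    time_t now = time(NULL);
    time_t threshold = now - 7 * 60 * 60 * 24;  /* retention-window = 7 days */

    /* 10-day-old FULL backup, one newer valid FULL, redundancy = 1: expired */
    printf("expired: %d\n",
           backup_is_expired(now - 10 * 60 * 60 * 24, threshold, 1, 1));
    return 0;
}
```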
*/ + backup_list = catalog_get_backup_list(INVALID_BACKUP_ID); + + if (parray_num(backup_list) == 0) + backup_list_is_empty = true; + + if (delete_expired || merge_expired) { if (instance_config.retention_redundancy > 0) elog(LOG, "REDUNDANCY=%u", instance_config.retention_redundancy); if (instance_config.retention_window > 0) elog(LOG, "WINDOW=%u", instance_config.retention_window); - if (instance_config.retention_redundancy == 0 - && instance_config.retention_window == 0) + if (instance_config.retention_redundancy == 0 && + instance_config.retention_window == 0) { + /* Retention is disabled but we still can cleanup wal */ elog(WARNING, "Retention policy is not set"); if (!delete_wal) return 0; } + else + /* At least one retention policy is active */ + retention_is_set = true; } - /* Get a complete list of backups. */ - backup_list = catalog_get_backup_list(INVALID_BACKUP_ID); - if (parray_num(backup_list) == 0) - { - elog(INFO, "backup list is empty, purging won't be executed"); - return 0; - } + if (retention_is_set && backup_list_is_empty) + elog(WARNING, "Backup list is empty, retention purge and merge are problematic"); - /* Find target backups to be deleted */ - if (delete_expired && - (instance_config.retention_redundancy > 0 || - instance_config.retention_window > 0)) - { - time_t days_threshold; - uint32 backup_num = 0; + /* Populate purge and keep lists, and show retention state messages */ + if (retention_is_set && !backup_list_is_empty) + do_retention_internal(backup_list, to_keep_list, to_purge_list); - days_threshold = time(NULL) - - (instance_config.retention_window * 60 * 60 * 24); + if (merge_expired && !dry_run && !backup_list_is_empty) + do_retention_merge(backup_list, to_keep_list, to_purge_list); - for (i = 0; i < parray_num(backup_list); i++) - { - pgBackup *backup = (pgBackup *) parray_get(backup_list, i); - uint32 backup_num_evaluate = backup_num; + if (delete_expired && !dry_run && !backup_list_is_empty) + do_retention_purge(to_keep_list, to_purge_list); - /* Consider only validated and correct backups */ - if (backup->status != BACKUP_STATUS_OK) - continue; - /* - * When a valid full backup was found, we can delete the - * backup that is older than it using the number of generations. - */ - if (backup->backup_mode == BACKUP_MODE_FULL) - backup_num++; + /* TODO: some sort of dry run for delete_wal */ + if (delete_wal && !dry_run) + do_retention_wal(); - /* Evaluate retention_redundancy if this backup is eligible for removal */ - if (keep_next_backup || - instance_config.retention_redundancy >= backup_num_evaluate + 1 || - (instance_config.retention_window > 0 && - backup->recovery_time >= days_threshold)) - { - /* Save LSN and Timeline to remove unnecessary WAL segments */ - oldest_lsn = backup->start_lsn; - oldest_tli = backup->tli; - - /* Save parent backup of this incremental backup */ - if (backup->backup_mode != BACKUP_MODE_FULL) - keep_next_backup = true; - /* - * Previous incremental backup was kept or this is first backup - * so do not delete this backup. - */ - else - keep_next_backup = false; - - continue; - } - - /* - * If the backup still is used do not interrupt go to the next - * backup. 
- */ - if (!lock_backup(backup)) - { - elog(WARNING, "Cannot lock backup %s directory, skip purging", - base36enc(backup->start_time)); - continue; - } - - /* Delete backup and update status to DELETED */ - delete_backup_files(backup); - backup_deleted = true; - } - } - - /* - * If oldest_lsn and oldest_tli weren`t set because previous step was skipped - * then set them now if we are going to purge WAL - */ - if (delete_wal && (XLogRecPtrIsInvalid(oldest_lsn))) - { - pgBackup *backup = (pgBackup *) parray_get(backup_list, parray_num(backup_list) - 1); - oldest_lsn = backup->start_lsn; - oldest_tli = backup->tli; - } - - /* Be paranoid */ - if (XLogRecPtrIsInvalid(oldest_lsn)) - elog(ERROR, "Not going to purge WAL because LSN is invalid"); - - /* Purge WAL files */ - if (delete_wal) - { - delete_walfiles(oldest_lsn, oldest_tli, instance_config.xlog_seg_size); - } - - /* Cleanup */ - parray_walk(backup_list, pgBackupFree); - parray_free(backup_list); + if (!backup_merged) + elog(INFO, "There are no backups to merge by retention policy"); if (backup_deleted) elog(INFO, "Purging finished"); else elog(INFO, "There are no backups to delete by retention policy"); + /* Cleanup */ + parray_walk(backup_list, pgBackupFree); + parray_free(backup_list); + parray_free(to_keep_list); + parray_free(to_purge_list); + return 0; + +} + +/* Evaluate every backup by retention policies and populate purge and keep lists. + * Also for every backup print its status ('Active' or 'Expired') according + * to active retention policies. + */ +static void +do_retention_internal(parray *backup_list, parray *to_keep_list, parray *to_purge_list) +{ + int i; + time_t current_time; + + /* For retention calculation */ + uint32 n_full_backups = 0; + int cur_full_backup_num = 0; + time_t days_threshold = 0; + + /* For fancy reporting */ + float actual_window = 0; + + /* Get current time */ + current_time = time(NULL); + + /* Calculate n_full_backups and days_threshold */ + if (instance_config.retention_redundancy > 0) + { + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + /* Consider only valid backups for Redundancy */ + if (instance_config.retention_redundancy > 0 && + backup->backup_mode == BACKUP_MODE_FULL && + (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE)) + { + n_full_backups++; + } + } + } + + if (instance_config.retention_window > 0) + { + days_threshold = current_time - + (instance_config.retention_window * 60 * 60 * 24); + } + + elog(INFO, "Evaluate backups by retention"); + for (i = (int) parray_num(backup_list) - 1; i >= 0; i--) + { + + pgBackup *backup = (pgBackup *) parray_get(backup_list, (size_t) i); + + /* Remember the serial number of latest valid FULL backup */ + if (backup->backup_mode == BACKUP_MODE_FULL && + (backup->status == BACKUP_STATUS_OK || + backup->status == BACKUP_STATUS_DONE)) + { + cur_full_backup_num++; + } + + /* Check if backup in needed by retention policy */ + if ((days_threshold == 0 || (days_threshold > backup->recovery_time)) && + (instance_config.retention_redundancy <= (n_full_backups - cur_full_backup_num))) + { + /* This backup is not guarded by retention + * + * Redundancy = 1 + * FULL CORRUPT in retention (not count toward redundancy limit) + * FULL in retention + * ------retention redundancy ------- + * PAGE3 in retention + * ------retention window ----------- + * PAGE2 out of retention + * PAGE1 out of retention + * FULL out of retention <- We are here + * FULL CORRUPT out of 
retention + */ + + /* Add backup to purge_list */ + elog(VERBOSE, "Mark backup %s for purge.", base36enc(backup->start_time)); + parray_append(to_purge_list, backup); + continue; + } + } + + /* sort keep_list and purge list */ + parray_qsort(to_keep_list, pgBackupCompareIdDesc); + parray_qsort(to_purge_list, pgBackupCompareIdDesc); + + /* FULL + * PAGE + * PAGE <- Only such backups must go into keep list + ---------retention window ---- + * PAGE + * FULL + * PAGE + * FULL + */ + + for (i = 0; i < parray_num(backup_list); i++) + { + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + /* Do not keep invalid backups by retention */ + if (backup->status != BACKUP_STATUS_OK && + backup->status != BACKUP_STATUS_DONE) + continue; + + /* only incremental backups should be in keep list */ + if (backup->backup_mode == BACKUP_MODE_FULL) + continue; + + /* orphan backup cannot be in keep list */ + if (!backup->parent_backup_link) + continue; + + /* skip if backup already in purge list */ + if (parray_bsearch(to_purge_list, backup, pgBackupCompareIdDesc)) + continue; + + /* if parent in purge_list, add backup to keep list */ + if (parray_bsearch(to_purge_list, + backup->parent_backup_link, + pgBackupCompareIdDesc)) + { + /* make keep list a bit sparse */ + parray_append(to_keep_list, backup); + continue; + } + } + + /* Message about retention state of backups + * TODO: Float is ugly, rewrite somehow. + */ + + cur_full_backup_num = 1; + for (i = 0; i < parray_num(backup_list); i++) + { + char *action = "Active"; + + pgBackup *backup = (pgBackup *) parray_get(backup_list, i); + + if (parray_bsearch(to_purge_list, backup, pgBackupCompareIdDesc)) + action = "Expired"; + + if (backup->recovery_time == 0) + actual_window = 0; + else + actual_window = ((float)current_time - (float)backup->recovery_time)/(60 * 60 * 24); + + elog(INFO, "Backup %s, mode: %s, status: %s. Redundancy: %i/%i, Time Window: %.2fd/%ud. %s", + base36enc(backup->start_time), + pgBackupGetBackupMode(backup), + status2str(backup->status), + cur_full_backup_num, + instance_config.retention_redundancy, + actual_window, instance_config.retention_window, + action); + + if (backup->backup_mode == BACKUP_MODE_FULL) + cur_full_backup_num++; + } +} + +/* Merge partially expired incremental chains */ +static void +do_retention_merge(parray *backup_list, parray *to_keep_list, parray *to_purge_list) +{ + int i; + int j; + + /* IMPORTANT: we can merge to only those FULL backup, that is NOT + * guarded by retention and final target of such merge must be + * an incremental backup that is guarded by retention !!! 
+ * + * PAGE4 E + * PAGE3 D + --------retention window --- + * PAGE2 C + * PAGE1 B + * FULL A + * + * after retention merge: + * PAGE4 E + * FULL D + */ + + /* Merging happens here */ + for (i = 0; i < parray_num(to_keep_list); i++) + { + char *keep_backup_id = NULL; + pgBackup *full_backup = NULL; + parray *merge_list = NULL; + + pgBackup *keep_backup = (pgBackup *) parray_get(to_keep_list, i); + + /* keep list may shrink during merge */ + if (!keep_backup) + continue; + + elog(INFO, "Consider backup %s for merge", base36enc(keep_backup->start_time)); + + /* Got valid incremental backup, find its FULL ancestor */ + full_backup = find_parent_full_backup(keep_backup); + + /* Failed to find parent */ + if (!full_backup) + { + elog(WARNING, "Failed to find FULL parent for %s", base36enc(keep_backup->start_time)); + continue; + } + + /* Check that ancestor is in purge_list */ + if (!parray_bsearch(to_purge_list, + full_backup, + pgBackupCompareIdDesc)) + { + elog(WARNING, "Skip backup %s for merging, " + "because his FULL parent is not marked for purge", base36enc(keep_backup->start_time)); + continue; + } + + /* FULL backup in purge list, thanks to sparsing of keep_list current backup is + * final target for merge, but there could be intermediate incremental + * backups from purge_list. + */ + + keep_backup_id = base36enc_dup(keep_backup->start_time); + elog(INFO, "Merge incremental chain between FULL backup %s and backup %s", + base36enc(full_backup->start_time), keep_backup_id); + pg_free(keep_backup_id); + + merge_list = parray_new(); + + /* Form up a merge list */ + while(keep_backup->parent_backup_link) + { + parray_append(merge_list, keep_backup); + keep_backup = keep_backup->parent_backup_link; + } + + /* sanity */ + if (!merge_list) + continue; + + /* sanity */ + if (parray_num(merge_list) == 0) + { + parray_free(merge_list); + continue; + } + + /* In the end add FULL backup for easy locking */ + parray_append(merge_list, full_backup); + + /* Remove FULL backup from purge list */ + parray_rm(to_purge_list, full_backup, pgBackupCompareId); + + /* Lock merge chain */ + catalog_lock_backup_list(merge_list, parray_num(merge_list) - 1, 0); + + + /* Merge list example: + * 0 PAGE3 + * 1 PAGE2 + * 2 PAGE1 + * 3 FULL + * + * Сonsequentially merge incremental backups from PAGE1 to PAGE3 + * into FULL. + */ + + for (j = parray_num(merge_list) - 2; j >= 0; j--) + { + pgBackup *from_backup = (pgBackup *) parray_get(merge_list, j); + + + /* Consider this extreme case */ + // PAGEa1 PAGEb1 both valid + // \ / + // FULL + + /* Check that FULL backup do not has multiple descendants + * full_backup always point to current full_backup after merge + */ + if (is_prolific(backup_list, full_backup)) + { + elog(WARNING, "Backup %s has multiple valid descendants. " + "Automatic merge is not possible.", base36enc(full_backup->start_time)); + break; + } + + merge_backups(full_backup, from_backup); + backup_merged = true; + + /* Try to remove merged incremental backup from both keep and purge lists */ + parray_rm(to_purge_list, from_backup, pgBackupCompareId); + parray_set(to_keep_list, i, NULL); + } + + /* Cleanup */ + parray_free(merge_list); + } + + elog(INFO, "Retention merging finished"); + +} + +/* Purge expired backups */ +static void +do_retention_purge(parray *to_keep_list, parray *to_purge_list) +{ + int i; + int j; + + /* Remove backups by retention policy. 
Retention policy is configured by + * retention_redundancy and retention_window + * Remove only backups, that do not have children guarded by retention + * + * TODO: We do not consider the situation if child is marked for purge + * but parent isn`t. Maybe something bad happened with time on server? + */ + + for (j = 0; j < parray_num(to_purge_list); j++) + { + bool purge = true; + + pgBackup *delete_backup = (pgBackup *) parray_get(to_purge_list, j); + + elog(LOG, "Consider backup %s for purge", + base36enc(delete_backup->start_time)); + + /* Evaluate marked for delete backup against every backup in keep list. + * If marked for delete backup is recognized as parent of one of those, + * then this backup should not be deleted. + */ + for (i = 0; i < parray_num(to_keep_list); i++) + { + char *keeped_backup_id; + + pgBackup *keep_backup = (pgBackup *) parray_get(to_keep_list, i); + + /* item could have been nullified in merge */ + if (!keep_backup) + continue; + + /* Full backup cannot be a descendant */ + if (keep_backup->backup_mode == BACKUP_MODE_FULL) + continue; + + keeped_backup_id = base36enc_dup(keep_backup->start_time); + + elog(LOG, "Check if backup %s is parent of backup %s", + base36enc(delete_backup->start_time), keeped_backup_id); + + if (is_parent(delete_backup->start_time, keep_backup, true)) + { + + /* We must not delete this backup, evict it from purge list */ + elog(LOG, "Retain backup %s from purge because his " + "descendant %s is guarded by retention", + base36enc(delete_backup->start_time), keeped_backup_id); + + purge = false; + pg_free(keeped_backup_id); + break; + } + pg_free(keeped_backup_id); + } + + /* Retain backup */ + if (!purge) + continue; + + /* Actual purge */ + if (!lock_backup(delete_backup)) + { + /* If the backup still is used, do not interrupt and go to the next */ + elog(WARNING, "Cannot lock backup %s directory, skip purging", + base36enc(delete_backup->start_time)); + continue; + } + + /* Delete backup and update status to DELETED */ + delete_backup_files(delete_backup); + backup_deleted = true; + + } +} + +/* Purge WAL */ +static void +do_retention_wal(void) +{ + parray *backup_list = NULL; + + XLogRecPtr oldest_lsn = InvalidXLogRecPtr; + TimeLineID oldest_tli = 0; + bool backup_list_is_empty = false; + + /* Get list of backups. 
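The purge loop above keeps a candidate alive whenever some backup in the keep list descends from it, using is_parent(). A stand-alone sketch of that ancestry test via parent links (simplified types; the real is_parent() can also work from parent IDs alone):

```c
/* Sketch, not project code: a purge candidate is retained when any kept
 * incremental backup can reach it by following parent links. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <time.h>

typedef struct Chain
{
    time_t start_time;
    struct Chain *parent;
} Chain;

static bool
is_ancestor(time_t candidate_id, const Chain *kept)
{
    for (const Chain *cur = kept->parent; cur != NULL; cur = cur->parent)
        if (cur->start_time == candidate_id)
            return true;
    return false;
}

int
main(void)
{
    Chain full  = {1000, NULL};
    Chain page1 = {2000, &full};
    Chain page2 = {3000, &page1};

    /* FULL is an ancestor of the kept PAGE2, so it must not be purged */
    printf("retain FULL: %d\n", is_ancestor(full.start_time, &page2));
    return 0;
}
```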
*/ + backup_list = catalog_get_backup_list(INVALID_BACKUP_ID); + + if (parray_num(backup_list) == 0) + backup_list_is_empty = true; + + /* Save LSN and Timeline to remove unnecessary WAL segments */ + if (!backup_list_is_empty) + { + pgBackup *backup = NULL; + /* Get LSN and TLI of oldest alive backup */ + backup = (pgBackup *) parray_get(backup_list, parray_num(backup_list) -1); + + oldest_tli = backup->tli; + oldest_lsn = backup->start_lsn; + } + + /* Be paranoid */ + if (!backup_list_is_empty && XLogRecPtrIsInvalid(oldest_lsn)) + elog(ERROR, "Not going to purge WAL because LSN is invalid"); + + /* Purge WAL files */ + delete_walfiles(oldest_lsn, oldest_tli, instance_config.xlog_seg_size); + + /* Cleanup */ + parray_walk(backup_list, pgBackupFree); + parray_free(backup_list); } /* @@ -289,7 +655,7 @@ delete_backup_files(pgBackup *backup) /* list files to be deleted */ files = parray_new(); pgBackupGetPath(backup, path, lengthof(path), NULL); - dir_list_file(files, path, false, true, true, FIO_BACKUP_HOST); + dir_list_file(files, path, false, true, true, 0, FIO_BACKUP_HOST); /* delete leaf node first */ parray_qsort(files, pgFileComparePathDesc); @@ -302,6 +668,9 @@ delete_backup_files(pgBackup *backup) elog(INFO, "Progress: (%zd/%zd). Process file \"%s\"", i + 1, num_files, file->path); + if (interrupted) + elog(ERROR, "interrupted during delete backup"); + fio_unlink(file->path, FIO_BACKUP_HOST); } diff --git a/src/dir.c b/src/dir.c index f81a47ff..c87d327f 100644 --- a/src/dir.c +++ b/src/dir.c @@ -123,14 +123,18 @@ static int BlackListCompare(const void *str1, const void *str2); static char dir_check_file(const char *root, pgFile *file); static void dir_list_file_internal(parray *files, const char *root, pgFile *parent, bool exclude, - bool omit_symlink, parray *black_list, fio_location location); + bool omit_symlink, parray *black_list, int external_dir_num, fio_location location); static void list_data_directories(parray *files, const char *path, bool is_root, bool exclude, fio_location location); +static void opt_path_map(ConfigOption *opt, const char *arg, + TablespaceList *list, const char *type); /* Tablespace mapping */ static TablespaceList tablespace_dirs = {NULL, NULL}; static TablespaceCreatedList tablespace_created_dirs = {NULL, NULL}; +/* Extra directories mapping */ +static TablespaceList external_remap_list = {NULL, NULL}; /* * Create directory, also create parent directories if necessary. @@ -159,7 +163,7 @@ dir_create_dir(const char *dir, mode_t mode) } pgFile * -pgFileNew(const char *path, bool omit_symlink, fio_location location) +pgFileNew(const char *path, bool omit_symlink, int external_dir_num, fio_location location) { struct stat st; pgFile *file; @@ -177,6 +181,7 @@ pgFileNew(const char *path, bool omit_symlink, fio_location location) file = pgFileInit(path); file->size = st.st_size; file->mode = st.st_mode; + file->external_dir_num = external_dir_num; return file; } @@ -227,6 +232,7 @@ pgFileInit(const char *path) /* Number of blocks readed during backup */ file->n_blocks = BLOCKNUM_INVALID; file->compress_alg = NOT_DEFINED_COMPRESS; + file->external_dir_num = 0; return file; } @@ -347,6 +353,30 @@ pgFileComparePath(const void *f1, const void *f2) return strcmp(f1p->path, f2p->path); } +/* + * Compare two pgFile with their path and external_dir_num + * in ascending order of ASCII code. 
+ */ +int +pgFileComparePathWithExternal(const void *f1, const void *f2) +{ + pgFile *f1p = *(pgFile **)f1; + pgFile *f2p = *(pgFile **)f2; + int res; + + res = strcmp(f1p->path, f2p->path); + if (!res) + { + if (f1p->external_dir_num > f2p->external_dir_num) + return 1; + else if (f1p->external_dir_num < f2p->external_dir_num) + return -1; + else + return 0; + } + return res; +} + /* Compare two pgFile with their path in descending order of ASCII code. */ int pgFileComparePathDesc(const void *f1, const void *f2) @@ -354,6 +384,16 @@ pgFileComparePathDesc(const void *f1, const void *f2) return -pgFileComparePath(f1, f2); } +/* + * Compare two pgFile with their path and external_dir_num + * in descending order of ASCII code. + */ +int +pgFileComparePathWithExternalDesc(const void *f1, const void *f2) +{ + return -pgFileComparePathWithExternal(f1, f2); +} + /* Compare two pgFile with their linked directory path. */ int pgFileCompareLinked(const void *f1, const void *f2) @@ -394,7 +434,7 @@ BlackListCompare(const void *str1, const void *str2) */ void dir_list_file(parray *files, const char *root, bool exclude, bool omit_symlink, - bool add_root, fio_location location) + bool add_root, int external_dir_num, fio_location location) { pgFile *file; parray *black_list = NULL; @@ -433,19 +473,24 @@ dir_list_file(parray *files, const char *root, bool exclude, bool omit_symlink, parray_qsort(black_list, BlackListCompare); } - file = pgFileNew(root, false, location); + file = pgFileNew(root, external_dir_num ? omit_symlink : false, external_dir_num, location); if (file == NULL) return; if (!S_ISDIR(file->mode)) { - elog(WARNING, "Skip \"%s\": unexpected file format", file->path); + if (external_dir_num) + elog(ERROR, " --external-dirs option \"%s\": directory or symbolic link expected", + file->path); + else + elog(WARNING, "Skip \"%s\": unexpected file format", file->path); return; } if (add_root) parray_append(files, file); - dir_list_file_internal(files, root, file, exclude, omit_symlink, black_list, location); + dir_list_file_internal(files, root, file, exclude, omit_symlink, black_list, + external_dir_num, location); if (!add_root) pgFileFree(file); @@ -664,7 +709,8 @@ dir_check_file(const char *root, pgFile *file) */ static void dir_list_file_internal(parray *files, const char *root, pgFile *parent, - bool exclude, bool omit_symlink, parray *black_list, fio_location location) + bool exclude, bool omit_symlink, parray *black_list, + int external_dir_num, fio_location location) { DIR *dir; struct dirent *dent; @@ -694,7 +740,7 @@ dir_list_file_internal(parray *files, const char *root, pgFile *parent, join_path_components(child, parent->path, dent->d_name); - file = pgFileNew(child, omit_symlink, location); + file = pgFileNew(child, omit_symlink, external_dir_num, location); if (file == NULL) continue; @@ -751,7 +797,7 @@ dir_list_file_internal(parray *files, const char *root, pgFile *parent, */ if (S_ISDIR(file->mode)) dir_list_file_internal(files, root, file, exclude, omit_symlink, - black_list, location); + black_list, external_dir_num, location); } if (errno && errno != ENOENT) @@ -831,7 +877,7 @@ list_data_directories(parray *files, const char *path, bool is_root, { pgFile *dir; - dir = pgFileNew(path, false, location); + dir = pgFileNew(path, false, 0, location); parray_append(files, dir); } @@ -898,13 +944,14 @@ get_tablespace_created(const char *link) } /* - * Split argument into old_dir and new_dir and append to tablespace mapping + * Split argument into old_dir and new_dir and append to 
mapping * list. * * Copy of function tablespace_list_append() from pg_basebackup.c. */ -void -opt_tablespace_map(ConfigOption *opt, const char *arg) +static void +opt_path_map(ConfigOption *opt, const char *arg, TablespaceList *list, + const char *type) { TablespaceListCell *cell = pgut_new(TablespaceListCell); char *dst; @@ -923,7 +970,7 @@ opt_tablespace_map(ConfigOption *opt, const char *arg) else if (*arg_ptr == '=' && (arg_ptr == arg || *(arg_ptr - 1) != '\\')) { if (*cell->new_dir) - elog(ERROR, "multiple \"=\" signs in tablespace mapping\n"); + elog(ERROR, "multiple \"=\" signs in %s mapping\n", type); else dst = dst_ptr = cell->new_dir; } @@ -932,8 +979,8 @@ opt_tablespace_map(ConfigOption *opt, const char *arg) } if (!*cell->old_dir || !*cell->new_dir) - elog(ERROR, "invalid tablespace mapping format \"%s\", " - "must be \"OLDDIR=NEWDIR\"", arg); + elog(ERROR, "invalid %s mapping format \"%s\", " + "must be \"OLDDIR=NEWDIR\"", type, arg); /* * This check isn't absolutely necessary. But all tablespaces are created @@ -942,18 +989,32 @@ opt_tablespace_map(ConfigOption *opt, const char *arg) * consistent with the new_dir check. */ if (!is_absolute_path(cell->old_dir)) - elog(ERROR, "old directory is not an absolute path in tablespace mapping: %s\n", - cell->old_dir); + elog(ERROR, "old directory is not an absolute path in %s mapping: %s\n", + type, cell->old_dir); if (!is_absolute_path(cell->new_dir)) - elog(ERROR, "new directory is not an absolute path in tablespace mapping: %s\n", - cell->new_dir); + elog(ERROR, "new directory is not an absolute path in %s mapping: %s\n", + type, cell->new_dir); - if (tablespace_dirs.tail) - tablespace_dirs.tail->next = cell; + if (list->tail) + list->tail->next = cell; else - tablespace_dirs.head = cell; - tablespace_dirs.tail = cell; + list->head = cell; + list->tail = cell; +} + +/* Parse tablespace mapping */ +void +opt_tablespace_map(ConfigOption *opt, const char *arg) +{ + opt_path_map(opt, arg, &tablespace_dirs, "tablespace"); +} + +/* Parse external directories mapping */ +void +opt_externaldir_map(ConfigOption *opt, const char *arg) +{ + opt_path_map(opt, arg, &external_remap_list, "external directory"); } /* @@ -1226,11 +1287,66 @@ check_tablespace_mapping(pgBackup *backup) parray_free(links); } +void +check_external_dir_mapping(pgBackup *backup) +{ + TablespaceListCell *cell; + parray *external_dirs_to_restore; + bool found; + int i; + + if (!backup->external_dir_str) + { + if (external_remap_list.head) + elog(ERROR, "--external-mapping option's old directory doesn't " + "have an entry in list of external directories of current " + "backup: \"%s\"", external_remap_list.head->old_dir); + return; + } + + external_dirs_to_restore = make_external_directory_list(backup->external_dir_str); + for (cell = external_remap_list.head; cell; cell = cell->next) + { + char *old_dir = cell->old_dir; + + found = false; + for (i = 0; i < parray_num(external_dirs_to_restore); i++) + { + char *external_dir = parray_get(external_dirs_to_restore, i); + if (strcmp(old_dir, external_dir) == 0) + { + found = true; + break; + } + } + if (!found) + elog(ERROR, "--external-mapping option's old directory doesn't " + "have an entry in list of external directories of current " + "backup: \"%s\"", cell->old_dir); + } +} + +char * +get_external_remap(char *current_dir) +{ + TablespaceListCell *cell; + + for (cell = external_remap_list.head; cell; cell = cell->next) + { + char *old_dir = cell->old_dir; + + if (strcmp(old_dir, current_dir) == 0) + return cell->new_dir; + 
} + return current_dir; +} + /* * Print backup content list. */ void -print_file_list(FILE *out, const parray *files, const char *root) +print_file_list(FILE *out, const parray *files, const char *root, + const char *external_prefix, parray *external_list) { size_t i; @@ -1243,14 +1359,20 @@ print_file_list(FILE *out, const parray *files, const char *root) /* omit root directory portion */ if (root && strstr(path, root) == path) path = GetRelativePath(path, root); + else if (file->external_dir_num && !external_prefix) + { + Assert(external_list); + path = GetRelativePath(path, parray_get(external_list, + file->external_dir_num - 1)); + } fio_fprintf(out, "{\"path\":\"%s\", \"size\":\"" INT64_FORMAT "\", " "\"mode\":\"%u\", \"is_datafile\":\"%u\", " "\"is_cfs\":\"%u\", \"crc\":\"%u\", " - "\"compress_alg\":\"%s\"", + "\"compress_alg\":\"%s\", \"external_dir_num\":\"%d\"", path, file->write_size, file->mode, file->is_datafile ? 1 : 0, file->is_cfs ? 1 : 0, file->crc, - deparse_compress_alg(file->compress_alg)); + deparse_compress_alg(file->compress_alg), file->external_dir_num); if (file->is_datafile) fio_fprintf(out, ",\"segno\":\"%d\"", file->segno); @@ -1413,7 +1535,8 @@ bad_format: * If root is not NULL, path will be absolute path. */ parray * -dir_read_file_list(const char *root, const char *file_txt, fio_location location) +dir_read_file_list(const char *root, const char *external_prefix, + const char *file_txt, fio_location location) { FILE *fp; parray *files; @@ -1435,6 +1558,7 @@ dir_read_file_list(const char *root, const char *file_txt, fio_location location mode, /* bit length of mode_t depends on platforms */ is_datafile, is_cfs, + external_dir_num, crc, segno, n_blocks; @@ -1447,8 +1571,16 @@ dir_read_file_list(const char *root, const char *file_txt, fio_location location get_control_value(buf, "is_cfs", NULL, &is_cfs, false); get_control_value(buf, "crc", NULL, &crc, true); get_control_value(buf, "compress_alg", compress_alg_string, NULL, false); + get_control_value(buf, "external_dir_num", NULL, &external_dir_num, false); - if (root) + if (external_dir_num && external_prefix) + { + char temp[MAXPGPATH]; + + makeExternalDirPathByNum(temp, external_prefix, external_dir_num); + join_path_components(filepath, temp, path); + } + else if (root) join_path_components(filepath, root, path); else strcpy(filepath, path); @@ -1461,6 +1593,7 @@ dir_read_file_list(const char *root, const char *file_txt, fio_location location file->is_cfs = is_cfs ? 
true : false;
 		file->crc = (pg_crc32) crc;
 		file->compress_alg = parse_compress_alg(compress_alg_string);
+		file->external_dir_num = external_dir_num;
 
 		/*
 		 * Optional fields
@@ -1546,3 +1679,56 @@ pgFileSize(const char *path)
 
 	return buf.st_size;
 }
+
+/*
+ * Construct parray containing external directory paths
+ * from a string like /path1:/path2
+ */
+parray *
+make_external_directory_list(const char *colon_separated_dirs)
+{
+	char *p;
+	parray *list = parray_new();
+	char *tmp = pg_strdup(colon_separated_dirs);
+
+	p = strtok(tmp, ":");
+	while (p != NULL)
+	{
+		if (is_absolute_path(p))
+			parray_append(list, pg_strdup(p));
+		else
+			elog(ERROR, "External directory \"%s\" is not an absolute path", p);
+		p = strtok(NULL, ":");
+	}
+	pfree(tmp);
+	parray_qsort(list, BlackListCompare);
+	return list;
+}
+
+/* Free memory of parray containing strings */
+void
+free_dir_list(parray *list)
+{
+	parray_walk(list, pfree);
+	parray_free(list);
+}
+
+/* Append int "dir_num" to string "path_prefix" */
+void
+makeExternalDirPathByNum(char *ret_path, const char *path_prefix,
+						 const int dir_num)
+{
+	sprintf(ret_path, "%s%d", path_prefix, dir_num);
+}
+
+/* Check if "dir" is present in "dirs_list" */
+bool
+backup_contains_external(const char *dir, parray *dirs_list)
+{
+	void *search_result;
+
+	if (!dirs_list) /* There are no external dirs in the backup */
+		return false;
+	search_result = parray_bsearch(dirs_list, dir, BlackListCompare);
+	return search_result != NULL;
+}
diff --git a/src/help.c b/src/help.c
index a5ee373e..279b894f 100644
--- a/src/help.c
+++ b/src/help.c
@@ -107,7 +107,7 @@ help_pg_probackup(void)
 	printf(_(" [--log-directory=log-directory]\n"));
 	printf(_(" [--log-rotation-size=log-rotation-size]\n"));
 	printf(_(" [--log-rotation-age=log-rotation-age]\n"));
-	printf(_(" [--delete-expired] [--delete-wal]\n"));
+	printf(_(" [--delete-expired] [--delete-wal] [--merge-expired]\n"));
 	printf(_(" [--retention-redundancy=retention-redundancy]\n"));
 	printf(_(" [--retention-window=retention-window]\n"));
 	printf(_(" [--compress]\n"));
@@ -122,11 +122,13 @@ help_pg_probackup(void)
 	printf(_(" [--remote-proto] [--remote-host]\n"));
 	printf(_(" [--remote-port] [--remote-path] [--remote-user]\n"));
 	printf(_(" [--ssh-options]\n"));
+	printf(_(" [--external-dirs=external-directory-path]\n"));
 
 	printf(_("\n %s restore -B backup-path --instance=instance_name\n"), PROGRAM_NAME);
-	printf(_(" [-D pgdata-path] [-i backup-id] [--progress]\n"));
+	printf(_(" [-D pgdata-path] [-i backup-id] [-j num-threads]\n"));
 	printf(_(" [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]]\n"));
-	printf(_(" [--timeline=timeline] [-T OLDDIR=NEWDIR]\n"));
+	printf(_(" [--timeline=timeline] [-T OLDDIR=NEWDIR] [--progress]\n"));
+	printf(_(" [--external-mapping=OLDDIR=NEWDIR]\n"));
 	printf(_(" [--immediate] [--recovery-target-name=target-name]\n"));
 	printf(_(" [--recovery-target-action=pause|promote|shutdown]\n"));
 	printf(_(" [--restore-as-replica]\n"));
@@ -135,9 +137,10 @@ help_pg_probackup(void)
 	printf(_(" [--remote-proto] [--remote-host]\n"));
 	printf(_(" [--remote-port] [--remote-path] [--remote-user]\n"));
 	printf(_(" [--ssh-options]\n"));
+	printf(_(" [--skip-external-dirs]\n"));
 
 	printf(_("\n %s validate -B backup-path [--instance=instance_name]\n"), PROGRAM_NAME);
-	printf(_(" [-i backup-id] [--progress]\n"));
+	printf(_(" [-i backup-id] [--progress] [-j num-threads]\n"));
 	printf(_(" [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]]\n"));
 	printf(_(" [--recovery-target-name=target-name]\n"));
 	printf(_(" [--timeline=timeline]\n"));
@@ -148,9 +151,11 @@ help_pg_probackup(void) printf(_(" [--format=format]\n")); printf(_("\n %s delete -B backup-path --instance=instance_name\n"), PROGRAM_NAME); - printf(_(" [--wal] [-i backup-id | --expired]\n")); + printf(_(" [--wal] [-i backup-id | --expired | --merge-expired]\n")); + printf(_(" [--dry-run]\n")); + printf(_("\n %s merge -B backup-path --instance=instance_name\n"), PROGRAM_NAME); - printf(_(" -i backup-id\n")); + printf(_(" -i backup-id [--progress] [-j num-threads]\n")); printf(_("\n %s add-instance -B backup-path -D pgdata-path\n"), PROGRAM_NAME); printf(_(" --instance=instance_name\n")); @@ -211,7 +216,7 @@ help_backup(void) printf(_(" [--log-directory=log-directory]\n")); printf(_(" [--log-rotation-size=log-rotation-size]\n")); printf(_(" [--log-rotation-age=log-rotation-age]\n")); - printf(_(" [--delete-expired] [--delete-wal]\n")); + printf(_(" [--delete-expired] [--delete-wal] [--merge-expired]\n")); printf(_(" [--retention-redundancy=retention-redundancy]\n")); printf(_(" [--retention-window=retention-window]\n")); printf(_(" [--compress]\n")); @@ -225,7 +230,8 @@ help_backup(void) printf(_(" [--skip-block-validation]\n")); printf(_(" [--remote-proto] [--remote-host]\n")); printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); - printf(_(" [--ssh-options]\n\n")); + printf(_(" [--ssh-options]\n")); + printf(_(" [-E external-dirs=external-directory-path]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" -b, --backup-mode=backup-mode backup mode=FULL|PAGE|DELTA|PTRACK\n")); @@ -239,6 +245,9 @@ help_backup(void) printf(_(" --archive-timeout=timeout wait timeout for WAL segment archiving (default: 5min)\n")); printf(_(" --progress show progress\n")); printf(_(" --skip-block-validation set to validate only file-level checksum\n")); + printf(_(" -E --external-dirs=external-directory-path\n")); + printf(_(" backup some directories not from pgdata \n")); + printf(_(" (example: --external-dirs=/tmp/dir1:/tmp/dir2)\n")); printf(_("\n Logging options:\n")); printf(_(" --log-level-console=log-level-console\n")); @@ -264,16 +273,19 @@ help_backup(void) printf(_("\n Retention options:\n")); printf(_(" --delete-expired delete backups expired according to current\n")); printf(_(" retention policy after successful backup completion\n")); + printf(_(" --merge-expired merge backups expired according to current\n")); + printf(_(" retention policy after successful backup completion\n")); printf(_(" --delete-wal remove redundant archived wal files\n")); printf(_(" --retention-redundancy=retention-redundancy\n")); printf(_(" number of full backups to keep; 0 disables; (default: 0)\n")); printf(_(" --retention-window=retention-window\n")); printf(_(" number of days of recoverability; 0 disables; (default: 0)\n")); + printf(_(" --dry-run perform a trial run without any changes\n")); printf(_("\n Compression options:\n")); - printf(_(" --compress compress data files\n")); + printf(_(" --compress alias for --compress-algorithm='zlib' and --compress-level=1\n")); printf(_(" --compress-algorithm=compress-algorithm\n")); - printf(_(" available options: 'zlib', 'pglz', 'none' (default: zlib)\n")); + printf(_(" available options: 'zlib', 'pglz', 'none' (default: none)\n")); printf(_(" --compress-level=compress-level\n")); printf(_(" level of compression [0-9] (default: 1)\n")); @@ -306,13 +318,15 @@ static void help_restore(void) { printf(_("%s restore -B backup-path --instance=instance_name\n"), PROGRAM_NAME); - printf(_(" [-D 
pgdata-path] [-i backup-id] [--progress]\n")); + printf(_(" [-D pgdata-path] [-i backup-id] [-j num-threads] [--progress]\n")); printf(_(" [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]]\n")); printf(_(" [--timeline=timeline] [-T OLDDIR=NEWDIR]\n")); + printf(_(" [--external-mapping=OLDDIR=NEWDIR]\n")); printf(_(" [--immediate] [--recovery-target-name=target-name]\n")); printf(_(" [--recovery-target-action=pause|promote|shutdown]\n")); - printf(_(" [--restore-as-replica] [--no-validate]\n\n")); + printf(_(" [--restore-as-replica] [--no-validate]\n")); printf(_(" [--skip-block-validation]\n")); + printf(_(" [--skip-external-dirs]\n")); printf(_(" [--remote-proto] [--remote-host]\n")); printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); printf(_(" [--ssh-options]\n\n")); @@ -322,6 +336,7 @@ help_restore(void) printf(_(" -D, --pgdata=pgdata-path location of the database storage area\n")); printf(_(" -i, --backup-id=backup-id backup to restore\n")); + printf(_(" -j, --threads=NUM number of parallel threads\n")); printf(_(" --progress show progress\n")); printf(_(" --time=time time stamp up to which recovery will proceed\n")); @@ -331,6 +346,8 @@ help_restore(void) printf(_(" --timeline=timeline recovering into a particular timeline\n")); printf(_(" -T, --tablespace-mapping=OLDDIR=NEWDIR\n")); printf(_(" relocate the tablespace from directory OLDDIR to NEWDIR\n")); + printf(_(" --external-mapping=OLDDIR=NEWDIR\n")); + printf(_(" relocate the external directory from OLDDIR to NEWDIR\n")); printf(_(" --immediate end recovery as soon as a consistent state is reached\n")); printf(_(" --recovery-target-name=target-name\n")); @@ -343,6 +360,7 @@ help_restore(void) printf(_(" to ease setting up a standby server\n")); printf(_(" --no-validate disable backup validation during restore\n")); printf(_(" --skip-block-validation set to validate only file-level checksum\n")); + printf(_(" --skip-external-dirs do not restore all external directories\n")); printf(_("\n Logging options:\n")); printf(_(" --log-level-console=log-level-console\n")); @@ -389,6 +407,7 @@ help_validate(void) printf(_(" -i, --backup-id=backup-id backup to validate\n")); printf(_(" --progress show progress\n")); + printf(_(" -j, --threads=NUM number of parallel threads\n")); printf(_(" --time=time time stamp up to which recovery will proceed\n")); printf(_(" --xid=xid transaction ID up to which recovery will proceed\n")); printf(_(" --lsn=lsn LSN of the write-ahead log location up to which recovery will proceed\n")); @@ -437,14 +456,19 @@ static void help_delete(void) { printf(_("%s delete -B backup-path --instance=instance_name\n"), PROGRAM_NAME); - printf(_(" [-i backup-id | --expired] [--wal]\n\n")); + printf(_(" [-i backup-id | --expired | --merge-expired] [--wal]\n")); + printf(_(" [-j num-threads] [--dry-run]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" --instance=instance_name name of the instance\n")); printf(_(" -i, --backup-id=backup-id backup to delete\n")); printf(_(" --expired delete backups expired according to current\n")); printf(_(" retention policy\n")); + printf(_(" --merge-expired merge backups expired according to current\n")); + printf(_(" retention policy\n")); printf(_(" --wal remove unnecessary wal files in WAL ARCHIVE\n")); + printf(_(" -j, --threads=NUM number of parallel threads\n")); + printf(_(" --dry-run perform a trial run without any changes\n")); printf(_("\n Logging options:\n")); printf(_(" 
--log-level-console=log-level-console\n")); @@ -528,11 +552,15 @@ help_set_config(void) printf(_(" [-d dbname] [-h host] [-p port] [-U username]\n")); printf(_(" [--master-db=db_name] [--master-host=host_name]\n")); printf(_(" [--master-port=port] [--master-user=user_name]\n")); - printf(_(" [--replica-timeout=timeout]\n\n")); - printf(_(" [--archive-timeout=timeout]\n\n")); + printf(_(" [--replica-timeout=timeout]\n")); + printf(_(" [--archive-timeout=timeout]\n")); + printf(_(" [-E external-dirs=external-directory-path]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" --instance=instance_name name of the instance\n")); + printf(_(" -E --external-dirs=external-directory-path\n")); + printf(_(" backup some directories not from pgdata \n")); + printf(_(" (example: --external-dirs=/tmp/dir1:/tmp/dir2)\n")); printf(_("\n Logging options:\n")); printf(_(" --log-level-console=log-level-console\n")); @@ -562,8 +590,9 @@ help_set_config(void) printf(_(" number of days of recoverability; 0 disables; (default: 0)\n")); printf(_("\n Compression options:\n")); + printf(_(" --compress alias for --compress-algorithm='zlib' and --compress-level=1\n")); printf(_(" --compress-algorithm=compress-algorithm\n")); - printf(_(" available options: 'zlib','pglz','none'\n")); + printf(_(" available options: 'zlib','pglz','none' (default: 'none')\n")); printf(_(" --compress-level=compress-level\n")); printf(_(" level of compression [0-9] (default: 1)\n")); @@ -600,14 +629,18 @@ help_add_instance(void) { printf(_("%s add-instance -B backup-path -D pgdata-path\n"), PROGRAM_NAME); printf(_(" --instance=instance_name\n")); - printf(_(" [--remote-proto] [--remote-host]\n")); - printf(_(" [--remote-port] [--remote-path] [--remote-user]\n")); - printf(_(" [--ssh-options]\n\n")); + printf(_(" --remote-proto --remote-host\n")); + printf(_(" --remote-port --remote-path --remote-user\n")); + printf(_(" --ssh-options\n")); + printf(_(" -E external-dirs=external-directory-path\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" -D, --pgdata=pgdata-path location of the database storage area\n")); printf(_(" --instance=instance_name name of the new instance\n")); + printf(_(" -E --external-dirs=external-directory-path\n")); + printf(_(" backup some directories not from pgdata \n")); + printf(_(" (example: --external-dirs=/tmp/dir1:/tmp/dir2)\n")); printf(_("\n Remote options:\n")); printf(_(" --remote-proto=protocol remote protocol to use\n")); printf(_(" available options: 'ssh', 'none' (default: none)\n")); @@ -647,9 +680,9 @@ help_archive_push(void) printf(_(" relative path name of the WAL file on the server\n")); printf(_(" --wal-file-name=wal-file-name\n")); printf(_(" name of the WAL file to retrieve from the server\n")); - printf(_(" --compress compress WAL file during archiving\n")); + printf(_(" --compress alias for --compress-algorithm='zlib' and --compress-level=1\n")); printf(_(" --compress-algorithm=compress-algorithm\n")); - printf(_(" available options: 'zlib','none'\n")); + printf(_(" available options: 'zlib', 'none' (default: 'none')\n")); printf(_(" --compress-level=compress-level\n")); printf(_(" level of compression [0-9] (default: 1)\n")); printf(_(" --overwrite overwrite archived WAL file\n")); diff --git a/src/init.c b/src/init.c index fb9b7bbb..4fe1168c 100644 --- a/src/init.c +++ b/src/init.c @@ -104,7 +104,7 @@ do_add_instance(void) config_set_opt(instance_options, &instance_config.xlog_seg_size, 
SOURCE_FILE); /* pgdata was set through command line */ - do_set_config(); + do_set_config(true); elog(INFO, "Instance '%s' successfully inited", instance_name); return 0; diff --git a/src/merge.c b/src/merge.c index 3cf7f866..7ef57098 100644 --- a/src/merge.c +++ b/src/merge.c @@ -18,12 +18,14 @@ typedef struct { parray *to_files; parray *files; + parray *from_external; pgBackup *to_backup; pgBackup *from_backup; - const char *to_root; const char *from_root; + const char *to_external_prefix; + const char *from_external_prefix; /* * Return value from the thread. @@ -32,8 +34,12 @@ typedef struct int ret; } merge_files_arg; -static void merge_backups(pgBackup *backup, pgBackup *next_backup); static void *merge_files(void *arg); +static void +reorder_external_dirs(pgBackup *to_backup, parray *to_external, + parray *from_external); +static int +get_external_index(const char *key, const parray *list); /* * Implementation of MERGE command. @@ -46,12 +52,10 @@ void do_merge(time_t backup_id) { parray *backups; + parray *merge_list = parray_new(); pgBackup *dest_backup = NULL; pgBackup *full_backup = NULL; - time_t prev_parent = INVALID_BACKUP_ID; int i; - int dest_backup_idx = 0; - int full_backup_idx = 0; if (backup_id == INVALID_BACKUP_ID) elog(ERROR, "required parameter is not specified: --backup-id"); @@ -64,73 +68,79 @@ do_merge(time_t backup_id) /* Get list of all backups sorted in order of descending start time */ backups = catalog_get_backup_list(INVALID_BACKUP_ID); - /* Find destination and parent backups */ + /* Find destination backup first */ for (i = 0; i < parray_num(backups); i++) { pgBackup *backup = (pgBackup *) parray_get(backups, i); - if (backup->start_time > backup_id) - continue; - else if (backup->start_time == backup_id && !dest_backup) + /* found target */ + if (backup->start_time == backup_id) { + /* sanity */ if (backup->status != BACKUP_STATUS_OK && /* It is possible that previous merging was interrupted */ backup->status != BACKUP_STATUS_MERGING && backup->status != BACKUP_STATUS_DELETING) - elog(ERROR, "Backup %s has status: %s", - base36enc(backup->start_time), status2str(backup->status)); + elog(ERROR, "Backup %s has status: %s", + base36enc(backup->start_time), status2str(backup->status)); if (backup->backup_mode == BACKUP_MODE_FULL) elog(ERROR, "Backup %s is full backup", base36enc(backup->start_time)); dest_backup = backup; - dest_backup_idx = i; + break; } - else - { - if (dest_backup == NULL) - elog(ERROR, "Target backup %s was not found", base36enc(backup_id)); - - if (backup->start_time != prev_parent) - continue; - - if (backup->status != BACKUP_STATUS_OK && - /* It is possible that previous merging was interrupted */ - backup->status != BACKUP_STATUS_MERGING) - elog(ERROR, "Backup %s has status: %s", - base36enc(backup->start_time), status2str(backup->status)); - - /* If we already found dest_backup, look for full backup */ - if (dest_backup && backup->backup_mode == BACKUP_MODE_FULL) - { - full_backup = backup; - full_backup_idx = i; - - /* Found target and full backups, so break the loop */ - break; - } - } - - prev_parent = backup->parent_backup; } + /* sanity */ if (dest_backup == NULL) elog(ERROR, "Target backup %s was not found", base36enc(backup_id)); + + /* get full backup */ + full_backup = find_parent_full_backup(dest_backup); + + /* sanity */ if (full_backup == NULL) elog(ERROR, "Parent full backup for the given backup %s was not found", base36enc(backup_id)); - Assert(full_backup_idx != dest_backup_idx); + /* sanity */ + if 
(full_backup->status != BACKUP_STATUS_OK && + /* It is possible that previous merging was interrupted */ + full_backup->status != BACKUP_STATUS_MERGING) + elog(ERROR, "Backup %s has status: %s", + base36enc(full_backup->start_time), status2str(full_backup->status)); - catalog_lock_backup_list(backups, full_backup_idx, dest_backup_idx); + //Assert(full_backup_idx != dest_backup_idx); + + /* form merge list */ + while(dest_backup->parent_backup_link) + { + /* sanity */ + if (dest_backup->status != BACKUP_STATUS_OK && + /* It is possible that previous merging was interrupted */ + dest_backup->status != BACKUP_STATUS_MERGING && + dest_backup->status != BACKUP_STATUS_DELETING) + elog(ERROR, "Backup %s has status: %s", + base36enc(dest_backup->start_time), status2str(dest_backup->status)); + + parray_append(merge_list, dest_backup); + dest_backup = dest_backup->parent_backup_link; + } + + /* Add FULL backup for easy locking */ + parray_append(merge_list, full_backup); + + /* Lock merge chain */ + catalog_lock_backup_list(merge_list, parray_num(merge_list) - 1, 0); /* * Found target and full backups, merge them and intermediate backups */ - for (i = full_backup_idx; i > dest_backup_idx; i--) + for (i = parray_num(merge_list) - 2; i >= 0; i--) { - pgBackup *from_backup = (pgBackup *) parray_get(backups, i - 1); + pgBackup *from_backup = (pgBackup *) parray_get(merge_list, i); merge_backups(full_backup, from_backup); } @@ -142,6 +152,7 @@ do_merge(time_t backup_id) /* cleanup */ parray_walk(backups, pgBackupFree); parray_free(backups); + parray_free(merge_list); elog(INFO, "Merge of backup %s completed", base36enc(backup_id)); } @@ -152,18 +163,22 @@ do_merge(time_t backup_id) * - remove unnecessary directories and files from to_backup * - update metadata of from_backup, it becames FULL backup */ -static void +void merge_backups(pgBackup *to_backup, pgBackup *from_backup) { char *to_backup_id = base36enc_dup(to_backup->start_time), *from_backup_id = base36enc_dup(from_backup->start_time); char to_backup_path[MAXPGPATH], to_database_path[MAXPGPATH], + to_external_prefix[MAXPGPATH], from_backup_path[MAXPGPATH], from_database_path[MAXPGPATH], + from_external_prefix[MAXPGPATH], control_file[MAXPGPATH]; parray *files, *to_files; + parray *to_external = NULL, + *from_external = NULL; pthread_t *threads = NULL; merge_files_arg *threads_args = NULL; int i; @@ -201,16 +216,20 @@ merge_backups(pgBackup *to_backup, pgBackup *from_backup) pgBackupGetPath(to_backup, to_backup_path, lengthof(to_backup_path), NULL); pgBackupGetPath(to_backup, to_database_path, lengthof(to_database_path), DATABASE_DIR); + pgBackupGetPath(to_backup, to_external_prefix, lengthof(to_database_path), + EXTERNAL_DIR); pgBackupGetPath(from_backup, from_backup_path, lengthof(from_backup_path), NULL); pgBackupGetPath(from_backup, from_database_path, lengthof(from_database_path), DATABASE_DIR); + pgBackupGetPath(from_backup, from_external_prefix, lengthof(from_database_path), + EXTERNAL_DIR); /* * Get list of files which will be modified or removed. 
 	 */
 	pgBackupGetPath(to_backup, control_file, lengthof(control_file),
 					DATABASE_FILE_LIST);
-	to_files = dir_read_file_list(NULL, control_file, FIO_BACKUP_HOST);
+	to_files = dir_read_file_list(NULL, NULL, control_file, FIO_BACKUP_HOST);
 
 	/* To delete from leaf, sort in reversed order */
 	parray_qsort(to_files, pgFileComparePathDesc);
 
 	/*
@@ -218,7 +237,7 @@
 	 */
 	pgBackupGetPath(from_backup, control_file, lengthof(control_file),
 					DATABASE_FILE_LIST);
-	files = dir_read_file_list(NULL, control_file, FIO_BACKUP_HOST);
+	files = dir_read_file_list(NULL, NULL, control_file, FIO_BACKUP_HOST);
 
 	/* sort by size for load balancing */
 	parray_qsort(files, pgFileCompareSize);
@@ -237,14 +256,39 @@
 	threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads);
 	threads_args = (merge_files_arg *) palloc(sizeof(merge_files_arg) * num_threads);
 
+	/* Create external directory lists */
+	if (to_backup->external_dir_str)
+		to_external = make_external_directory_list(to_backup->external_dir_str);
+	if (from_backup->external_dir_str)
+		from_external = make_external_directory_list(from_backup->external_dir_str);
+
+	/*
+	 * Rename external directories in to_backup (if they exist)
+	 * according to the numbering of external dirs in from_backup.
+	 */
+	if (to_external)
+		reorder_external_dirs(to_backup, to_external, from_external);
+
 	/* Setup threads */
 	for (i = 0; i < parray_num(files); i++)
 	{
 		pgFile *file = (pgFile *) parray_get(files, i);
 
+		/* if the entry was an external directory, create it in the backup */
+		if (file->external_dir_num && S_ISDIR(file->mode))
+		{
+			char dirpath[MAXPGPATH];
+			char new_container[MAXPGPATH];
+
+			makeExternalDirPathByNum(new_container, to_external_prefix,
+									 file->external_dir_num);
+			join_path_components(dirpath, new_container, file->path);
+			dir_create_dir(dirpath, DIR_PERMISSION);
+		}
 		pg_atomic_init_flag(&file->lock);
 	}
 
+	thread_interrupted = false;
 	for (i = 0; i < num_threads; i++)
 	{
 		merge_files_arg *arg = &(threads_args[i]);
@@ -255,6 +299,9 @@
 		arg->from_backup = from_backup;
 		arg->to_root = to_database_path;
 		arg->from_root = from_database_path;
+		arg->from_external = from_external;
+		arg->to_external_prefix = to_external_prefix;
+		arg->from_external_prefix = from_external_prefix;
 		/* By default there are some error */
 		arg->ret = 1;
@@ -284,6 +331,9 @@
 	to_backup->stop_lsn = from_backup->stop_lsn;
 	to_backup->recovery_time = from_backup->recovery_time;
 	to_backup->recovery_xid = from_backup->recovery_xid;
+	pfree(to_backup->external_dir_str);
+	to_backup->external_dir_str = from_backup->external_dir_str;
+	from_backup->external_dir_str = NULL; /* For safe pgBackupFree() */
 	to_backup->merge_time = merge_time;
 	to_backup->end_time = time(NULL);
@@ -311,7 +361,8 @@
 	else
 		to_backup->wal_bytes = BYTES_INVALID;
 
-	write_backup_filelist(to_backup, files, from_database_path);
+	write_backup_filelist(to_backup, files, from_database_path,
+						  from_external_prefix, NULL);
 	write_backup(to_backup);
 
 delete_source_backup:
@@ -329,6 +380,14 @@
 	{
 		pgFile *file = (pgFile *) parray_get(to_files, i);
 
+		if (file->external_dir_num && to_external)
+		{
+			char *dir_name = parray_get(to_external, file->external_dir_num - 1);
+			if (backup_contains_external(dir_name, from_external))
+				/* Dir already removed */
+				continue;
+
} + if (parray_bsearch(files, file, pgFileComparePathDesc) == NULL) { char to_file_path[MAXPGPATH]; @@ -402,7 +461,7 @@ merge_files(void *arg) continue; /* check for interrupt */ - if (interrupted) + if (interrupted || thread_interrupted) elog(ERROR, "Interrupted during merging backups"); /* Directories were created before */ @@ -414,7 +473,7 @@ merge_files(void *arg) i + 1, num_files, file->path); res_file = parray_bsearch(argument->to_files, file, - pgFileComparePathDesc); + pgFileComparePathWithExternalDesc); to_file = (res_file) ? *res_file : NULL; join_path_components(to_file_path, argument->to_root, file->path); @@ -452,7 +511,17 @@ merge_files(void *arg) } /* We need to make full path, file object has relative path */ - join_path_components(from_file_path, argument->from_root, file->path); + if (file->external_dir_num) + { + char temp[MAXPGPATH]; + makeExternalDirPathByNum(temp, argument->from_external_prefix, + file->external_dir_num); + + join_path_components(from_file_path, temp, file->path); + } + else + join_path_components(from_file_path, argument->from_root, + file->path); prev_file_path = file->path; file->path = from_file_path; @@ -558,6 +627,23 @@ merge_files(void *arg) file->crc = pgFileGetCRC(to_file_path, true, true, NULL, FIO_LOCAL_HOST); } } + else if (file->external_dir_num) + { + char from_root[MAXPGPATH]; + char to_root[MAXPGPATH]; + int new_dir_num; + char *file_external_path = parray_get(argument->from_external, + file->external_dir_num - 1); + + Assert(argument->from_external); + new_dir_num = get_external_index(file_external_path, + argument->from_external); + makeExternalDirPathByNum(from_root, argument->from_external_prefix, + file->external_dir_num); + makeExternalDirPathByNum(to_root, argument->to_external_prefix, + new_dir_num); + copy_file(from_root, FIO_LOCAL_HOST, to_root, FIO_LOCAL_HOST, file); + } else if (strcmp(file->name, "pg_control") == 0) copy_pgcontrol_file(argument->from_root, FIO_LOCAL_HOST, argument->to_root, FIO_LOCAL_HOST, file); else @@ -570,7 +656,7 @@ merge_files(void *arg) file->compress_alg = to_backup->compress_alg; if (file->write_size != BYTES_INVALID) - elog(LOG, "Merged file \"%s\": " INT64_FORMAT " bytes", + elog(VERBOSE, "Merged file \"%s\": " INT64_FORMAT " bytes", file->path, file->write_size); /* Restore relative path */ @@ -582,3 +668,66 @@ merge_files(void *arg) return NULL; } + +/* Recursively delete a directory and its contents */ +static void +remove_dir_with_files(const char *path) +{ + parray *files = parray_new(); + dir_list_file(files, path, true, true, true, 0, FIO_LOCAL_HOST); + parray_qsort(files, pgFileComparePathDesc); + for (int i = 0; i < parray_num(files); i++) + { + pgFile *file = (pgFile *) parray_get(files, i); + + pgFileDelete(file); + elog(VERBOSE, "Deleted \"%s\"", file->path); + } +} + +/* Get index of external directory */ +static int +get_external_index(const char *key, const parray *list) +{ + if (!list) /* Nowhere to search */ + return -1; + for (int i = 0; i < parray_num(list); i++) + { + if (strcmp(key, parray_get(list, i)) == 0) + return i + 1; + } + return -1; +} + +/* Rename directories in to_backup according to order in from_external */ +static void +reorder_external_dirs(pgBackup *to_backup, parray *to_external, + parray *from_external) +{ + char externaldir_template[MAXPGPATH]; + + pgBackupGetPath(to_backup, externaldir_template, + lengthof(externaldir_template), EXTERNAL_DIR); + for (int i = 0; i < parray_num(to_external); i++) + { + int from_num = 
get_external_index(parray_get(to_external, i), + from_external); + if (from_num == -1) + { + char old_path[MAXPGPATH]; + makeExternalDirPathByNum(old_path, externaldir_template, i + 1); + remove_dir_with_files(old_path); + } + else if (from_num != i + 1) + { + char old_path[MAXPGPATH]; + char new_path[MAXPGPATH]; + makeExternalDirPathByNum(old_path, externaldir_template, i + 1); + makeExternalDirPathByNum(new_path, externaldir_template, from_num); + elog(VERBOSE, "Rename %s to %s", old_path, new_path); + if (rename (old_path, new_path) == -1) + elog(ERROR, "Could not rename directory \"%s\" to \"%s\": %s", + old_path, new_path, strerror(errno)); + } + } +} diff --git a/src/parsexlog.c b/src/parsexlog.c index f0118a72..46570924 100644 --- a/src/parsexlog.c +++ b/src/parsexlog.c @@ -83,43 +83,61 @@ typedef struct xl_xact_abort /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ } xl_xact_abort; -static void extractPageInfo(XLogReaderState *record); -static bool getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime); - -typedef struct XLogPageReadPrivate +/* + * XLogRecTarget allows to track the last recovery targets. Currently used only + * within validate_wal(). + */ +typedef struct XLogRecTarget { - int thread_num; - const char *archivedir; - TimeLineID tli; - uint32 xlog_seg_size; + TimestampTz rec_time; + TransactionId rec_xid; + XLogRecPtr rec_lsn; +} XLogRecTarget; + +typedef struct XLogReaderData +{ + int thread_num; + TimeLineID tli; + + XLogRecTarget cur_rec; + XLogSegNo xlogsegno; + bool xlogexists; char page_buf[XLOG_BLCKSZ]; uint32 prev_page_off; - bool manual_switch; - bool need_switch; + bool need_switch; - int xlogfile; - XLogSegNo xlogsegno; - char xlogpath[MAXPGPATH]; - bool xlogexists; - fio_location location; + int xlogfile; + char xlogpath[MAXPGPATH]; #ifdef HAVE_LIBZ gzFile gz_xlogfile; char gz_xlogpath[MAXPGPATH]; #endif -} XLogPageReadPrivate; +} XLogReaderData; + +/* Function to process a WAL record */ +typedef void (*xlog_record_function) (XLogReaderState *record, + XLogReaderData *reader_data, + bool *stop_reading); /* An argument for a thread function */ typedef struct { - XLogPageReadPrivate private_data; + XLogReaderData reader_data; + + xlog_record_function process_record; XLogRecPtr startpoint; XLogRecPtr endpoint; XLogSegNo endSegNo; + /* + * The thread got the recovery target. + */ + bool got_target; + /* * Return value from the thread. * 0 means there is no error, 1 - there is an error. 
@@ -131,14 +149,43 @@ static int SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *pageTLI); -static XLogReaderState *InitXLogPageRead(XLogPageReadPrivate *private_data, +static XLogReaderState *InitXLogPageRead(XLogReaderData *reader_data, const char *archivedir, - TimeLineID tli, uint32 xlog_seg_size, + TimeLineID tli, uint32 segment_size, + bool manual_switch, + bool consistent_read, bool allocate_reader); +static bool RunXLogThreads(const char *archivedir, + time_t target_time, TransactionId target_xid, + XLogRecPtr target_lsn, + TimeLineID tli, uint32 segment_size, + XLogRecPtr startpoint, XLogRecPtr endpoint, + bool consistent_read, + xlog_record_function process_record, + XLogRecTarget *last_rec); +//static XLogReaderState *InitXLogThreadRead(xlog_thread_arg *arg); +static bool SwitchThreadToNextWal(XLogReaderState *xlogreader, + xlog_thread_arg *arg); +static bool XLogWaitForConsistency(XLogReaderState *xlogreader); +static void *XLogThreadWorker(void *arg); static void CleanupXLogPageRead(XLogReaderState *xlogreader); -static void PrintXLogCorruptionMsg(XLogPageReadPrivate *private_data, int elevel); +static void PrintXLogCorruptionMsg(XLogReaderData *reader_data, int elevel); -static XLogSegNo nextSegNoToRead = 0; +static void extractPageInfo(XLogReaderState *record, + XLogReaderData *reader_data, bool *stop_reading); +static void validateXLogRecord(XLogReaderState *record, + XLogReaderData *reader_data, bool *stop_reading); +static bool getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime); + +static XLogSegNo segno_start = 0; +/* Segment number where target record is located */ +static XLogSegNo segno_target = 0; +/* Next segment number to read by a thread */ +static XLogSegNo segno_next = 0; +/* Number of segments already read by threads */ +static uint32 segnum_read = 0; +/* Number of detected corrupted or absent segments */ +static uint32 segnum_corrupted = 0; static pthread_mutex_t wal_segment_mutex = PTHREAD_MUTEX_INITIALIZER; /* copied from timestamp.c */ @@ -157,189 +204,25 @@ timestamptz_to_time_t(TimestampTz t) return result; } +static const char *wal_archivedir = NULL; +static uint32 wal_seg_size = 0; /* - * Do manual switch to the next WAL segment. - * - * Returns false if the reader reaches the end of a WAL segment list. + * If true a wal reader thread switches to the next segment using + * segno_next. */ -static bool -switchToNextWal(XLogReaderState *xlogreader, xlog_thread_arg *arg) -{ - XLogPageReadPrivate *private_data; - XLogRecPtr found; - - private_data = (XLogPageReadPrivate *) xlogreader->private_data; - private_data->need_switch = false; - - /* Critical section */ - pthread_lock(&wal_segment_mutex); - Assert(nextSegNoToRead); - private_data->xlogsegno = nextSegNoToRead; - nextSegNoToRead++; - pthread_mutex_unlock(&wal_segment_mutex); - - /* We've reached the end */ - if (private_data->xlogsegno > arg->endSegNo) - return false; - - /* Adjust next record position */ - GetXLogRecPtr(private_data->xlogsegno, 0, - private_data->xlog_seg_size, arg->startpoint); - /* We need to close previously opened file if it wasn't closed earlier */ - CleanupXLogPageRead(xlogreader); - /* Skip over the page header and contrecord if any */ - found = XLogFindNextRecord(xlogreader, arg->startpoint); - - /* - * We get invalid WAL record pointer usually when WAL segment is - * absent or is corrupted. 
- */ - if (XLogRecPtrIsInvalid(found)) - { - elog(WARNING, "Thread [%d]: could not read WAL record at %X/%X", - private_data->thread_num, - (uint32) (arg->startpoint >> 32), (uint32) (arg->startpoint)); - PrintXLogCorruptionMsg(private_data, ERROR); - } - arg->startpoint = found; - - elog(VERBOSE, "Thread [%d]: switched to LSN %X/%X", - private_data->thread_num, - (uint32) (arg->startpoint >> 32), (uint32) (arg->startpoint)); - - return true; -} +static bool wal_manual_switch = false; +/* + * If true a wal reader thread waits for other threads if the thread met absent + * wal segment. + */ +static bool wal_consistent_read = false; /* - * extractPageMap() worker. + * Variables used within validate_wal() and validateXLogRecord() to stop workers */ -static void * -doExtractPageMap(void *arg) -{ - xlog_thread_arg *extract_arg = (xlog_thread_arg *) arg; - XLogPageReadPrivate *private_data; - XLogReaderState *xlogreader; - XLogSegNo nextSegNo = 0; - XLogRecPtr found; - char *errormsg; - - private_data = &extract_arg->private_data; -#if PG_VERSION_NUM >= 110000 - xlogreader = XLogReaderAllocate(private_data->xlog_seg_size, - &SimpleXLogPageRead, private_data); -#else - xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, private_data); -#endif - if (xlogreader == NULL) - elog(ERROR, "Thread [%d]: out of memory", private_data->thread_num); - xlogreader->system_identifier = instance_config.system_identifier; - - found = XLogFindNextRecord(xlogreader, extract_arg->startpoint); - - /* - * We get invalid WAL record pointer usually when WAL segment is absent or - * is corrupted. - */ - if (XLogRecPtrIsInvalid(found)) - { - if (xlogreader->errormsg_buf[0] != '\0') - elog(WARNING, "Thread [%d]: could not read WAL record at %X/%X: %s", - private_data->thread_num, - (uint32) (extract_arg->startpoint >> 32), - (uint32) (extract_arg->startpoint), - xlogreader->errormsg_buf); - else - elog(WARNING, "Thread [%d]: could not read WAL record at %X/%X", - private_data->thread_num, - (uint32) (extract_arg->startpoint >> 32), - (uint32) (extract_arg->startpoint)); - PrintXLogCorruptionMsg(private_data, ERROR); - } - extract_arg->startpoint = found; - - elog(VERBOSE, "Thread [%d]: Starting LSN: %X/%X", - private_data->thread_num, - (uint32) (extract_arg->startpoint >> 32), - (uint32) (extract_arg->startpoint)); - - /* Switch WAL segment manually below without using SimpleXLogPageRead() */ - private_data->manual_switch = true; - - do - { - XLogRecord *record; - - if (interrupted) - elog(ERROR, "Thread [%d]: Interrupted during WAL reading", - private_data->thread_num); - - /* - * We need to switch to the next WAL segment after reading previous - * record. It may happen if we read contrecord. - */ - if (private_data->need_switch) - { - if (!switchToNextWal(xlogreader, extract_arg)) - break; - } - - record = XLogReadRecord(xlogreader, extract_arg->startpoint, &errormsg); - - if (record == NULL) - { - XLogRecPtr errptr; - - /* - * There is no record, try to switch to the next WAL segment. - * Usually SimpleXLogPageRead() does it by itself. But here we need - * to do it manually to support threads. - */ - if (private_data->need_switch && errormsg == NULL) - { - if (switchToNextWal(xlogreader, extract_arg)) - continue; - else - break; - } - - errptr = extract_arg->startpoint ? 
- extract_arg->startpoint : xlogreader->EndRecPtr; - - if (errormsg) - elog(WARNING, "Thread [%d]: could not read WAL record at %X/%X: %s", - private_data->thread_num, - (uint32) (errptr >> 32), (uint32) (errptr), - errormsg); - else - elog(WARNING, "Thread [%d]: could not read WAL record at %X/%X", - private_data->thread_num, - (uint32) (errptr >> 32), (uint32) (errptr)); - - /* - * If we don't have all WAL files from prev backup start_lsn to current - * start_lsn, we won't be able to build page map and PAGE backup will - * be incorrect. Stop it and throw an error. - */ - PrintXLogCorruptionMsg(private_data, ERROR); - } - - extractPageInfo(xlogreader); - - /* continue reading at next record */ - extract_arg->startpoint = InvalidXLogRecPtr; - - GetXLogSegNo(xlogreader->EndRecPtr, nextSegNo, - private_data->xlog_seg_size); - } while (nextSegNo <= extract_arg->endSegNo && - xlogreader->ReadRecPtr < extract_arg->endpoint); - - CleanupXLogPageRead(xlogreader); - XLogReaderFree(xlogreader); - - /* Extracting is successful */ - extract_arg->ret = 0; - return NULL; -} +static time_t wal_target_time = 0; +static TransactionId wal_target_xid = InvalidTransactionId; +static XLogRecPtr wal_target_lsn = InvalidXLogRecPtr; /* * Read WAL from the archive directory, from 'startpoint' to 'endpoint' on the @@ -349,86 +232,20 @@ doExtractPageMap(void *arg) * file. */ void -extractPageMap(const char *archivedir, TimeLineID tli, uint32 seg_size, - XLogRecPtr startpoint, XLogRecPtr endpoint, parray *files) +extractPageMap(const char *archivedir, TimeLineID tli, uint32 wal_seg_size, + XLogRecPtr startpoint, XLogRecPtr endpoint) { - int i; - int threads_need = 0; - XLogSegNo endSegNo; bool extract_isok = true; - pthread_t *threads; - xlog_thread_arg *thread_args; time_t start_time, end_time; elog(LOG, "Compiling pagemap"); - if (!XRecOffIsValid(startpoint)) - elog(ERROR, "Invalid startpoint value %X/%X", - (uint32) (startpoint >> 32), (uint32) (startpoint)); - - if (!XRecOffIsValid(endpoint)) - elog(ERROR, "Invalid endpoint value %X/%X", - (uint32) (endpoint >> 32), (uint32) (endpoint)); - - GetXLogSegNo(endpoint, endSegNo, seg_size); - - nextSegNoToRead = 0; time(&start_time); - threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); - thread_args = (xlog_thread_arg *) palloc(sizeof(xlog_thread_arg)*num_threads); - - /* - * Initialize thread args. - * - * Each thread works with its own WAL segment and we need to adjust - * startpoint value for each thread. - */ - for (i = 0; i < num_threads; i++) - { - InitXLogPageRead(&thread_args[i].private_data, archivedir, tli, - seg_size, false); - thread_args[i].private_data.thread_num = i + 1; - - thread_args[i].startpoint = startpoint; - thread_args[i].endpoint = endpoint; - thread_args[i].endSegNo = endSegNo; - /* By default there is some error */ - thread_args[i].ret = 1; - - threads_need++; - - /* Adjust startpoint to the next thread */ - if (nextSegNoToRead == 0) - GetXLogSegNo(startpoint, nextSegNoToRead, seg_size); - - nextSegNoToRead++; - /* - * If we need to read less WAL segments than num_threads, create less - * threads. 
- */ - if (nextSegNoToRead > endSegNo) - break; - GetXLogRecPtr(nextSegNoToRead, 0, seg_size, startpoint); - } - - /* Run threads */ - for (i = 0; i < threads_need; i++) - { - elog(VERBOSE, "Start WAL reader thread: %d", i + 1); - pthread_create(&threads[i], NULL, doExtractPageMap, &thread_args[i]); - } - - /* Wait for threads */ - for (i = 0; i < threads_need; i++) - { - pthread_join(threads[i], NULL); - if (thread_args[i].ret == 1) - extract_isok = false; - } - - pfree(threads); - pfree(thread_args); + extract_isok = RunXLogThreads(archivedir, 0, InvalidTransactionId, + InvalidXLogRecPtr, tli, wal_seg_size, + startpoint, endpoint, false, extractPageInfo, + NULL); time(&end_time); if (extract_isok) @@ -439,48 +256,26 @@ extractPageMap(const char *archivedir, TimeLineID tli, uint32 seg_size, } /* - * Ensure that the backup has all wal files needed for recovery to consistent state. + * Ensure that the backup has all wal files needed for recovery to consistent + * state. + * + * WAL records reading is processed using threads. Each thread reads single WAL + * file. */ static void validate_backup_wal_from_start_to_stop(pgBackup *backup, - char *backup_xlog_path, TimeLineID tli, + const char *archivedir, TimeLineID tli, uint32 xlog_seg_size) { - XLogRecPtr startpoint = backup->start_lsn; - XLogRecord *record; - XLogReaderState *xlogreader; - char *errormsg; - XLogPageReadPrivate private; - bool got_endpoint = false; + bool got_endpoint; - xlogreader = InitXLogPageRead(&private, backup_xlog_path, tli, - xlog_seg_size, true); - - while (true) - { - record = XLogReadRecord(xlogreader, startpoint, &errormsg); - - if (record == NULL) - { - if (errormsg) - elog(WARNING, "%s", errormsg); - - break; - } - - /* Got WAL record at stop_lsn */ - if (xlogreader->ReadRecPtr == backup->stop_lsn) - { - got_endpoint = true; - break; - } - startpoint = InvalidXLogRecPtr; /* continue reading at next record */ - } + got_endpoint = RunXLogThreads(archivedir, 0, InvalidTransactionId, + InvalidXLogRecPtr, tli, xlog_seg_size, + backup->start_lsn, backup->stop_lsn, + false, NULL, NULL); if (!got_endpoint) { - PrintXLogCorruptionMsg(&private, WARNING); - /* * If we don't have WAL between start_lsn and stop_lsn, * the backup is definitely corrupted. Update its status. 
@@ -495,10 +290,6 @@ validate_backup_wal_from_start_to_stop(pgBackup *backup, (uint32) (backup->stop_lsn >> 32), (uint32) (backup->stop_lsn)); } - - /* clean */ - CleanupXLogPageRead(xlogreader); - XLogReaderFree(xlogreader); } /* @@ -509,20 +300,12 @@ validate_backup_wal_from_start_to_stop(pgBackup *backup, void validate_wal(pgBackup *backup, const char *archivedir, time_t target_time, TransactionId target_xid, - XLogRecPtr target_lsn, - TimeLineID tli, uint32 seg_size) + XLogRecPtr target_lsn, TimeLineID tli, uint32 wal_seg_size) { - XLogRecPtr startpoint = backup->start_lsn; const char *backup_id; - XLogRecord *record; - XLogReaderState *xlogreader; - char *errormsg; - XLogPageReadPrivate private; - TransactionId last_xid = InvalidTransactionId; - TimestampTz last_time = 0; + XLogRecTarget last_rec; char last_timestamp[100], target_timestamp[100]; - XLogRecPtr last_lsn = InvalidXLogRecPtr; bool all_wal = false; char backup_xlog_path[MAXPGPATH]; @@ -549,11 +332,11 @@ validate_wal(pgBackup *backup, const char *archivedir, DATABASE_DIR, PG_XLOG_DIR); validate_backup_wal_from_start_to_stop(backup, backup_xlog_path, tli, - seg_size); + wal_seg_size); } else validate_backup_wal_from_start_to_stop(backup, (char *) archivedir, tli, - seg_size); + wal_seg_size); if (backup->status == BACKUP_STATUS_CORRUPT) { @@ -564,7 +347,8 @@ validate_wal(pgBackup *backup, const char *archivedir, * If recovery target is provided check that we can restore backup to a * recovery target time or xid. */ - if (!TransactionIdIsValid(target_xid) && target_time == 0 && !XRecOffIsValid(target_lsn)) + if (!TransactionIdIsValid(target_xid) && target_time == 0 && + !XRecOffIsValid(target_lsn)) { /* Recovery target is not given so exit */ elog(INFO, "Backup %s WAL segments are valid", backup_id); @@ -583,106 +367,54 @@ validate_wal(pgBackup *backup, const char *archivedir, * up to the given recovery target. * In any case we cannot restore to the point before stop_lsn. 
*/ - xlogreader = InitXLogPageRead(&private, archivedir, tli, seg_size, - true); /* We can restore at least up to the backup end */ - time2iso(last_timestamp, lengthof(last_timestamp), backup->recovery_time); - last_xid = backup->recovery_xid; - last_lsn = backup->stop_lsn; + last_rec.rec_time = 0; + last_rec.rec_xid = backup->recovery_xid; + last_rec.rec_lsn = backup->stop_lsn; - if ((TransactionIdIsValid(target_xid) && target_xid == last_xid) + time2iso(last_timestamp, lengthof(last_timestamp), backup->recovery_time); + + if ((TransactionIdIsValid(target_xid) && target_xid == last_rec.rec_xid) || (target_time != 0 && backup->recovery_time >= target_time) - || (XRecOffIsValid(target_lsn) && backup->stop_lsn >= target_lsn)) + || (XRecOffIsValid(target_lsn) && last_rec.rec_lsn >= target_lsn)) all_wal = true; - startpoint = backup->stop_lsn; - while (true) - { - bool timestamp_record; - - record = XLogReadRecord(xlogreader, startpoint, &errormsg); - if (record == NULL) - { - if (errormsg) - elog(WARNING, "%s", errormsg); - - break; - } - - timestamp_record = getRecordTimestamp(xlogreader, &last_time); - if (XLogRecGetXid(xlogreader) != InvalidTransactionId) - last_xid = XLogRecGetXid(xlogreader); - last_lsn = xlogreader->ReadRecPtr; - - /* Check target xid */ - if (TransactionIdIsValid(target_xid) && target_xid == last_xid) - { - all_wal = true; - break; - } - /* Check target time */ - else if (target_time != 0 && timestamp_record && - timestamptz_to_time_t(last_time) >= target_time) - { - all_wal = true; - break; - } - /* Check target lsn */ - else if (XRecOffIsValid(target_lsn) && last_lsn >= target_lsn) - { - all_wal = true; - break; - } - /* If there are no target xid, target time and target lsn */ - else if (!TransactionIdIsValid(target_xid) && target_time == 0 && - !XRecOffIsValid(target_lsn)) - { - all_wal = true; - /* We don't stop here. 
We want to get last_xid and last_time */ - } - - startpoint = InvalidXLogRecPtr; /* continue reading at next record */ - } - - if (last_time > 0) + all_wal = all_wal || + RunXLogThreads(archivedir, target_time, target_xid, target_lsn, + tli, wal_seg_size, backup->stop_lsn, + InvalidXLogRecPtr, true, validateXLogRecord, &last_rec); + if (last_rec.rec_time > 0) time2iso(last_timestamp, lengthof(last_timestamp), - timestamptz_to_time_t(last_time)); + timestamptz_to_time_t(last_rec.rec_time)); /* There are all needed WAL records */ if (all_wal) - elog(INFO, "backup validation completed successfully on time %s, xid " XID_FMT " and LSN %X/%X", - last_timestamp, last_xid, - (uint32) (last_lsn >> 32), (uint32) last_lsn); + elog(INFO, "Backup validation completed successfully on time %s, xid " XID_FMT " and LSN %X/%X", + last_timestamp, last_rec.rec_xid, + (uint32) (last_rec.rec_lsn >> 32), (uint32) last_rec.rec_lsn); /* Some needed WAL records are absent */ else { - PrintXLogCorruptionMsg(&private, WARNING); - - elog(WARNING, "recovery can be done up to time %s, xid " XID_FMT " and LSN %X/%X", - last_timestamp, last_xid, - (uint32) (last_lsn >> 32), (uint32) last_lsn); + elog(WARNING, "Recovery can be done up to time %s, xid " XID_FMT " and LSN %X/%X", + last_timestamp, last_rec.rec_xid, + (uint32) (last_rec.rec_lsn >> 32), (uint32) last_rec.rec_lsn); if (target_time > 0) - time2iso(target_timestamp, lengthof(target_timestamp), - target_time); + time2iso(target_timestamp, lengthof(target_timestamp), target_time); if (TransactionIdIsValid(target_xid) && target_time != 0) - elog(ERROR, "not enough WAL records to time %s and xid " XID_FMT, + elog(ERROR, "Not enough WAL records to time %s and xid " XID_FMT, target_timestamp, target_xid); else if (TransactionIdIsValid(target_xid)) - elog(ERROR, "not enough WAL records to xid " XID_FMT, + elog(ERROR, "Not enough WAL records to xid " XID_FMT, target_xid); else if (target_time != 0) - elog(ERROR, "not enough WAL records to time %s", + elog(ERROR, "Not enough WAL records to time %s", target_timestamp); else if (XRecOffIsValid(target_lsn)) - elog(ERROR, "not enough WAL records to lsn %X/%X", + elog(ERROR, "Not enough WAL records to lsn %X/%X", (uint32) (target_lsn >> 32), (uint32) (target_lsn)); } - - /* clean */ - CleanupXLogPageRead(xlogreader); - XLogReaderFree(xlogreader); } /* @@ -691,13 +423,13 @@ validate_wal(pgBackup *backup, const char *archivedir, * pg_stop_backup(). */ bool -read_recovery_info(const char *archivedir, TimeLineID tli, uint32 seg_size, +read_recovery_info(const char *archivedir, TimeLineID tli, uint32 wal_seg_size, XLogRecPtr start_lsn, XLogRecPtr stop_lsn, time_t *recovery_time, TransactionId *recovery_xid) { XLogRecPtr startpoint = stop_lsn; XLogReaderState *xlogreader; - XLogPageReadPrivate private; + XLogReaderData reader_data; bool res; if (!XRecOffIsValid(start_lsn)) @@ -708,7 +440,8 @@ read_recovery_info(const char *archivedir, TimeLineID tli, uint32 seg_size, elog(ERROR, "Invalid stop_lsn value %X/%X", (uint32) (stop_lsn >> 32), (uint32) (stop_lsn)); - xlogreader = InitXLogPageRead(&private, archivedir, tli, seg_size, true); + xlogreader = InitXLogPageRead(&reader_data, archivedir, tli, wal_seg_size, + false, true, true); /* Read records from stop_lsn down to start_lsn */ do @@ -725,11 +458,11 @@ read_recovery_info(const char *archivedir, TimeLineID tli, uint32 seg_size, errptr = startpoint ? 
startpoint : xlogreader->EndRecPtr; if (errormsg) - elog(ERROR, "could not read WAL record at %X/%X: %s", + elog(ERROR, "Could not read WAL record at %X/%X: %s", (uint32) (errptr >> 32), (uint32) (errptr), errormsg); else - elog(ERROR, "could not read WAL record at %X/%X", + elog(ERROR, "Could not read WAL record at %X/%X", (uint32) (errptr >> 32), (uint32) (errptr)); } @@ -763,10 +496,10 @@ cleanup: */ bool wal_contains_lsn(const char *archivedir, XLogRecPtr target_lsn, - TimeLineID target_tli, uint32 seg_size) + TimeLineID target_tli, uint32 wal_seg_size) { XLogReaderState *xlogreader; - XLogPageReadPrivate private; + XLogReaderData reader_data; char *errormsg; bool res; @@ -774,8 +507,8 @@ wal_contains_lsn(const char *archivedir, XLogRecPtr target_lsn, elog(ERROR, "Invalid target_lsn value %X/%X", (uint32) (target_lsn >> 32), (uint32) (target_lsn)); - xlogreader = InitXLogPageRead(&private, archivedir, target_tli, seg_size, - true); + xlogreader = InitXLogPageRead(&reader_data, archivedir, target_tli, + wal_seg_size, false, false, true); res = XLogReadRecord(xlogreader, target_lsn, &errormsg) != NULL; /* Didn't find 'target_lsn' and there is no error, return false */ @@ -798,16 +531,16 @@ wal_contains_lsn(const char *archivedir, XLogRecPtr target_lsn, XLogRecPtr get_last_wal_lsn(const char *archivedir, XLogRecPtr start_lsn, XLogRecPtr stop_lsn, TimeLineID tli, bool seek_prev_segment, - uint32 seg_size) + uint32 wal_seg_size) { XLogReaderState *xlogreader; - XLogPageReadPrivate private; + XLogReaderData reader_data; XLogRecPtr startpoint; XLogSegNo start_segno; XLogSegNo segno; XLogRecPtr res = InvalidXLogRecPtr; - GetXLogSegNo(stop_lsn, segno, seg_size); + GetXLogSegNo(stop_lsn, segno, wal_seg_size); if (segno <= 1) elog(ERROR, "Invalid WAL segment number " UINT64_FORMAT, segno); @@ -815,19 +548,20 @@ get_last_wal_lsn(const char *archivedir, XLogRecPtr start_lsn, if (seek_prev_segment) segno = segno - 1; - xlogreader = InitXLogPageRead(&private, archivedir, tli, seg_size, true); + xlogreader = InitXLogPageRead(&reader_data, archivedir, tli, wal_seg_size, + false, false, true); /* * Calculate startpoint. Decide: we should use 'start_lsn' or offset 0. 
*/ - GetXLogSegNo(start_lsn, start_segno, seg_size); + GetXLogSegNo(start_lsn, start_segno, wal_seg_size); if (start_segno == segno) startpoint = start_lsn; else { XLogRecPtr found; - GetXLogRecPtr(segno, 0, seg_size, startpoint); + GetXLogRecPtr(segno, 0, wal_seg_size, startpoint); found = XLogFindNextRecord(xlogreader, startpoint); if (XLogRecPtrIsInvalid(found)) @@ -839,7 +573,7 @@ get_last_wal_lsn(const char *archivedir, XLogRecPtr start_lsn, else elog(WARNING, "Could not read WAL record at %X/%X", (uint32) (startpoint >> 32), (uint32) (startpoint)); - PrintXLogCorruptionMsg(&private, ERROR); + PrintXLogCorruptionMsg(&reader_data, ERROR); } startpoint = found; } @@ -868,13 +602,13 @@ get_last_wal_lsn(const char *archivedir, XLogRecPtr start_lsn, else elog(WARNING, "Could not read WAL record at %X/%X", (uint32) (errptr >> 32), (uint32) (errptr)); - PrintXLogCorruptionMsg(&private, ERROR); + PrintXLogCorruptionMsg(&reader_data, ERROR); } /* continue reading at next record */ startpoint = InvalidXLogRecPtr; - GetXLogSegNo(xlogreader->EndRecPtr, next_segno, seg_size); + GetXLogSegNo(xlogreader->EndRecPtr, next_segno, wal_seg_size); if (next_segno > segno) break; @@ -920,25 +654,24 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *pageTLI) { - XLogPageReadPrivate *private_data; + XLogReaderData *reader_data; uint32 targetPageOff; - private_data = (XLogPageReadPrivate *) xlogreader->private_data; - targetPageOff = targetPagePtr % private_data->xlog_seg_size; + reader_data = (XLogReaderData *) xlogreader->private_data; + targetPageOff = targetPagePtr % wal_seg_size; - if (interrupted) + if (interrupted || thread_interrupted) elog(ERROR, "Thread [%d]: Interrupted during WAL reading", - private_data->thread_num); + reader_data->thread_num); /* * See if we need to switch to a new segment because the requested record * is not in the currently open one. */ - if (!IsInXLogSeg(targetPagePtr, private_data->xlogsegno, - private_data->xlog_seg_size)) + if (!IsInXLogSeg(targetPagePtr, reader_data->xlogsegno, wal_seg_size)) { elog(VERBOSE, "Thread [%d]: Need to switch to the next WAL segment, page LSN %X/%X, record being read LSN %X/%X", - private_data->thread_num, + reader_data->thread_num, (uint32) (targetPagePtr >> 32), (uint32) (targetPagePtr), (uint32) (xlogreader->currRecPtr >> 32), (uint32) (xlogreader->currRecPtr )); @@ -955,52 +688,49 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, /* * Switch to the next WAL segment after reading contrecord. */ - if (private_data->manual_switch) - private_data->need_switch = true; + if (wal_manual_switch) + reader_data->need_switch = true; } else { CleanupXLogPageRead(xlogreader); /* - * Do not switch to next WAL segment in this function. Currently it is - * manually switched only in doExtractPageMap(). - */ - if (private_data->manual_switch) + * Do not switch to next WAL segment in this function. It is + * manually switched by a thread routine. 
+ */ + if (wal_manual_switch) { - private_data->need_switch = true; + reader_data->need_switch = true; return -1; } } } - GetXLogSegNo(targetPagePtr, private_data->xlogsegno, - private_data->xlog_seg_size); + GetXLogSegNo(targetPagePtr, reader_data->xlogsegno, wal_seg_size); /* Try to switch to the next WAL segment */ - if (!private_data->xlogexists) + if (!reader_data->xlogexists) { char xlogfname[MAXFNAMELEN]; - GetXLogFileName(xlogfname, private_data->tli, private_data->xlogsegno, - private_data->xlog_seg_size); - snprintf(private_data->xlogpath, MAXPGPATH, "%s/%s", - private_data->archivedir, xlogfname); + GetXLogFileName(xlogfname, reader_data->tli, reader_data->xlogsegno, + wal_seg_size); + snprintf(reader_data->xlogpath, MAXPGPATH, "%s/%s", wal_archivedir, + xlogfname); - if (fileExists(private_data->xlogpath, private_data->location)) + if (fileExists(reader_data->xlogpath, FIO_BACKUP_HOST)) { elog(LOG, "Thread [%d]: Opening WAL segment \"%s\"", - private_data->thread_num, - private_data->xlogpath); + reader_data->thread_num, reader_data->xlogpath); - private_data->xlogexists = true; - private_data->xlogfile = fio_open(private_data->xlogpath, - O_RDONLY | PG_BINARY, private_data->location); + reader_data->xlogexists = true; + reader_data->xlogfile = fio_open(reader_data->xlogpath, + O_RDONLY | PG_BINARY, FIO_BACKUP_HOST); - if (private_data->xlogfile < 0) + if (reader_data->xlogfile < 0) { elog(WARNING, "Thread [%d]: Could not open WAL segment \"%s\": %s", - private_data->thread_num, - private_data->xlogpath, + reader_data->thread_num, reader_data->xlogpath, strerror(errno)); return -1; } @@ -1009,21 +739,21 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, /* Try to open compressed WAL segment */ else { - snprintf(private_data->gz_xlogpath, - sizeof(private_data->gz_xlogpath), "%s.gz", - private_data->xlogpath); - if (fileExists(private_data->gz_xlogpath, private_data->location)) + snprintf(reader_data->gz_xlogpath, sizeof(reader_data->gz_xlogpath), + "%s.gz", reader_data->xlogpath); + if (fileExists(reader_data->gz_xlogpath, FIO_BACKUP_HOST)) { elog(LOG, "Thread [%d]: Opening compressed WAL segment \"%s\"", - private_data->thread_num, private_data->gz_xlogpath); + reader_data->thread_num, reader_data->gz_xlogpath); - private_data->xlogexists = true; - private_data->gz_xlogfile = fio_gzopen(private_data->gz_xlogpath, - "rb", -1, private_data->location); - if (private_data->gz_xlogfile == NULL) + reader_data->xlogexists = true; + reader_data->gz_xlogfile = fio_gzopen(reader_data->gz_xlogpath, + "rb", -1, FIO_BACKUP_HOST); + if (reader_data->gz_xlogfile == NULL) { elog(WARNING, "Thread [%d]: Could not open compressed WAL segment \"%s\": %s", - private_data->thread_num, private_data->gz_xlogpath, strerror(errno)); + reader_data->thread_num, reader_data->gz_xlogpath, + strerror(errno)); return -1; } } @@ -1031,69 +761,67 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, #endif /* Exit without error if WAL segment doesn't exist */ - if (!private_data->xlogexists) + if (!reader_data->xlogexists) return -1; } /* * At this point, we have the right segment open. 
*/ - Assert(private_data->xlogexists); + Assert(reader_data->xlogexists); /* * Do not read same page read earlier from the file, read it from the buffer */ - if (private_data->prev_page_off != 0 && - private_data->prev_page_off == targetPageOff) + if (reader_data->prev_page_off != 0 && + reader_data->prev_page_off == targetPageOff) { - memcpy(readBuf, private_data->page_buf, XLOG_BLCKSZ); - *pageTLI = private_data->tli; + memcpy(readBuf, reader_data->page_buf, XLOG_BLCKSZ); + *pageTLI = reader_data->tli; return XLOG_BLCKSZ; } /* Read the requested page */ - if (private_data->xlogfile != -1) + if (reader_data->xlogfile != -1) { - if (fio_seek(private_data->xlogfile, (off_t) targetPageOff) < 0) + if (fio_seek(reader_data->xlogfile, (off_t) targetPageOff) < 0) { elog(WARNING, "Thread [%d]: Could not seek in WAL segment \"%s\": %s", - private_data->thread_num, private_data->xlogpath, strerror(errno)); + reader_data->thread_num, reader_data->xlogpath, strerror(errno)); return -1; } - if (fio_read(private_data->xlogfile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + if (fio_read(reader_data->xlogfile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) { elog(WARNING, "Thread [%d]: Could not read from WAL segment \"%s\": %s", - private_data->thread_num, private_data->xlogpath, strerror(errno)); + reader_data->thread_num, reader_data->xlogpath, strerror(errno)); return -1; } } #ifdef HAVE_LIBZ else { - if (fio_gzseek(private_data->gz_xlogfile, (z_off_t) targetPageOff, SEEK_SET) == -1) + if (fio_gzseek(reader_data->gz_xlogfile, (z_off_t) targetPageOff, SEEK_SET) == -1) { elog(WARNING, "Thread [%d]: Could not seek in compressed WAL segment \"%s\": %s", - private_data->thread_num, - private_data->gz_xlogpath, - get_gz_error(private_data->gz_xlogfile)); + reader_data->thread_num, reader_data->gz_xlogpath, + get_gz_error(reader_data->gz_xlogfile)); return -1; } - if (fio_gzread(private_data->gz_xlogfile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + if (fio_gzread(reader_data->gz_xlogfile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) { elog(WARNING, "Thread [%d]: Could not read from compressed WAL segment \"%s\": %s", - private_data->thread_num, - private_data->gz_xlogpath, - get_gz_error(private_data->gz_xlogfile)); + reader_data->thread_num, reader_data->gz_xlogpath, + get_gz_error(reader_data->gz_xlogfile)); return -1; } } #endif - memcpy(private_data->page_buf, readBuf, XLOG_BLCKSZ); - private_data->prev_page_off = targetPageOff; - *pageTLI = private_data->tli; + memcpy(reader_data->page_buf, readBuf, XLOG_BLCKSZ); + reader_data->prev_page_off = targetPageOff; + *pageTLI = reader_data->tli; return XLOG_BLCKSZ; } @@ -1101,90 +829,569 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, * Initialize WAL segments reading. 
*/ static XLogReaderState * -InitXLogPageRead(XLogPageReadPrivate *private_data, const char *archivedir, - TimeLineID tli, uint32 xlog_seg_size, bool allocate_reader) +InitXLogPageRead(XLogReaderData *reader_data, const char *archivedir, + TimeLineID tli, uint32 segment_size, bool manual_switch, + bool consistent_read, bool allocate_reader) { XLogReaderState *xlogreader = NULL; - MemSet(private_data, 0, sizeof(XLogPageReadPrivate)); - private_data->archivedir = archivedir; - private_data->tli = tli; - private_data->xlog_seg_size = xlog_seg_size; - private_data->xlogfile = -1; - private_data->location = FIO_BACKUP_HOST; + wal_archivedir = archivedir; + wal_seg_size = segment_size; + wal_manual_switch = manual_switch; + wal_consistent_read = consistent_read; + + MemSet(reader_data, 0, sizeof(XLogReaderData)); + reader_data->tli = tli; + reader_data->xlogfile = -1; if (allocate_reader) { #if PG_VERSION_NUM >= 110000 - xlogreader = XLogReaderAllocate(xlog_seg_size, - &SimpleXLogPageRead, private_data); + xlogreader = XLogReaderAllocate(wal_seg_size, &SimpleXLogPageRead, + reader_data); #else - xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, private_data); + xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, reader_data); #endif if (xlogreader == NULL) - elog(ERROR, "out of memory"); + elog(ERROR, "Out of memory"); xlogreader->system_identifier = instance_config.system_identifier; } return xlogreader; } +/* + * Run WAL processing routines using threads. Start from startpoint up to + * endpoint. It is possible to send zero endpoint, threads will read WAL + * infinitely in this case. + */ +static bool +RunXLogThreads(const char *archivedir, time_t target_time, + TransactionId target_xid, XLogRecPtr target_lsn, TimeLineID tli, + uint32 segment_size, XLogRecPtr startpoint, XLogRecPtr endpoint, + bool consistent_read, xlog_record_function process_record, + XLogRecTarget *last_rec) +{ + pthread_t *threads; + xlog_thread_arg *thread_args; + int i; + int threads_need = 0; + XLogSegNo endSegNo = 0; + XLogSegNo errorSegNo = 0; + bool result = true; + + if (!XRecOffIsValid(startpoint)) + elog(ERROR, "Invalid startpoint value %X/%X", + (uint32) (startpoint >> 32), (uint32) (startpoint)); + + if (!XLogRecPtrIsInvalid(endpoint)) + { + if (!XRecOffIsValid(endpoint)) + elog(ERROR, "Invalid endpoint value %X/%X", + (uint32) (endpoint >> 32), (uint32) (endpoint)); + + GetXLogSegNo(endpoint, endSegNo, segment_size); + } + + /* Initialize static variables for workers */ + wal_target_time = target_time; + wal_target_xid = target_xid; + wal_target_lsn = target_lsn; + + GetXLogSegNo(startpoint, segno_start, segment_size); + segno_target = 0; + GetXLogSegNo(startpoint, segno_next, segment_size); + segnum_read = 0; + segnum_corrupted = 0; + + threads = (pthread_t *) pgut_malloc(sizeof(pthread_t) * num_threads); + thread_args = (xlog_thread_arg *) pgut_malloc(sizeof(xlog_thread_arg) * num_threads); + + /* + * Initialize thread args. + * + * Each thread works with its own WAL segment and we need to adjust + * startpoint value for each thread. 
+ */ + for (i = 0; i < num_threads; i++) + { + xlog_thread_arg *arg = &thread_args[i]; + + InitXLogPageRead(&arg->reader_data, archivedir, tli, segment_size, true, + consistent_read, false); + arg->reader_data.xlogsegno = segno_next; + arg->reader_data.thread_num = i + 1; + arg->process_record = process_record; + arg->startpoint = startpoint; + arg->endpoint = endpoint; + arg->endSegNo = endSegNo; + arg->got_target = false; + /* By default there is some error */ + arg->ret = 1; + + threads_need++; + segno_next++; + /* + * If we need to read less WAL segments than num_threads, create less + * threads. + */ + if (endSegNo != 0 && segno_next > endSegNo) + break; + GetXLogRecPtr(segno_next, 0, segment_size, startpoint); + } + + /* Run threads */ + thread_interrupted = false; + for (i = 0; i < threads_need; i++) + { + elog(VERBOSE, "Start WAL reader thread: %d", i + 1); + pthread_create(&threads[i], NULL, XLogThreadWorker, &thread_args[i]); + } + + /* Wait for threads */ + for (i = 0; i < threads_need; i++) + { + pthread_join(threads[i], NULL); + if (thread_args[i].ret == 1) + result = false; + } + + if (last_rec) + for (i = 0; i < threads_need; i++) + { + XLogRecTarget *cur_rec; + + if (thread_args[i].ret != 0) + { + /* + * Save invalid segment number after which all segments are not + * valid. + */ + if (errorSegNo == 0 || + errorSegNo > thread_args[i].reader_data.xlogsegno) + errorSegNo = thread_args[i].reader_data.xlogsegno; + continue; + } + + /* Is this segment valid */ + if (errorSegNo != 0 && + thread_args[i].reader_data.xlogsegno > errorSegNo) + continue; + + cur_rec = &thread_args[i].reader_data.cur_rec; + /* + * If we got the target return minimum possible record. + */ + if (segno_target > 0) + { + if (thread_args[i].got_target && + thread_args[i].reader_data.xlogsegno == segno_target) + { + *last_rec = *cur_rec; + break; + } + } + /* + * Else return maximum possible record up to which restore is + * possible. + */ + else if (last_rec->rec_lsn < cur_rec->rec_lsn) + *last_rec = *cur_rec; + } + + pfree(threads); + pfree(thread_args); + + return result; +} + +/* + * WAL reader worker. + */ +void * +XLogThreadWorker(void *arg) +{ + xlog_thread_arg *thread_arg = (xlog_thread_arg *) arg; + XLogReaderData *reader_data = &thread_arg->reader_data; + XLogReaderState *xlogreader; + XLogSegNo nextSegNo = 0; + XLogRecPtr found; + uint32 prev_page_off = 0; + bool need_read = true; + +#if PG_VERSION_NUM >= 110000 + xlogreader = XLogReaderAllocate(wal_seg_size, &SimpleXLogPageRead, + reader_data); +#else + xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, reader_data); +#endif + if (xlogreader == NULL) + elog(ERROR, "Thread [%d]: out of memory", reader_data->thread_num); + xlogreader->system_identifier = instance_config.system_identifier; + + found = XLogFindNextRecord(xlogreader, thread_arg->startpoint); + + /* + * We get invalid WAL record pointer usually when WAL segment is absent or + * is corrupted. 
+ */ + if (XLogRecPtrIsInvalid(found)) + { + if (wal_consistent_read && XLogWaitForConsistency(xlogreader)) + need_read = false; + else + { + if (xlogreader->errormsg_buf[0] != '\0') + elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X: %s", + reader_data->thread_num, + (uint32) (thread_arg->startpoint >> 32), + (uint32) (thread_arg->startpoint), + xlogreader->errormsg_buf); + else + elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X", + reader_data->thread_num, + (uint32) (thread_arg->startpoint >> 32), + (uint32) (thread_arg->startpoint)); + PrintXLogCorruptionMsg(reader_data, ERROR); + } + } + + thread_arg->startpoint = found; + + elog(VERBOSE, "Thread [%d]: Starting LSN: %X/%X", + reader_data->thread_num, + (uint32) (thread_arg->startpoint >> 32), + (uint32) (thread_arg->startpoint)); + + while (need_read) + { + XLogRecord *record; + char *errormsg; + bool stop_reading = false; + + if (interrupted || thread_interrupted) + elog(ERROR, "Thread [%d]: Interrupted during WAL reading", + reader_data->thread_num); + + /* + * We need to switch to the next WAL segment after reading previous + * record. It may happen if we read contrecord. + */ + if (reader_data->need_switch && + !SwitchThreadToNextWal(xlogreader, thread_arg)) + break; + + record = XLogReadRecord(xlogreader, thread_arg->startpoint, &errormsg); + + if (record == NULL) + { + XLogRecPtr errptr; + + /* + * There is no record, try to switch to the next WAL segment. + * Usually SimpleXLogPageRead() does it by itself. But here we need + * to do it manually to support threads. + */ + if (reader_data->need_switch && errormsg == NULL) + { + if (SwitchThreadToNextWal(xlogreader, thread_arg)) + continue; + else + break; + } + + /* + * XLogWaitForConsistency() is normally used only with threads. + * Call it here for just in case. + */ + if (wal_consistent_read && XLogWaitForConsistency(xlogreader)) + break; + else if (wal_consistent_read) + { + XLogSegNo segno_report; + + pthread_lock(&wal_segment_mutex); + segno_report = segno_start + segnum_read; + pthread_mutex_unlock(&wal_segment_mutex); + + /* + * Report error message if this is the first corrupted WAL. + */ + if (reader_data->xlogsegno > segno_report) + return NULL; /* otherwise just stop the thread */ + } + + errptr = thread_arg->startpoint ? + thread_arg->startpoint : xlogreader->EndRecPtr; + + if (errormsg) + elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X: %s", + reader_data->thread_num, + (uint32) (errptr >> 32), (uint32) (errptr), + errormsg); + else + elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X", + reader_data->thread_num, + (uint32) (errptr >> 32), (uint32) (errptr)); + + /* + * If we don't have all WAL files from prev backup start_lsn to current + * start_lsn, we won't be able to build page map and PAGE backup will + * be incorrect. Stop it and throw an error. 
+ */ + PrintXLogCorruptionMsg(reader_data, ERROR); + } + + getRecordTimestamp(xlogreader, &reader_data->cur_rec.rec_time); + if (TransactionIdIsValid(XLogRecGetXid(xlogreader))) + reader_data->cur_rec.rec_xid = XLogRecGetXid(xlogreader); + reader_data->cur_rec.rec_lsn = xlogreader->ReadRecPtr; + + if (thread_arg->process_record) + thread_arg->process_record(xlogreader, reader_data, &stop_reading); + if (stop_reading) + { + thread_arg->got_target = true; + + pthread_lock(&wal_segment_mutex); + /* We should store least target segment number */ + if (segno_target == 0 || segno_target > reader_data->xlogsegno) + segno_target = reader_data->xlogsegno; + pthread_mutex_unlock(&wal_segment_mutex); + + break; + } + + /* + * Check if other thread got the target segment. Check it not very + * often, only every WAL page. + */ + if (wal_consistent_read && prev_page_off != 0 && + prev_page_off != reader_data->prev_page_off) + { + XLogSegNo segno; + + pthread_lock(&wal_segment_mutex); + segno = segno_target; + pthread_mutex_unlock(&wal_segment_mutex); + + if (segno != 0 && segno < reader_data->xlogsegno) + break; + } + prev_page_off = reader_data->prev_page_off; + + /* continue reading at next record */ + thread_arg->startpoint = InvalidXLogRecPtr; + + GetXLogSegNo(xlogreader->EndRecPtr, nextSegNo, wal_seg_size); + + if (thread_arg->endSegNo != 0 && + !XLogRecPtrIsInvalid(thread_arg->endpoint) && + /* + * Consider thread_arg->endSegNo and thread_arg->endpoint only if + * they are valid. + */ + xlogreader->ReadRecPtr == thread_arg->endpoint && + nextSegNo > thread_arg->endSegNo) + break; + } + + CleanupXLogPageRead(xlogreader); + XLogReaderFree(xlogreader); + + /* Extracting is successful */ + thread_arg->ret = 0; + return NULL; +} + +/* + * Do manual switch to the next WAL segment. + * + * Returns false if the reader reaches the end of a WAL segment list. + */ +static bool +SwitchThreadToNextWal(XLogReaderState *xlogreader, xlog_thread_arg *arg) +{ + XLogReaderData *reader_data; + XLogRecPtr found; + + reader_data = (XLogReaderData *) xlogreader->private_data; + reader_data->need_switch = false; + + /* Critical section */ + pthread_lock(&wal_segment_mutex); + Assert(segno_next); + reader_data->xlogsegno = segno_next; + segnum_read++; + segno_next++; + pthread_mutex_unlock(&wal_segment_mutex); + + /* We've reached the end */ + if (arg->endSegNo != 0 && reader_data->xlogsegno > arg->endSegNo) + return false; + + /* Adjust next record position */ + GetXLogRecPtr(reader_data->xlogsegno, 0, wal_seg_size, arg->startpoint); + /* We need to close previously opened file if it wasn't closed earlier */ + CleanupXLogPageRead(xlogreader); + /* Skip over the page header and contrecord if any */ + found = XLogFindNextRecord(xlogreader, arg->startpoint); + + /* + * We get invalid WAL record pointer usually when WAL segment is + * absent or is corrupted. + */ + if (XLogRecPtrIsInvalid(found)) + { + /* + * Check if we need to stop reading. We stop if other thread found a + * target segment. + */ + if (wal_consistent_read && XLogWaitForConsistency(xlogreader)) + return false; + else if (wal_consistent_read) + { + XLogSegNo segno_report; + + pthread_lock(&wal_segment_mutex); + segno_report = segno_start + segnum_read; + pthread_mutex_unlock(&wal_segment_mutex); + + /* + * Report error message if this is the first corrupted WAL. 
+ */
+ if (reader_data->xlogsegno > segno_report)
+ return false; /* otherwise just stop the thread */
+ }
+
+ elog(WARNING, "Thread [%d]: Could not read WAL record at %X/%X",
+ reader_data->thread_num,
+ (uint32) (arg->startpoint >> 32), (uint32) (arg->startpoint));
+ PrintXLogCorruptionMsg(reader_data, ERROR);
+ }
+ arg->startpoint = found;
+
+ elog(VERBOSE, "Thread [%d]: Switched to LSN %X/%X",
+ reader_data->thread_num,
+ (uint32) (arg->startpoint >> 32), (uint32) (arg->startpoint));
+
+ return true;
+}
+
+/*
+ * Wait for other threads since the current thread couldn't read its segment.
+ * We need to decide whether this is a failure or not.
+ *
+ * Returns true if there is no failure and the previous target segment was found.
+ * Otherwise returns false.
+ */
+static bool
+XLogWaitForConsistency(XLogReaderState *xlogreader)
+{
+ uint32 segnum_need;
+ XLogReaderData *reader_data = (XLogReaderData *) xlogreader->private_data;
+ bool log_message = true;
+
+ segnum_need = reader_data->xlogsegno - segno_start;
+ while (true)
+ {
+ uint32 segnum_current_read;
+ XLogSegNo segno;
+
+ if (log_message)
+ {
+ char xlogfname[MAXFNAMELEN];
+
+ GetXLogFileName(xlogfname, reader_data->tli, reader_data->xlogsegno,
+ wal_seg_size);
+
+ elog(VERBOSE, "Thread [%d]: Possible WAL corruption in %s. Waiting for other threads to decide whether this is a failure",
+ reader_data->thread_num, xlogfname);
+ log_message = false;
+ }
+
+ if (interrupted || thread_interrupted)
+ elog(ERROR, "Thread [%d]: Interrupted during WAL reading",
+ reader_data->thread_num);
+
+ pthread_lock(&wal_segment_mutex);
+ segnum_current_read = segnum_read + segnum_corrupted;
+ segno = segno_target;
+ pthread_mutex_unlock(&wal_segment_mutex);
+
+ /* Other threads read all previous segments and didn't find target */
+ if (segnum_need <= segnum_current_read)
+ {
+ /* Mark current segment as corrupted */
+ pthread_lock(&wal_segment_mutex);
+ segnum_corrupted++;
+ pthread_mutex_unlock(&wal_segment_mutex);
+ return false;
+ }
+
+ if (segno != 0 && segno < reader_data->xlogsegno)
+ return true;
+
+ pg_usleep(500000L); /* 500 ms */
+ }
+
+ /* We shouldn't reach it */
+ return false;
+}
+
 /*
 * Cleanup after WAL segment reading.
 */
 static void
 CleanupXLogPageRead(XLogReaderState *xlogreader)
 {
- XLogPageReadPrivate *private_data;
+ XLogReaderData *reader_data;
- private_data = (XLogPageReadPrivate *) xlogreader->private_data;
- if (private_data->xlogfile >= 0)
+ reader_data = (XLogReaderData *) xlogreader->private_data;
+ if (reader_data->xlogfile >= 0)
 {
- fio_close(private_data->xlogfile);
- private_data->xlogfile = -1;
+ fio_close(reader_data->xlogfile);
+ reader_data->xlogfile = -1;
 }
 #ifdef HAVE_LIBZ
- else if (private_data->gz_xlogfile != NULL)
+ else if (reader_data->gz_xlogfile != NULL)
 {
- fio_gzclose(private_data->gz_xlogfile);
- private_data->gz_xlogfile = NULL;
+ fio_gzclose(reader_data->gz_xlogfile);
+ reader_data->gz_xlogfile = NULL;
 }
 #endif
- private_data->prev_page_off = 0;
- private_data->xlogexists = false;
+ reader_data->prev_page_off = 0;
+ reader_data->xlogexists = false;
 }

 static void
-PrintXLogCorruptionMsg(XLogPageReadPrivate *private_data, int elevel)
+PrintXLogCorruptionMsg(XLogReaderData *reader_data, int elevel)
 {
- if (private_data->xlogpath[0] != 0)
+ if (reader_data->xlogpath[0] != 0)
 {
 /*
 * XLOG reader couldn't read WAL segment.
 * We throw a WARNING here to be able to update backup status.
*/ - if (!private_data->xlogexists) + if (!reader_data->xlogexists) elog(elevel, "Thread [%d]: WAL segment \"%s\" is absent", - private_data->thread_num, - private_data->xlogpath); - else if (private_data->xlogfile != -1) + reader_data->thread_num, reader_data->xlogpath); + else if (reader_data->xlogfile != -1) elog(elevel, "Thread [%d]: Possible WAL corruption. " "Error has occured during reading WAL segment \"%s\"", - private_data->thread_num, - private_data->xlogpath); + reader_data->thread_num, reader_data->xlogpath); #ifdef HAVE_LIBZ - else if (private_data->gz_xlogfile != NULL) + else if (reader_data->gz_xlogfile != NULL) elog(elevel, "Thread [%d]: Possible WAL corruption. " "Error has occured during reading WAL segment \"%s\"", - private_data->thread_num, - private_data->gz_xlogpath); + reader_data->thread_num, reader_data->gz_xlogpath); #endif } else { /* Cannot tell what happened specifically */ elog(elevel, "Thread [%d]: An error occured during WAL reading", - private_data->thread_num); + reader_data->thread_num); } } @@ -1192,7 +1399,8 @@ PrintXLogCorruptionMsg(XLogPageReadPrivate *private_data, int elevel) * Extract information about blocks modified in this record. */ static void -extractPageInfo(XLogReaderState *record) +extractPageInfo(XLogReaderState *record, XLogReaderData *reader_data, + bool *stop_reading) { uint8 block_id; RmgrId rmid = XLogRecGetRmid(record); @@ -1260,6 +1468,27 @@ extractPageInfo(XLogReaderState *record) } } +/* + * Check the current read WAL record during validation. + */ +static void +validateXLogRecord(XLogReaderState *record, XLogReaderData *reader_data, + bool *stop_reading) +{ + /* Check target xid */ + if (TransactionIdIsValid(wal_target_xid) && + wal_target_xid == reader_data->cur_rec.rec_xid) + *stop_reading = true; + /* Check target time */ + else if (wal_target_time != 0 && + timestamptz_to_time_t(reader_data->cur_rec.rec_time) >= wal_target_time) + *stop_reading = true; + /* Check target lsn */ + else if (XRecOffIsValid(wal_target_lsn) && + reader_data->cur_rec.rec_lsn >= wal_target_lsn) + *stop_reading = true; +} + /* * Extract timestamp from WAL record. 
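The threaded reader introduced above replaces the old single-pass scan: RunXLogThreads() hands each worker its own starting WAL segment, SwitchThreadToNextWal() lets a worker claim the next unassigned segment from shared counters, and XLogWaitForConsistency() makes a worker that hit an unreadable segment wait until all earlier segments are accounted for before reporting corruption. The standalone sketch below illustrates only that coordination pattern; the segment numbers, the read_segment() stub and the simplified counters are stand-ins for illustration, not pg_probackup's API.

/*
 * Standalone illustration, not pg_probackup code: workers claim segments from
 * a shared counter, and a worker that cannot read its segment waits until all
 * earlier segments are accounted for before declaring it corrupted.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define NUM_THREADS   4
#define LAST_SEGMENT  16	/* pretend the WAL archive ends here */
#define BAD_SEGMENT   11	/* pretend this segment is unreadable */

static pthread_mutex_t segment_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned long segno_start = 1;		/* first segment to read */
static unsigned long segno_next = 1;		/* next segment to hand out */
static unsigned long segnum_read = 0;		/* segments read successfully */
static unsigned long segnum_corrupted = 0;	/* segments given up on */

/* Stand-in for reading one WAL segment; fails only for BAD_SEGMENT. */
static bool
read_segment(unsigned long segno)
{
	usleep(10000);
	return segno != BAD_SEGMENT;
}

/* Wait until all earlier segments are processed, then report a failure. */
static bool
wait_for_consistency(unsigned long segno)
{
	unsigned long segnum_need = segno - segno_start;

	for (;;)
	{
		unsigned long done;

		pthread_mutex_lock(&segment_mutex);
		done = segnum_read + segnum_corrupted;
		pthread_mutex_unlock(&segment_mutex);

		if (segnum_need <= done)
		{
			pthread_mutex_lock(&segment_mutex);
			segnum_corrupted++;
			pthread_mutex_unlock(&segment_mutex);
			return false;	/* everything before us was read, we really failed */
		}
		usleep(1000);
	}
}

static void *
worker(void *arg)
{
	(void) arg;

	for (;;)
	{
		unsigned long segno;

		/* Claim the next unassigned segment. */
		pthread_mutex_lock(&segment_mutex);
		segno = segno_next++;
		pthread_mutex_unlock(&segment_mutex);

		if (segno > LAST_SEGMENT)
			return NULL;

		if (read_segment(segno))
		{
			pthread_mutex_lock(&segment_mutex);
			segnum_read++;
			pthread_mutex_unlock(&segment_mutex);
		}
		else if (!wait_for_consistency(segno))
			fprintf(stderr, "segment %lu is corrupted\n", segno);
	}
}

int
main(void)
{
	pthread_t	threads[NUM_THREADS];
	int			i;

	for (i = 0; i < NUM_THREADS; i++)
		pthread_create(&threads[i], NULL, worker, NULL);
	for (i = 0; i < NUM_THREADS; i++)
		pthread_join(threads[i], NULL);

	printf("read=%lu corrupted=%lu\n", segnum_read, segnum_corrupted);
	return 0;
}

The real code additionally tracks segno_target under the same mutex so that workers already past a found recovery target can stop early instead of reporting a failure.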
* diff --git a/src/pg_probackup.c b/src/pg_probackup.c index 2a60e447..1ee40e3b 100644 --- a/src/pg_probackup.c +++ b/src/pg_probackup.c @@ -57,6 +57,8 @@ char backup_instance_path[MAXPGPATH]; */ char arclog_path[MAXPGPATH] = ""; +/* colon separated external directories list ("/path1:/path2") */ +char *externaldir = NULL; /* common options */ static char *backup_id_string = NULL; int num_threads = 1; @@ -88,11 +90,14 @@ bool restore_as_replica = false; bool restore_no_validate = false; bool skip_block_validation = false; +bool skip_external_dirs = false; /* delete options */ bool delete_wal = false; bool delete_expired = false; +bool merge_expired = false; bool force_delete = false; +bool dry_run = false; /* compression options */ bool compress_shortcut = false; @@ -140,12 +145,15 @@ static ConfigOption cmd_options[] = { 'b', 234, "temp-slot", &temp_slot, SOURCE_CMD_STRICT }, { 'b', 134, "delete-wal", &delete_wal, SOURCE_CMD_STRICT }, { 'b', 135, "delete-expired", &delete_expired, SOURCE_CMD_STRICT }, + { 'b', 235, "merge-expired", &merge_expired, SOURCE_CMD_STRICT }, + { 'b', 237, "dry-run", &dry_run, SOURCE_CMD_STRICT }, /* restore options */ { 's', 136, "time", &target_time, SOURCE_CMD_STRICT }, { 's', 137, "xid", &target_xid, SOURCE_CMD_STRICT }, { 's', 138, "inclusive", &target_inclusive, SOURCE_CMD_STRICT }, { 'u', 139, "timeline", &target_tli, SOURCE_CMD_STRICT }, { 'f', 'T', "tablespace-mapping", opt_tablespace_map, SOURCE_CMD_STRICT }, + { 'f', 155, "external-mapping", opt_externaldir_map, SOURCE_CMD_STRICT }, { 'b', 140, "immediate", &target_immediate, SOURCE_CMD_STRICT }, { 's', 141, "recovery-target-name", &target_name, SOURCE_CMD_STRICT }, { 's', 142, "recovery-target-action", &target_action, SOURCE_CMD_STRICT }, @@ -153,6 +161,7 @@ static ConfigOption cmd_options[] = { 'b', 143, "no-validate", &restore_no_validate, SOURCE_CMD_STRICT }, { 's', 144, "lsn", &target_lsn, SOURCE_CMD_STRICT }, { 'b', 154, "skip-block-validation", &skip_block_validation, SOURCE_CMD_STRICT }, + { 'b', 156, "skip-external-dirs", &skip_external_dirs, SOURCE_CMD_STRICT }, /* delete options */ { 'b', 145, "wal", &delete_wal, SOURCE_CMD_STRICT }, { 'b', 146, "expired", &delete_expired, SOURCE_CMD_STRICT }, @@ -399,8 +408,12 @@ main(int argc, char *argv[]) config_get_opt_env(instance_options); /* Read options from configuration file */ - join_path_components(path, backup_instance_path, BACKUP_CATALOG_CONF_FILE); - config_read_opt(path, instance_options, ERROR, true); + if (backup_subcmd != ADD_INSTANCE_CMD) + { + join_path_components(path, backup_instance_path, + BACKUP_CATALOG_CONF_FILE); + config_read_opt(path, instance_options, ERROR, true, false); + } } /* Initialize logger */ @@ -529,12 +542,13 @@ main(int argc, char *argv[]) case DELETE_CMD: if (delete_expired && backup_id_string) elog(ERROR, "You cannot specify --delete-expired and --backup-id options together"); - if (!delete_expired && !delete_wal && !backup_id_string) - elog(ERROR, "You must specify at least one of the delete options: --expired |--wal |--backup_id"); - if (delete_wal && !delete_expired && !backup_id_string) - return do_retention_purge(); - if (delete_expired) - return do_retention_purge(); + if (merge_expired && backup_id_string) + elog(ERROR, "You cannot specify --merge-expired and --backup-id options together"); + if (!delete_expired && !merge_expired && !delete_wal && !backup_id_string) + elog(ERROR, "You must specify at least one of the delete options: " + "--expired |--wal |--merge-expired |--delete-invalid |--backup_id"); + 
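The externaldir variable declared at the top of this pg_probackup.c hunk holds a colon-separated list such as "/path1:/path2"; other hunks in this patch turn such a value into a list with make_external_directory_list(), whose implementation is not shown here. The sketch below is one plausible, simplified way such a value could be split, using a plain NULL-terminated string array instead of pg_probackup's parray; split_external_dirs() is a made-up name for the example.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Split a colon-separated list such as "/path1:/path2" into a NULL-terminated
 * array of strings. A hypothetical, simplified counterpart of
 * make_external_directory_list(), which returns a parray in the real code.
 */
static char **
split_external_dirs(const char *colon_separated_dirs)
{
	char	   *copy = strdup(colon_separated_dirs);
	char	  **dirs = NULL;
	char	   *tok;
	size_t		n = 0;

	for (tok = strtok(copy, ":"); tok != NULL; tok = strtok(NULL, ":"))
	{
		dirs = realloc(dirs, (n + 2) * sizeof(char *));
		dirs[n++] = strdup(tok);
	}
	if (dirs)
		dirs[n] = NULL;			/* terminate the list */
	free(copy);
	return dirs;
}

int
main(void)
{
	char	  **dirs = split_external_dirs("/etc/postgresql:/opt/scripts");
	size_t		i;

	/* External directories are numbered starting from 1 in the file lists. */
	for (i = 0; dirs != NULL && dirs[i] != NULL; i++)
		printf("external dir %zu: %s\n", i + 1, dirs[i]);
	return 0;
}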
if (!backup_id_string) + return do_retention(); else do_delete(current.backup_id); break; @@ -545,7 +559,7 @@ main(int argc, char *argv[]) do_show_config(); break; case SET_CONFIG_CMD: - do_set_config(); + do_set_config(false); break; case NO_CMD: /* Should not happen */ diff --git a/src/pg_probackup.h b/src/pg_probackup.h index 43407fb4..69b6bb20 100644 --- a/src/pg_probackup.h +++ b/src/pg_probackup.h @@ -10,10 +10,12 @@ #ifndef PG_PROBACKUP_H #define PG_PROBACKUP_H -#include -#include -#include -#include +#include "postgres_fe.h" +#include "libpq-fe.h" +#include "libpq-int.h" + +#include "access/xlog_internal.h" +#include "utils/pg_crc.h" #ifdef FRONTEND #undef FRONTEND @@ -33,7 +35,7 @@ #include "datapagemap.h" /* Directory/File names */ -#define DATABASE_DIR "database" +#define DATABASE_DIR "database" #define BACKUPS_DIR "backups" #if PG_VERSION_NUM >= 100000 #define PG_XLOG_DIR "pg_wal" @@ -51,6 +53,7 @@ #define PG_BACKUP_LABEL_FILE "backup_label" #define PG_BLACK_LIST "black_list" #define PG_TABLESPACE_MAP_FILE "tablespace_map" +#define EXTERNAL_DIR "external_directories/externaldir" /* Timeout defaults */ #define ARCHIVE_TIMEOUT_DEFAULT 300 @@ -124,6 +127,7 @@ typedef struct pgFile int n_blocks; /* size of the file in blocks, readed during DELTA backup */ bool is_cfs; /* Flag to distinguish files compressed by CFS*/ bool is_database; + int external_dir_num; /* Number of external directory. 0 if not external */ bool exists_in_prev; /* Mark files, both data and regular, that exists in previous backup */ CompressAlg compress_alg; /* compression algorithm applied to the file */ volatile pg_atomic_flag lock; /* lock for synchronization of parallel threads */ @@ -184,6 +188,7 @@ typedef struct InstanceConfig uint32 xlog_seg_size; char *pgdata; + char *external_dir_str; const char *pgdatabase; const char *pghost; const char *pgport; @@ -268,6 +273,8 @@ struct pgBackup pgBackup *parent_backup_link; char *primary_conninfo; /* Connection parameters of the backup * in the format suitable for recovery.conf */ + char *external_dir_str; /* List of external directories, + * separated by ':' */ }; /* Recovery target for restore and validate subcommands */ @@ -298,9 +305,11 @@ typedef struct { const char *from_root; const char *to_root; + const char *external_prefix; parray *files_list; parray *prev_filelist; + parray *external_dirs; XLogRecPtr prev_start_lsn; PGconn *backup_conn; @@ -399,11 +408,14 @@ extern bool exclusive_backup; /* restore options */ extern bool restore_as_replica; extern bool skip_block_validation; +extern bool skip_external_dirs; /* delete options */ extern bool delete_wal; extern bool delete_expired; +extern bool merge_expired; extern bool force_delete; +extern bool dry_run; /* compression options */ extern bool compress_shortcut; @@ -448,6 +460,7 @@ extern pgRecoveryTarget *parseRecoveryTargetOptions( /* in merge.c */ extern void do_merge(time_t backup_id); +extern void merge_backups(pgBackup *backup, pgBackup *next_backup); /* in init.c */ extern int do_init(void); @@ -461,7 +474,7 @@ extern int do_archive_get(char *wal_file_path, char *wal_file_name); /* in configure.c */ extern void do_show_config(void); -extern void do_set_config(void); +extern void do_set_config(bool missing_ok); extern void init_config(InstanceConfig *config); /* in show.c */ @@ -470,7 +483,7 @@ extern int do_show(time_t requested_backup_id); /* in delete.c */ extern void do_delete(time_t backup_id); extern void delete_backup_files(pgBackup *backup); -extern int do_retention_purge(void); +extern int 
do_retention(void); extern int do_delete_instance(void); /* in fetch.c */ @@ -504,7 +517,8 @@ extern pgBackup *catalog_get_last_data_backup(parray *backup_list, TimeLineID tli); extern void pgBackupWriteControl(FILE *out, pgBackup *backup); extern void write_backup_filelist(pgBackup *backup, parray *files, - const char *root); + const char *root, const char *external_prefix, + parray *external_list); extern void pgBackupGetPath(const pgBackup *backup, char *path, size_t len, const char *subdir); @@ -515,10 +529,14 @@ extern void pgBackupInit(pgBackup *backup); extern void pgBackupFree(void *backup); extern int pgBackupCompareId(const void *f1, const void *f2); extern int pgBackupCompareIdDesc(const void *f1, const void *f2); +extern int pgBackupCompareIdEqual(const void *l, const void *r); +extern pgBackup* find_direct_child(parray *backup_list, pgBackup *target_backup); extern pgBackup* find_parent_full_backup(pgBackup *current_backup); extern int scan_parent_chain(pgBackup *current_backup, pgBackup **result_backup); extern bool is_parent(time_t parent_backup_time, pgBackup *child_backup, bool inclusive); +extern bool is_prolific(parray *backup_list, pgBackup *target_backup); +extern bool in_backup_list(parray *backup_list, pgBackup *target_backup); extern int get_backup_index_number(parray *backup_list, pgBackup *backup); extern bool launch_agent(void); @@ -530,7 +548,8 @@ extern const char* deparse_compress_alg(int alg); /* in dir.c */ extern void dir_list_file(parray *files, const char *root, bool exclude, - bool omit_symlink, bool add_root, fio_location location); + bool omit_symlink, bool add_root, int external_dir_num, fio_location location); + extern void create_data_directories(const char *data_dir, const char *backup_dir, bool extract_tablespaces, @@ -538,10 +557,20 @@ extern void create_data_directories(const char *data_dir, extern void read_tablespace_map(parray *files, const char *backup_dir); extern void opt_tablespace_map(ConfigOption *opt, const char *arg); +extern void opt_externaldir_map(ConfigOption *opt, const char *arg); extern void check_tablespace_mapping(pgBackup *backup); +extern void check_external_dir_mapping(pgBackup *backup); +extern char *get_external_remap(char *current_dir); -extern void print_file_list(FILE *out, const parray *files, const char *root); -extern parray *dir_read_file_list(const char *root, const char *file_txt, fio_location location); +extern void print_file_list(FILE *out, const parray *files, const char *root, + const char *external_prefix, parray *external_list); +extern parray *dir_read_file_list(const char *root, const char *external_prefix, + const char *file_txt, fio_location location); +extern parray *make_external_directory_list(const char *colon_separated_dirs); +extern void free_dir_list(parray *list); +extern void makeExternalDirPathByNum(char *ret_path, const char *pattern_path, + const int dir_num); +extern bool backup_contains_external(const char *dir, parray *dirs_list); extern int dir_create_dir(const char *path, mode_t mode); extern bool dir_is_empty(const char *path); @@ -549,14 +578,16 @@ extern bool dir_is_empty(const char *path); extern bool fileExists(const char *path, fio_location location); extern size_t pgFileSize(const char *path); -extern pgFile *pgFileNew(const char *path, bool omit_symlink, fio_location location); +extern pgFile *pgFileNew(const char *path, bool omit_symlink, int external_dir_num, fio_location location); extern pgFile *pgFileInit(const char *path); extern void pgFileDelete(pgFile *file); extern 
void pgFileFree(void *file); extern pg_crc32 pgFileGetCRC(const char *file_path, bool use_crc32c, bool raise_on_deleted, size_t *bytes_read, fio_location location); extern int pgFileComparePath(const void *f1, const void *f2); +extern int pgFileComparePathWithExternal(const void *f1, const void *f2); extern int pgFileComparePathDesc(const void *f1, const void *f2); +extern int pgFileComparePathWithExternalDesc(const void *f1, const void *f2); extern int pgFileCompareLinked(const void *f1, const void *f2); extern int pgFileCompareSize(const void *f1, const void *f2); @@ -583,14 +614,11 @@ extern bool check_file_pages(pgFile *file, XLogRecPtr stop_lsn, /* parsexlog.c */ extern void extractPageMap(const char *archivedir, TimeLineID tli, uint32 seg_size, - XLogRecPtr startpoint, XLogRecPtr endpoint, - parray *files); -extern void validate_wal(pgBackup *backup, - const char *archivedir, - time_t target_time, - TransactionId target_xid, - XLogRecPtr target_lsn, - TimeLineID tli, uint32 seg_size); + XLogRecPtr startpoint, XLogRecPtr endpoint); +extern void validate_wal(pgBackup *backup, const char *archivedir, + time_t target_time, TransactionId target_xid, + XLogRecPtr target_lsn, TimeLineID tli, + uint32 seg_size); extern bool read_recovery_info(const char *archivedir, TimeLineID tli, uint32 seg_size, XLogRecPtr start_lsn, XLogRecPtr stop_lsn, diff --git a/src/restore.c b/src/restore.c index 825db86a..77de980f 100644 --- a/src/restore.c +++ b/src/restore.c @@ -21,6 +21,9 @@ typedef struct { parray *files; pgBackup *backup; + parray *req_external_dirs; + parray *cur_external_dirs; + char *external_prefix; /* * Return value from the thread. @@ -29,7 +32,7 @@ typedef struct int ret; } restore_files_arg; -static void restore_backup(pgBackup *backup); +static void restore_backup(pgBackup *backup, const char *external_dir_str); static void create_recovery_conf(time_t backup_id, pgRecoveryTarget *rt, pgBackup *backup); @@ -37,7 +40,6 @@ static parray *read_timeline_history(TimeLineID targetTLI); static void *restore_files(void *arg); static void remove_deleted_files(pgBackup *backup); - /* * Entry point of pg_probackup RESTORE and VALIDATE subcommands. */ @@ -53,9 +55,6 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, pgBackup *dest_backup = NULL; pgBackup *base_full_backup = NULL; pgBackup *corrupted_backup = NULL; - int dest_backup_index = 0; - int base_full_backup_index = 0; - int corrupted_backup_index = 0; char *action = is_restore ? "Restore":"Validate"; parray *parent_chain = NULL; @@ -177,8 +176,6 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, if (dest_backup == NULL) elog(ERROR, "Backup satisfying target options is not found."); - dest_backup_index = get_backup_index_number(backups, dest_backup); - /* If we already found dest_backup, look for full backup. 
*/ if (dest_backup->backup_mode == BACKUP_MODE_FULL) base_full_backup = dest_backup; @@ -199,7 +196,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, missing_backup_start_time = tmp_backup->parent_backup; missing_backup_id = base36enc_dup(tmp_backup->parent_backup); - for (j = get_backup_index_number(backups, tmp_backup); j >= 0; j--) + for (j = 0; j < parray_num(backups); j++) { pgBackup *backup = (pgBackup *) parray_get(backups, j); @@ -222,6 +219,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, } } } + pg_free(missing_backup_id); /* No point in doing futher */ elog(ERROR, "%s of backup %s failed.", action, base36enc(dest_backup->start_time)); } @@ -233,7 +231,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, /* parent_backup_id contain human-readable backup ID of oldest invalid backup */ parent_backup_id = base36enc_dup(tmp_backup->start_time); - for (j = get_backup_index_number(backups, tmp_backup) - 1; j >= 0; j--) + for (j = 0; j < parray_num(backups); j++) { pgBackup *backup = (pgBackup *) parray_get(backups, j); @@ -258,7 +256,13 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, } } } + pg_free(parent_backup_id); tmp_backup = find_parent_full_backup(dest_backup); + + /* sanity */ + if (!tmp_backup) + elog(ERROR, "Parent full backup for the given backup %s was not found", + base36enc(dest_backup->start_time)); } /* @@ -274,14 +278,15 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, if (base_full_backup == NULL) elog(ERROR, "Full backup satisfying target options is not found."); - base_full_backup_index = get_backup_index_number(backups, base_full_backup); - /* * Ensure that directories provided in tablespace mapping are valid * i.e. empty or not exist. */ if (is_restore) + { check_tablespace_mapping(dest_backup); + check_external_dir_mapping(dest_backup); + } /* At this point we are sure that parent chain is whole * so we can build separate array, containing all needed backups, @@ -292,17 +297,16 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, /* Take every backup that is a child of base_backup AND parent of dest_backup * including base_backup and dest_backup */ - for (i = base_full_backup_index; i >= dest_backup_index; i--) - { - tmp_backup = (pgBackup *) parray_get(backups, i); - if (is_parent(base_full_backup->start_time, tmp_backup, true) && - is_parent(tmp_backup->start_time, dest_backup, true)) - { - parray_append(parent_chain, tmp_backup); - } + tmp_backup = dest_backup; + while(tmp_backup->parent_backup_link) + { + parray_append(parent_chain, tmp_backup); + tmp_backup = tmp_backup->parent_backup_link; } + parray_append(parent_chain, base_full_backup); + /* for validation or restore with enabled validation */ if (!is_restore || !rt->restore_no_validate) { @@ -312,7 +316,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, /* * Validate backups from base_full_backup to dest_backup. 
*/ - for (i = 0; i < parray_num(parent_chain); i++) + for (i = parray_num(parent_chain) - 1; i >= 0; i--) { tmp_backup = (pgBackup *) parray_get(parent_chain, i); @@ -339,10 +343,6 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, if (tmp_backup->status != BACKUP_STATUS_OK) { corrupted_backup = tmp_backup; - /* we need corrupted backup index from 'backups' not parent_chain - * so we can properly orphanize all its descendants - */ - corrupted_backup_index = get_backup_index_number(backups, corrupted_backup); break; } /* We do not validate WAL files of intermediate backups @@ -368,7 +368,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, char *corrupted_backup_id; corrupted_backup_id = base36enc_dup(corrupted_backup->start_time); - for (j = corrupted_backup_index - 1; j >= 0; j--) + for (j = 0; j < parray_num(backups); j++) { pgBackup *backup = (pgBackup *) parray_get(backups, j); @@ -409,10 +409,11 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, base36enc(dest_backup->start_time), status2str(dest_backup->status)); /* We ensured that all backups are valid, now restore if required + * TODO: before restore - lock entire parent chain */ if (is_restore) { - for (i = 0; i < parray_num(parent_chain); i++) + for (i = parray_num(parent_chain) - 1; i >= 0; i--) { pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); @@ -427,7 +428,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, if (rt->restore_no_validate && !lock_backup(backup)) elog(ERROR, "Cannot lock backup directory"); - restore_backup(backup); + restore_backup(backup, dest_backup->external_dir_str); } /* @@ -455,19 +456,23 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, * Restore one backup. */ void -restore_backup(pgBackup *backup) +restore_backup(pgBackup *backup, const char *external_dir_str) { char timestamp[100]; char this_backup_path[MAXPGPATH]; char database_path[MAXPGPATH]; + char external_prefix[MAXPGPATH]; char list_path[MAXPGPATH]; parray *files; + parray *requested_external_dirs = NULL; + parray *current_external_dirs = NULL; int i; /* arrays with meta info for multi threaded backup */ pthread_t *threads; restore_files_arg *threads_args; bool restore_isok = true; + if (backup->status != BACKUP_STATUS_OK) elog(ERROR, "Backup %s cannot be restored because it is not valid", base36enc(backup->start_time)); @@ -492,31 +497,84 @@ restore_backup(pgBackup *backup) pgBackupGetPath(backup, this_backup_path, lengthof(this_backup_path), NULL); create_data_directories(instance_config.pgdata, this_backup_path, true, FIO_DB_HOST); + if(external_dir_str && !skip_external_dirs) + { + requested_external_dirs = make_external_directory_list(external_dir_str); + for (i = 0; i < parray_num(requested_external_dirs); i++) + { + char *external_path = parray_get(requested_external_dirs, i); + external_path = get_external_remap(external_path); + dir_create_dir(external_path, DIR_PERMISSION); + } + } + + if(backup->external_dir_str) + current_external_dirs = make_external_directory_list(backup->external_dir_str); + /* * Get list of files which need to be restored. 
*/ pgBackupGetPath(backup, database_path, lengthof(database_path), DATABASE_DIR); + pgBackupGetPath(backup, external_prefix, lengthof(external_prefix), + EXTERNAL_DIR); pgBackupGetPath(backup, list_path, lengthof(list_path), DATABASE_FILE_LIST); - files = dir_read_file_list(database_path, list_path, FIO_BACKUP_HOST); + files = dir_read_file_list(database_path, external_prefix, list_path, FIO_BACKUP_HOST); + /* Restore directories in do_backup_instance way */ + parray_qsort(files, pgFileComparePath); + + /* + * Make external directories before restore + * and setup threads at the same time + */ + for (i = 0; i < parray_num(files); i++) + { + pgFile *file = (pgFile *) parray_get(files, i); + + /* If the entry was an external directory, create it in the backup */ + if (file->external_dir_num && S_ISDIR(file->mode)) + { + char dirpath[MAXPGPATH]; + char *dir_name; + char *external_path; + + if (!current_external_dirs || + parray_num(current_external_dirs) < file->external_dir_num - 1) + elog(ERROR, "Inconsistent external directory backup metadata"); + + external_path = parray_get(current_external_dirs, + file->external_dir_num - 1); + if (backup_contains_external(external_path, requested_external_dirs)) + { + char container_dir[MAXPGPATH]; + + external_path = get_external_remap(external_path); + makeExternalDirPathByNum(container_dir, external_prefix, + file->external_dir_num); + dir_name = GetRelativePath(file->path, container_dir); + elog(VERBOSE, "Create directory \"%s\"", dir_name); + join_path_components(dirpath, external_path, dir_name); + dir_create_dir(dirpath, DIR_PERMISSION); + } + } + + /* setup threads */ + pg_atomic_clear_flag(&file->lock); + } threads = (pthread_t *) palloc(sizeof(pthread_t) * num_threads); threads_args = (restore_files_arg *) palloc(sizeof(restore_files_arg)*num_threads); - /* setup threads */ - for (i = 0; i < parray_num(files); i++) - { - pgFile *file = (pgFile *) parray_get(files, i); - - pg_atomic_clear_flag(&file->lock); - } - /* Restore files into target directory */ + thread_interrupted = false; for (i = 0; i < num_threads; i++) { restore_files_arg *arg = &(threads_args[i]); arg->files = files; arg->backup = backup; + arg->req_external_dirs = requested_external_dirs; + arg->cur_external_dirs = current_external_dirs; + arg->external_prefix = external_prefix; /* By default there are some error */ threads_args[i].ret = 1; @@ -560,16 +618,18 @@ remove_deleted_files(pgBackup *backup) parray *files; parray *files_restored; char filelist_path[MAXPGPATH]; + char external_prefix[MAXPGPATH]; int i; pgBackupGetPath(backup, filelist_path, lengthof(filelist_path), DATABASE_FILE_LIST); + pgBackupGetPath(backup, external_prefix, lengthof(external_prefix), EXTERNAL_DIR); /* Read backup's filelist using target database path as base path */ - files = dir_read_file_list(instance_config.pgdata, filelist_path, FIO_BACKUP_HOST); + files = dir_read_file_list(instance_config.pgdata, external_prefix, filelist_path, FIO_BACKUP_HOST); parray_qsort(files, pgFileComparePathDesc); /* Get list of files actually existing in target database */ files_restored = parray_new(); - dir_list_file(files_restored, instance_config.pgdata, true, true, false, FIO_BACKUP_HOST); + dir_list_file(files_restored, instance_config.pgdata, true, true, false, 0, FIO_BACKUP_HOST); /* To delete from leaf, sort in reversed order */ parray_qsort(files_restored, pgFileComparePathDesc); @@ -617,7 +677,7 @@ restore_files(void *arg) lengthof(from_root), DATABASE_DIR); /* check for interrupt */ - if (interrupted) + 
if (interrupted || thread_interrupted) elog(ERROR, "interrupted during restore database"); rel_path = GetRelativePath(file->path,from_root); @@ -673,6 +733,17 @@ restore_files(void *arg) false, parse_program_version(arguments->backup->program_version)); } + else if (file->external_dir_num) + { + char *external_path = parray_get(arguments->cur_external_dirs, + file->external_dir_num - 1); + if (backup_contains_external(external_path, + arguments->req_external_dirs)) + { + external_path = get_external_remap(external_path); + copy_file(arguments->external_prefix, FIO_BACKUP_HOST, external_path, FIO_DB_HOST, file); + } + } else if (strcmp(file->name, "pg_control") == 0) copy_pgcontrol_file(from_root, FIO_BACKUP_HOST, instance_config.pgdata, FIO_DB_HOST, diff --git a/src/show.c b/src/show.c index ffcd0038..083962e5 100644 --- a/src/show.c +++ b/src/show.c @@ -635,6 +635,10 @@ show_instance_json(parray *backup_list) json_add_value(buf, "primary_conninfo", backup->primary_conninfo, json_level, true); + if (backup->external_dir_str) + json_add_value(buf, "external-dirs", backup->external_dir_str, + json_level, true); + json_add_value(buf, "status", status2str(backup->status), json_level, true); diff --git a/src/utils/configuration.c b/src/utils/configuration.c index 3eb83b9f..7d181452 100644 --- a/src/utils/configuration.c +++ b/src/utils/configuration.c @@ -524,7 +524,7 @@ config_get_opt(int argc, char **argv, ConfigOption cmd_options[], */ int config_read_opt(const char *path, ConfigOption options[], int elevel, - bool strict) + bool strict, bool missing_ok) { FILE *fp; char buf[1024]; @@ -535,7 +535,7 @@ config_read_opt(const char *path, ConfigOption options[], int elevel, if (!options) return parsed_options; - if ((fp = pgut_fopen(path, "rt", true)) == NULL) + if ((fp = pgut_fopen(path, "rt", missing_ok)) == NULL) return parsed_options; while (fgets(buf, lengthof(buf), fp)) diff --git a/src/utils/configuration.h b/src/utils/configuration.h index 9602f1d6..96e20047 100644 --- a/src/utils/configuration.h +++ b/src/utils/configuration.h @@ -78,7 +78,7 @@ struct ConfigOption extern int config_get_opt(int argc, char **argv, ConfigOption cmd_options[], ConfigOption options[]); extern int config_read_opt(const char *path, ConfigOption options[], int elevel, - bool strict); + bool strict, bool missing_ok); extern void config_get_opt_env(ConfigOption options[]); extern void config_set_opt(ConfigOption options[], void *var, OptionSource source); diff --git a/src/utils/logger.c b/src/utils/logger.c index 29efc592..b27ed5c6 100644 --- a/src/utils/logger.c +++ b/src/utils/logger.c @@ -122,9 +122,6 @@ exit_if_necessary(int elevel) { if (elevel > WARNING && !in_cleanup) { - /* Interrupt other possible routines */ - interrupted = true; - if (loggin_in_progress) { loggin_in_progress = false; @@ -136,11 +133,15 @@ exit_if_necessary(int elevel) /* If this is not the main thread then don't call exit() */ if (main_tid != pthread_self()) + { + /* Interrupt other possible routines */ + thread_interrupted = true; #ifdef WIN32 ExitThread(elevel); #else pthread_exit(NULL); #endif + } else exit(elevel); } diff --git a/src/utils/pgut.c b/src/utils/pgut.c index 3aaec9a6..372de099 100644 --- a/src/utils/pgut.c +++ b/src/utils/pgut.c @@ -701,7 +701,7 @@ on_interrupt(void) int save_errno = errno; char errbuf[256]; - /* Set interruped flag */ + /* Set interrupted flag */ interrupted = true; /* diff --git a/src/utils/thread.c b/src/utils/thread.c index 0999a0d5..f1624be9 100644 --- a/src/utils/thread.c +++ b/src/utils/thread.c 
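The logger.c and pgut.c hunks above, together with the thread.c/thread.h hunks below, split interruption handling in two: interrupted remains the process-wide flag set from the signal handler, while thread_interrupted is set by whichever worker thread fails first, so sibling workers stop at their next "interrupted || thread_interrupted" check instead of a non-main thread calling exit(). A minimal standalone sketch of that pattern follows; the worker body and the simulated failure are invented for illustration.

#include <pthread.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

/* Process-wide flag, set from the signal handler. */
static volatile sig_atomic_t interrupted = false;
/* Set by the first worker thread that fails. */
static bool thread_interrupted = false;

static void
on_sigint(int signum)
{
	(void) signum;
	interrupted = true;
}

static void *
worker(void *arg)
{
	int		id = *(int *) arg;
	int		step;

	for (step = 0; step < 100; step++)
	{
		if (interrupted || thread_interrupted)
		{
			fprintf(stderr, "worker %d: stopping early\n", id);
			return NULL;
		}

		usleep(1000);			/* pretend to do one unit of work */

		if (id == 2 && step == 10)	/* simulated failure in one worker */
		{
			fprintf(stderr, "worker %d: error, interrupting siblings\n", id);
			thread_interrupted = true;
			return NULL;
		}
	}
	return NULL;
}

int
main(void)
{
	pthread_t	threads[4];
	int			ids[4];
	int			i;

	signal(SIGINT, on_sigint);

	for (i = 0; i < 4; i++)
	{
		ids[i] = i;
		pthread_create(&threads[i], NULL, worker, &ids[i]);
	}
	for (i = 0; i < 4; i++)
		pthread_join(threads[i], NULL);

	return 0;
}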
@@ -7,8 +7,12 @@ *------------------------------------------------------------------------- */ +#include "postgres_fe.h" + #include "thread.h" +bool thread_interrupted = false; + #ifdef WIN32 DWORD main_tid = 0; #else diff --git a/src/utils/thread.h b/src/utils/thread.h index 6b8349bf..a2948156 100644 --- a/src/utils/thread.h +++ b/src/utils/thread.h @@ -34,7 +34,7 @@ extern DWORD main_tid; extern pthread_t main_tid; #endif - +extern bool thread_interrupted; extern int pthread_lock(pthread_mutex_t *mp); diff --git a/src/validate.c b/src/validate.c index 60125dc4..8f5ea62f 100644 --- a/src/validate.c +++ b/src/validate.c @@ -44,6 +44,7 @@ void pgBackupValidate(pgBackup *backup) { char base_path[MAXPGPATH]; + char external_prefix[MAXPGPATH]; char path[MAXPGPATH]; parray *files; bool corrupted = false; @@ -99,8 +100,9 @@ pgBackupValidate(pgBackup *backup) elog(WARNING, "Invalid backup_mode of backup %s", base36enc(backup->start_time)); pgBackupGetPath(backup, base_path, lengthof(base_path), DATABASE_DIR); + pgBackupGetPath(backup, external_prefix, lengthof(external_prefix), EXTERNAL_DIR); pgBackupGetPath(backup, path, lengthof(path), DATABASE_FILE_LIST); - files = dir_read_file_list(base_path, path, FIO_BACKUP_HOST); + files = dir_read_file_list(base_path, external_prefix, path, FIO_BACKUP_HOST); /* setup threads */ for (i = 0; i < parray_num(files); i++) @@ -115,6 +117,7 @@ pgBackupValidate(pgBackup *backup) palloc(sizeof(validate_files_arg) * num_threads); /* Validate files */ + thread_interrupted = false; for (i = 0; i < num_threads; i++) { validate_files_arg *arg = &(threads_args[i]); @@ -184,7 +187,7 @@ pgBackupValidateFiles(void *arg) if (!pg_atomic_test_set_flag(&file->lock)) continue; - if (interrupted) + if (interrupted || thread_interrupted) elog(ERROR, "Interrupted during validate"); /* Validate only regular files */ @@ -247,7 +250,8 @@ pgBackupValidateFiles(void *arg) * Starting from 2.0.25 we calculate crc of pg_control differently. 
*/ if (arguments->backup_version >= 20025 && - strcmp(file->name, "pg_control") == 0) + strcmp(file->name, "pg_control") == 0 && + !file->external_dir_num) crc = get_pgcontrol_checksum(arguments->base_path); else crc = pgFileGetCRC(file->path, @@ -333,7 +337,13 @@ do_validate_all(void) sprintf(arclog_path, "%s/%s/%s", backup_path, "wal", instance_name); join_path_components(conf_path, backup_instance_path, BACKUP_CATALOG_CONF_FILE); - config_read_opt(conf_path, instance_options, ERROR, false); + if (config_read_opt(conf_path, instance_options, ERROR, false, + true) == 0) + { + elog(WARNING, "Configuration file \"%s\" is empty", conf_path); + corrupted_backup_found = true; + continue; + } do_validate_instance(); } @@ -372,7 +382,6 @@ do_validate_all(void) static void do_validate_instance(void) { - char *current_backup_id; int i; int j; parray *backups; @@ -387,7 +396,6 @@ do_validate_instance(void) for (i = 0; i < parray_num(backups); i++) { pgBackup *base_full_backup; - char *parent_backup_id; current_backup = (pgBackup *) parray_get(backups, i); @@ -402,6 +410,7 @@ do_validate_instance(void) /* chain is broken */ if (result == 0) { + char *parent_backup_id; /* determine missing backup ID */ parent_backup_id = base36enc_dup(tmp_backup->parent_backup); @@ -420,34 +429,39 @@ do_validate_instance(void) elog(WARNING, "Backup %s has missing parent %s", base36enc(current_backup->start_time), parent_backup_id); } + pg_free(parent_backup_id); continue; } /* chain is whole, but at least one parent is invalid */ else if (result == 1) { - /* determine corrupt backup ID */ - parent_backup_id = base36enc_dup(tmp_backup->start_time); - /* Oldest corrupt backup has a chance for revalidation */ if (current_backup->start_time != tmp_backup->start_time) { + char *backup_id = base36enc_dup(tmp_backup->start_time); /* orphanize current_backup */ if (current_backup->status == BACKUP_STATUS_OK) { write_backup_status(current_backup, BACKUP_STATUS_ORPHAN); elog(WARNING, "Backup %s is orphaned because his parent %s has status: %s", - base36enc(current_backup->start_time), parent_backup_id, + base36enc(current_backup->start_time), backup_id, status2str(tmp_backup->status)); } else { elog(WARNING, "Backup %s has parent %s with status: %s", - base36enc(current_backup->start_time),parent_backup_id, + base36enc(current_backup->start_time), backup_id, status2str(tmp_backup->status)); } + pg_free(backup_id); continue; } base_full_backup = find_parent_full_backup(current_backup); + + /* sanity */ + if (!base_full_backup) + elog(ERROR, "Parent full backup for the given backup %s was not found", + base36enc(current_backup->start_time)); } /* chain is whole, all parents are valid at first glance, * current backup validation can proceed @@ -480,6 +494,7 @@ do_validate_instance(void) */ if (current_backup->status != BACKUP_STATUS_OK) { + char *current_backup_id; /* This is ridiculous but legal. * PAGE_b2 <- OK * PAGE_a2 <- OK @@ -564,7 +579,7 @@ do_validate_instance(void) if (backup->status == BACKUP_STATUS_OK) { - //tmp_backup = find_parent_full_backup(dest_backup); + /* Revalidation successful, validate corresponding WAL files */ validate_wal(backup, arclog_path, 0, 0, 0, current_backup->tli, diff --git a/tests/__init__.py b/tests/__init__.py index fdba4164..033ce535 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,11 +1,11 @@ import unittest from . 
import init_test, merge, option_test, show_test, compatibility, \ - backup_test, delete_test, delta, restore_test, validate_test, \ - retention_test, pgpro560, pgpro589, pgpro2068, false_positive, replica, \ + backup_test, delete, delta, restore, validate, \ + retention, pgpro560, pgpro589, pgpro2068, false_positive, replica, \ compression, page, ptrack, archive, exclude, cfs_backup, cfs_restore, \ cfs_validate_backup, auth_test, time_stamp, snapfs, logging, \ - locking, remote + locking, remote, external, config def load_tests(loader, tests, pattern): @@ -14,12 +14,13 @@ def load_tests(loader, tests, pattern): suite.addTests(loader.loadTestsFromModule(archive)) suite.addTests(loader.loadTestsFromModule(backup_test)) suite.addTests(loader.loadTestsFromModule(compatibility)) + suite.addTests(loader.loadTestsFromModule(config)) # suite.addTests(loader.loadTestsFromModule(cfs_backup)) # suite.addTests(loader.loadTestsFromModule(cfs_restore)) # suite.addTests(loader.loadTestsFromModule(cfs_validate_backup)) # suite.addTests(loader.loadTestsFromModule(logging)) suite.addTests(loader.loadTestsFromModule(compression)) - suite.addTests(loader.loadTestsFromModule(delete_test)) + suite.addTests(loader.loadTestsFromModule(delete)) suite.addTests(loader.loadTestsFromModule(delta)) suite.addTests(loader.loadTestsFromModule(exclude)) suite.addTests(loader.loadTestsFromModule(false_positive)) @@ -32,14 +33,16 @@ def load_tests(loader, tests, pattern): # suite.addTests(loader.loadTestsFromModule(ptrack)) suite.addTests(loader.loadTestsFromModule(remote)) suite.addTests(loader.loadTestsFromModule(replica)) - suite.addTests(loader.loadTestsFromModule(restore_test)) - suite.addTests(loader.loadTestsFromModule(retention_test)) + suite.addTests(loader.loadTestsFromModule(restore)) + suite.addTests(loader.loadTestsFromModule(retention)) suite.addTests(loader.loadTestsFromModule(show_test)) suite.addTests(loader.loadTestsFromModule(snapfs)) - suite.addTests(loader.loadTestsFromModule(validate_test)) + suite.addTests(loader.loadTestsFromModule(validate)) suite.addTests(loader.loadTestsFromModule(pgpro560)) suite.addTests(loader.loadTestsFromModule(pgpro589)) + suite.addTests(loader.loadTestsFromModule(pgpro2068)) suite.addTests(loader.loadTestsFromModule(time_stamp)) + suite.addTests(loader.loadTestsFromModule(external)) return suite diff --git a/tests/config.py b/tests/config.py new file mode 100644 index 00000000..4a382e13 --- /dev/null +++ b/tests/config.py @@ -0,0 +1,53 @@ +import unittest +import subprocess +import os +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +from sys import exit + +module_name = 'config' + + +class ConfigTest(ProbackupTest, unittest.TestCase): + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_remove_instance_config(self): + """remove pg_probackup.conf""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.show_pb(backup_dir) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + self.backup_node(backup_dir, 'node', node) + + self.backup_node( + backup_dir, 'node', node, backup_type='page') + + conf_file = os.path.join( + backup_dir, 'backups','node', 'pg_probackup.conf') + + os.unlink(os.path.join(backup_dir, 'backups','node', 
'pg_probackup.conf')) + + try: + self.backup_node( + backup_dir, 'node', node, backup_type='page') + self.assertEqual( + 1, 0, + "Expecting Error because pg_probackup.conf is missing. " + ".\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: could not open file "{0}": ' + 'No such file or directory'.format(conf_file), + e.message, + "\n Unexpected Error Message: {0}\n CMD: {1}".format( + repr(e.message), self.cmd)) diff --git a/tests/delete.py b/tests/delete.py new file mode 100644 index 00000000..71919c86 --- /dev/null +++ b/tests/delete.py @@ -0,0 +1,531 @@ +import unittest +import os +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +import subprocess +from sys import exit + + +module_name = 'delete' + + +class DeleteTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_delete_full_backups(self): + """delete full backups""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'wal_level': 'replica'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # full backup + self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench( + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + pgbench.wait() + pgbench.stdout.close() + + self.backup_node(backup_dir, 'node', node) + + show_backups = self.show_pb(backup_dir, 'node') + id_1 = show_backups[0]['id'] + id_2 = show_backups[1]['id'] + id_3 = show_backups[2]['id'] + self.delete_pb(backup_dir, 'node', id_2) + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(show_backups[0]['id'], id_1) + self.assertEqual(show_backups[1]['id'], id_3) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_delete_archive_mix_compress_and_non_compressed_segments(self): + """stub""" + + # @unittest.skip("skip") + def test_delete_increment_page(self): + """delete increment and all after him""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'wal_level': 'replica'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # full backup mode + self.backup_node(backup_dir, 'node', node) + # page backup mode + self.backup_node(backup_dir, 'node', node, backup_type="page") + # page backup mode + self.backup_node(backup_dir, 'node', node, backup_type="page") + # full backup mode + self.backup_node(backup_dir, 'node', node) + + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(len(show_backups), 4) + + # delete first page backup + self.delete_pb(backup_dir, 'node', show_backups[1]['id']) + + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(len(show_backups), 2) + + self.assertEqual(show_backups[0]['backup-mode'], "FULL") + 
self.assertEqual(show_backups[0]['status'], "OK") + self.assertEqual(show_backups[1]['backup-mode'], "FULL") + self.assertEqual(show_backups[1]['status'], "OK") + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_delete_increment_ptrack(self): + """delete increment and all after him""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'wal_level': 'replica', 'ptrack_enable': 'on'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # full backup mode + self.backup_node(backup_dir, 'node', node) + # page backup mode + self.backup_node(backup_dir, 'node', node, backup_type="ptrack") + # page backup mode + self.backup_node(backup_dir, 'node', node, backup_type="ptrack") + # full backup mode + self.backup_node(backup_dir, 'node', node) + + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(len(show_backups), 4) + + # delete first page backup + self.delete_pb(backup_dir, 'node', show_backups[1]['id']) + + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(len(show_backups), 2) + + self.assertEqual(show_backups[0]['backup-mode'], "FULL") + self.assertEqual(show_backups[0]['status'], "OK") + self.assertEqual(show_backups[1]['backup-mode'], "FULL") + self.assertEqual(show_backups[1]['status'], "OK") + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_delete_orphaned_wal_segments(self): + """make archive node, make three full backups, delete second backup without --wal option, then delete orphaned wals via --wal option""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'wal_level': 'replica'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.safe_psql( + "postgres", + "create table t_heap as select 1 as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + # first full backup + backup_1_id = self.backup_node(backup_dir, 'node', node) + # second full backup + backup_2_id = self.backup_node(backup_dir, 'node', node) + # third full backup + backup_3_id = self.backup_node(backup_dir, 'node', node) + node.stop() + + # Check wals + wals_dir = os.path.join(backup_dir, 'wal', 'node') + wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] + original_wal_quantity = len(wals) + + # delete second full backup + self.delete_pb(backup_dir, 'node', backup_2_id) + # check wal quantity + self.validate_pb(backup_dir) + self.assertEqual(self.show_pb(backup_dir, 'node', backup_1_id)['status'], "OK") + self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") + # try to delete wals for second backup + self.delete_pb(backup_dir, 'node', options=['--wal']) + # check wal quantity + self.validate_pb(backup_dir) + self.assertEqual(self.show_pb(backup_dir, 'node', backup_1_id)['status'], "OK") + self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") 
+ + # delete first full backup + self.delete_pb(backup_dir, 'node', backup_1_id) + self.validate_pb(backup_dir) + self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") + + result = self.delete_pb(backup_dir, 'node', options=['--wal']) + # delete useless wals + self.assertTrue('INFO: removed min WAL segment' in result + and 'INFO: removed max WAL segment' in result) + self.validate_pb(backup_dir) + self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") + + # Check quantity, it should be lower than original + wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] + self.assertTrue(original_wal_quantity > len(wals), "Number of wals not changed after 'delete --wal' which is illegal") + + # Delete last backup + self.delete_pb(backup_dir, 'node', backup_3_id, options=['--wal']) + wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] + self.assertEqual (0, len(wals), "Number of wals should be equal to 0") + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_delete_backup_with_empty_control_file(self): + """ + take backup, truncate its control file, + try to delete it via 'delete' command + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'wal_level': 'replica', 'ptrack_enable': 'on'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + # full backup mode + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + # page backup mode + self.backup_node( + backup_dir, 'node', node, backup_type="delta", options=['--stream']) + # page backup mode + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="delta", options=['--stream']) + + with open( + os.path.join(backup_dir, 'backups', 'node', backup_id, 'backup.control'), + 'wt') as f: + f.flush() + f.close() + + show_backups = self.show_pb(backup_dir, 'node') + self.assertEqual(len(show_backups), 3) + + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_delete_interleaved_incremental_chains(self): + """complicated case of interleaved backup chains""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL BACKUPs + + backup_id_a = self.backup_node(backup_dir, 'node', node) + backup_id_b = self.backup_node(backup_dir, 'node', node) + + # Change FULL B backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # FULLb ERROR + # FULLa OK + # Take PAGEa1 backup + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + # Change FULL B backup status to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 
'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + # Now we start to play with first generation of PAGE backups + # Change PAGEb1 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + + # Change PAGEa1 status to OK + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + # Change PAGEa2 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + + # Change PAGEb1 status to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change PAGEa2 status to OK + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGEc1 OK + # FULLc OK + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Delete FULLb + self.delete_pb( + backup_dir, 'node', backup_id_b) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 5) + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_delete_multiple_descendants(self): + """ + PAGEb3 + | PAGEa3 + PAGEb2 / + | PAGEa2 / + PAGEb1 \ / + | PAGEa1 + FULLb | + FULLa should be deleted + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + + backup_id_b = self.backup_node(backup_dir, 'node', node) + + # Change FULLb backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change FULLb backup status to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + # Change PAGEa1 backup status to OK + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # Change PAGEb1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEb1 backup status to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 
'OK') + + # Change PAGEa2 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb2 OK + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEb2 and PAGEb1 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b2, 'ERROR') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + + # PAGEb2 ERROR + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + page_id_a3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa3 OK + # PAGEb2 ERROR + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEa3 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a3, 'ERROR') + + # Change PAGEb2 status to OK + self.change_backup_status(backup_dir, 'node', page_id_b2, 'OK') + + page_id_b3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb3 OK + # PAGEa3 ERROR + # PAGEb2 OK + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEa3, PAGEa2 and PAGEb1 status to OK + self.change_backup_status(backup_dir, 'node', page_id_a3, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + + # PAGEb3 OK + # PAGEa3 OK + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Check that page_id_a3 and page_id_a2 are both direct descendants of page_id_a1 + self.assertEqual( + self.show_pb(backup_dir, 'node', backup_id=page_id_a3)['parent-backup-id'], + page_id_a1) + + self.assertEqual( + self.show_pb(backup_dir, 'node', backup_id=page_id_a2)['parent-backup-id'], + page_id_a1) + + # Delete FULLa + self.delete_pb(backup_dir, 'node', backup_id_a) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/delete_test.py b/tests/delete_test.py deleted file mode 100644 index f49c01bf..00000000 --- a/tests/delete_test.py +++ /dev/null @@ -1,211 +0,0 @@ -import unittest -import os -from .helpers.ptrack_helpers import ProbackupTest, ProbackupException -import subprocess -from sys import exit - - -module_name = 'delete' - - -class DeleteTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_delete_full_backups(self): - """delete full backups""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir=os.path.join(module_name, fname, 'node'), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'}) - - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.slow_start() - - # full backup - self.backup_node(backup_dir, 'node', node) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - self.backup_node(backup_dir, 'node', node) - - pgbench = node.pgbench( - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - pgbench.wait() - pgbench.stdout.close() - - self.backup_node(backup_dir, 'node', node) - - show_backups = self.show_pb(backup_dir, 'node') - id_1 = show_backups[0]['id'] - id_2 = show_backups[1]['id'] - 
id_3 = show_backups[2]['id'] - self.delete_pb(backup_dir, 'node', id_2) - show_backups = self.show_pb(backup_dir, 'node') - self.assertEqual(show_backups[0]['id'], id_1) - self.assertEqual(show_backups[1]['id'], id_3) - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_delete_archive_mix_compress_and_non_compressed_segments(self): - """stub""" - - # @unittest.skip("skip") - def test_delete_increment_page(self): - """delete increment and all after him""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir=os.path.join(module_name, fname, 'node'), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'}) - - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.slow_start() - - # full backup mode - self.backup_node(backup_dir, 'node', node) - # page backup mode - self.backup_node(backup_dir, 'node', node, backup_type="page") - # page backup mode - self.backup_node(backup_dir, 'node', node, backup_type="page") - # full backup mode - self.backup_node(backup_dir, 'node', node) - - show_backups = self.show_pb(backup_dir, 'node') - self.assertEqual(len(show_backups), 4) - - # delete first page backup - self.delete_pb(backup_dir, 'node', show_backups[1]['id']) - - show_backups = self.show_pb(backup_dir, 'node') - self.assertEqual(len(show_backups), 2) - - self.assertEqual(show_backups[0]['backup-mode'], "FULL") - self.assertEqual(show_backups[0]['status'], "OK") - self.assertEqual(show_backups[1]['backup-mode'], "FULL") - self.assertEqual(show_backups[1]['status'], "OK") - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_delete_increment_ptrack(self): - """delete increment and all after him""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir=os.path.join(module_name, fname, 'node'), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica', 'ptrack_enable': 'on'}) - - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.slow_start() - - # full backup mode - self.backup_node(backup_dir, 'node', node) - # page backup mode - self.backup_node(backup_dir, 'node', node, backup_type="ptrack") - # page backup mode - self.backup_node(backup_dir, 'node', node, backup_type="ptrack") - # full backup mode - self.backup_node(backup_dir, 'node', node) - - show_backups = self.show_pb(backup_dir, 'node') - self.assertEqual(len(show_backups), 4) - - # delete first page backup - self.delete_pb(backup_dir, 'node', show_backups[1]['id']) - - show_backups = self.show_pb(backup_dir, 'node') - self.assertEqual(len(show_backups), 2) - - self.assertEqual(show_backups[0]['backup-mode'], "FULL") - self.assertEqual(show_backups[0]['status'], "OK") - self.assertEqual(show_backups[1]['backup-mode'], "FULL") - self.assertEqual(show_backups[1]['status'], "OK") - - # Clean after yourself - self.del_test_dir(module_name, fname) - - # @unittest.skip("skip") - def test_delete_orphaned_wal_segments(self): - """make archive node, make three full backups, delete second backup without --wal option, then delete orphaned wals via --wal option""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - 
base_dir=os.path.join(module_name, fname, 'node'), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'}) - - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.slow_start() - - node.safe_psql( - "postgres", - "create table t_heap as select 1 as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") - # first full backup - backup_1_id = self.backup_node(backup_dir, 'node', node) - # second full backup - backup_2_id = self.backup_node(backup_dir, 'node', node) - # third full backup - backup_3_id = self.backup_node(backup_dir, 'node', node) - node.stop() - - # Check wals - wals_dir = os.path.join(backup_dir, 'wal', 'node') - wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] - original_wal_quantity = len(wals) - - # delete second full backup - self.delete_pb(backup_dir, 'node', backup_2_id) - # check wal quantity - self.validate_pb(backup_dir) - self.assertEqual(self.show_pb(backup_dir, 'node', backup_1_id)['status'], "OK") - self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") - # try to delete wals for second backup - self.delete_pb(backup_dir, 'node', options=['--wal']) - # check wal quantity - self.validate_pb(backup_dir) - self.assertEqual(self.show_pb(backup_dir, 'node', backup_1_id)['status'], "OK") - self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") - - # delete first full backup - self.delete_pb(backup_dir, 'node', backup_1_id) - self.validate_pb(backup_dir) - self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") - - result = self.delete_pb(backup_dir, 'node', options=['--wal']) - # delete useless wals - self.assertTrue('INFO: removed min WAL segment' in result - and 'INFO: removed max WAL segment' in result) - self.validate_pb(backup_dir) - self.assertEqual(self.show_pb(backup_dir, 'node', backup_3_id)['status'], "OK") - - # Check quantity, it should be lower than original - wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] - self.assertTrue(original_wal_quantity > len(wals), "Number of wals not changed after 'delete --wal' which is illegal") - - # Delete last backup - self.delete_pb(backup_dir, 'node', backup_3_id, options=['--wal']) - wals = [f for f in os.listdir(wals_dir) if os.path.isfile(os.path.join(wals_dir, f)) and not f.endswith('.backup')] - self.assertEqual (0, len(wals), "Number of wals should be equal to 0") - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/expected/option_help.out b/tests/expected/option_help.out index abb5ce71..a83c3905 100644 --- a/tests/expected/option_help.out +++ b/tests/expected/option_help.out @@ -63,7 +63,7 @@ pg_probackup - utility to manage backup/recovery of PostgreSQL database. 
[--skip-block-validation] pg_probackup validate -B backup-path [--instance=instance_name] - [-i backup-id] [--progress] + [-i backup-id] [--progress] [-j num-threads] [--time=time|--xid=xid|--lsn=lsn [--inclusive=boolean]] [--recovery-target-name=target-name] [--timeline=timeline] diff --git a/tests/external.py b/tests/external.py new file mode 100644 index 00000000..1327402c --- /dev/null +++ b/tests/external.py @@ -0,0 +1,1240 @@ +import unittest +import os +from time import sleep +from .helpers.ptrack_helpers import ProbackupTest, ProbackupException +from .helpers.cfs_helpers import find_by_name +import shutil + + +module_name = 'external' + + +class ExternalTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_external_simple(self): + """ + make node, create external directory, take backup + with external directory, restore backup, check that + external directory was successfully copied + """ + fname = self.id().split('.')[3] + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + external_dir = self.get_tblspace_path(node, 'somedirectory') + + # create directory in external_directory + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # take FULL backup with external directory pointing to a file + file_path = os.path.join(core_dir, 'file') + open(file_path,"w+") + + try: + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=[ + '--external-dirs={0}'.format(file_path)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because external dir point to a file" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertIn( + 'ERROR: --external-dirs option "{0}": ' + 'directory or symbolic link expected\n'.format(file_path), + e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # FULL backup + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream"]) + + # Fill external directories + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir, options=["-j", "4"]) + + # Full backup with external dir + self.backup_node( + backup_dir, 'node', node, + options=[ + '--external-dirs={0}'.format(external_dir)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + node.cleanup() + shutil.rmtree(external_dir, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, options=["-j", "4"]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_external_none(self): + """ + make node, create external directory, take backup + with external directory, take delta backup with --external-dirs=none, + restore delta backup, check that + external directory was not copied + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 
'node'), + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + external_dir = self.get_tblspace_path(node, 'somedirectory') + + # create directory in external_directory + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL backup + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream"]) + + # Fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir, options=["-j", "4"]) + + # Full backup with external dir + self.backup_node( + backup_dir, 'node', node, + options=[ + '--external-dirs={0}'.format(external_dir)]) + + # Delta backup without external directory + self.backup_node( + backup_dir, 'node', node, backup_type="delta") +# options=['--external-dirs=none']) + + shutil.rmtree(external_dir, ignore_errors=True) + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + node.cleanup() + + self.restore_node( + backup_dir, 'node', node, options=["-j", "4"]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_external_dir_mapping(self): + """ + make node, take full backup, check that restore with + external-dir mapping will end with error, take page backup, + check that restore with external-dir mapping will end with + success + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream"]) + + external_dir1_old = self.get_tblspace_path(node, 'external_dir1') + external_dir2_old = self.get_tblspace_path(node, 'external_dir2') + + # Fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1_old, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2_old, options=["-j", "4"]) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + node_restored.cleanup() + + external_dir1_new = self.get_tblspace_path(node_restored, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node_restored, 'external_dir2') + + try: + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format( + external_dir1_old, external_dir1_new), + "--external-mapping={0}={1}".format( + external_dir2_old, external_dir2_new)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because tablespace mapping is incorrect" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'ERROR: --external-mapping option' in e.message and + 'have an entry in list of external directories' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + 
self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}:{1}".format(external_dir1_old, external_dir2_old)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format( + external_dir1_old, external_dir1_new), + "--external-mapping={0}={1}".format( + external_dir2_old, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node_restored.base_dir, exclude_dirs=['logs']) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_backup_multiple_external(self): + """check that cmdline has priority over config""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + external_dir1_old = self.get_tblspace_path(node, 'external_dir1') + external_dir2_old = self.get_tblspace_path(node, 'external_dir2') + + # FULL backup + self.backup_node( + backup_dir, 'node', node, backup_type="full", + options=["-j", "4", "--stream"]) + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1_old, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2_old, options=["-j", "4"]) + + self.set_config( + backup_dir, 'node', + options=[ + '-E', external_dir1_old]) + + # cmdline option MUST override options in config + self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}".format(external_dir2_old)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs', 'external_dir1']) + + node.cleanup() + + shutil.rmtree(external_dir1_old, ignore_errors=True) + shutil.rmtree(external_dir2_old, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, + options=["-j", "4"]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_backward_compatibility(self): + """ + take backup with old binary without external dirs support + take delta backup with new binary and 2 external directories + restore delta backup, check that incremental chain + restored correctly + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.show_pb(backup_dir) + + self.add_instance(backup_dir, 'node', node, old_binary=True) + self.show_pb(backup_dir) + + node.slow_start() + + node.pgbench_init(scale=5) + + # FULL backup with old binary without external dirs support + self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=["-j", "4", 
"--stream"]) + + external_dir1_old = self.get_tblspace_path(node, 'external_dir1') + external_dir2_old = self.get_tblspace_path(node, 'external_dir2') + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1_old, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2_old, options=["-j", "4"]) + + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=["-j", "4", "--stream"]) + + # fill external directories with changed data + shutil.rmtree(external_dir1_old, ignore_errors=True) + shutil.rmtree(external_dir2_old, ignore_errors=True) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1_old, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2_old, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # delta backup with external directories using new binary + self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}:{1}".format( + external_dir1_old, + external_dir2_old)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # RESTORE chain with new binary + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + node_restored.cleanup() + + external_dir1_new = self.get_tblspace_path(node_restored, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node_restored, 'external_dir2') + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format(external_dir1_old, external_dir1_new), + "--external-mapping={0}={1}".format(external_dir2_old, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node_restored.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_backward_compatibility_merge_1(self): + """ + take backup with old binary without external dirs support + take delta backup with new binary and 2 external directories + merge delta backup ajd restore it + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.show_pb(backup_dir) + + self.add_instance(backup_dir, 'node', node, old_binary=True) + self.show_pb(backup_dir) + + node.slow_start() + + node.pgbench_init(scale=5) + + # tmp FULL backup with old binary + tmp_id = self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=["-j", "4", "--stream"]) + + external_dir1_old = self.get_tblspace_path(node, 'external_dir1') + external_dir2_old = self.get_tblspace_path(node, 'external_dir2') + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1_old, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2_old, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=tmp_id) + + # FULL backup 
with old binary without external dirs support + self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=["-j", "4", "--stream"]) + + pgbench = node.pgbench(options=['-T', '30', '-c', '1']) + pgbench.wait() + + # delta backup with external directories using new binary + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}:{1}".format( + external_dir1_old, + external_dir2_old)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # Merge chain with new binary + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + # Restore merged backup + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + node_restored.cleanup() + + external_dir1_new = self.get_tblspace_path(node_restored, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node_restored, 'external_dir2') + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format(external_dir1_old, external_dir1_new), + "--external-mapping={0}={1}".format(external_dir2_old, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node_restored.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_backward_compatibility_merge_2(self): + """ + take backup with old binary without external dirs support + take delta backup with new binary and 2 external directories + merge delta backup and restore it + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir, old_binary=True) + self.show_pb(backup_dir) + + self.add_instance(backup_dir, 'node', node, old_binary=True) + self.show_pb(backup_dir) + + node.slow_start() + + node.pgbench_init(scale=5) + + # tmp FULL backup with old binary + tmp_id = self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=["-j", "4", "--stream"]) + + external_dir1_old = self.get_tblspace_path(node, 'external_dir1') + external_dir2_old = self.get_tblspace_path(node, 'external_dir2') + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1_old, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2_old, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=tmp_id) + + # FULL backup with old binary without external dirs support + self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=["-j", "4", "--stream"]) + + pgbench = node.pgbench(options=['-T', '30', '-c', '1']) + pgbench.wait() + + # delta backup with external directories using new binary + self.backup_node( + backup_dir, 'node', node, + backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}:{1}".format( + external_dir1_old, + external_dir2_old)]) + + pgbench = node.pgbench(options=['-T', '30', '-c', '1']) + pgbench.wait() + + # Fill external dirs with changed data + shutil.rmtree(external_dir1_old, ignore_errors=True) + shutil.rmtree(external_dir2_old, ignore_errors=True) + + 
self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1_old, + options=['-j', '4', '--skip-external-dirs']) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2_old, + options=['-j', '4', '--skip-external-dirs']) + + # delta backup with external directories using new binary + backup_id = self.backup_node( + backup_dir, 'node', node, + backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}:{1}".format( + external_dir1_old, + external_dir2_old)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # Merge chain with new binary + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + # Restore merged backup + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + node_restored.cleanup() + + external_dir1_new = self.get_tblspace_path(node_restored, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node_restored, 'external_dir2') + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format(external_dir1_old, external_dir1_new), + "--external-mapping={0}={1}".format(external_dir2_old, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node_restored.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_merge(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=5) + + # FULL backup with old binary without external dirs support + self.backup_node( + backup_dir, 'node', node, options=["-j", "4", "--stream"]) + + external_dir1_old = self.get_tblspace_path(node, 'external_dir1') + external_dir2_old = self.get_tblspace_path(node, 'external_dir2') + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1_old, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2_old, options=["-j", "4"]) + + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + shutil.rmtree(external_dir1_old, ignore_errors=True) + shutil.rmtree(external_dir2_old, ignore_errors=True) + + # FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, + old_binary=True, options=["-j", "4", "--stream"]) + + # fill external directories with changed data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1_old, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2_old, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # delta backup with external directories using new binary + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}:{1}".format( + external_dir1_old, + external_dir2_old)]) + + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + 
# RESTORE + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + external_dir1_new = self.get_tblspace_path(node, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node, 'external_dir2') + + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format(external_dir1_old, external_dir1_new), + "--external-mapping={0}={1}".format(external_dir2_old, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_merge_1(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=5) + + # FULL backup with old data + backup_id_1 = self.backup_node( + backup_dir, 'node', node, options=["-j", "4", "--stream"]) + + external_dir1_old = self.get_tblspace_path(node, 'external_dir1') + external_dir2_old = self.get_tblspace_path(node, 'external_dir2') + + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # FULL backup with new data + backup_id_2 = self.backup_node( + backup_dir, 'node', node, + options=["-j", "4", "--stream"]) + + # fill external directories with old data + self.restore_node( + backup_dir, 'node', node, backup_id=backup_id_1, + data_dir=external_dir1_old, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, backup_id=backup_id_1, + data_dir=external_dir2_old, options=["-j", "4"]) + + # FULL backup with external directories + self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", "{0}:{1}".format( + external_dir1_old, + external_dir2_old)]) + + # drop old external data + shutil.rmtree(external_dir1_old, ignore_errors=True) + shutil.rmtree(external_dir2_old, ignore_errors=True) + + # fill external directories with new data + self.restore_node( + backup_dir, 'node', node, backup_id=backup_id_2, + data_dir=external_dir1_old, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, backup_id=backup_id_2, + data_dir=external_dir2_old, options=["-j", "4"]) + + # drop now not needed backups + + # DELTA backup with external directories + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}:{1}".format( + external_dir1_old, + external_dir2_old)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + shutil.rmtree(external_dir1_old, ignore_errors=True) + shutil.rmtree(external_dir2_old, ignore_errors=True) + + # merge backups without external directories + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + # RESTORE + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + external_dir1_new = self.get_tblspace_path(node, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node, 'external_dir2') + + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format(external_dir1_old, external_dir1_new), + 
"--external-mapping={0}={1}".format(external_dir2_old, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_merge_single(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=5) + + # FULL backup + self.backup_node( + backup_dir, 'node', node, options=["-j", "4", "--stream"]) + + external_dir1_old = self.get_tblspace_path(node, 'external_dir1') + external_dir2_old = self.get_tblspace_path(node, 'external_dir2') + + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # FULL backup with changed data + backup_id = self.backup_node( + backup_dir, 'node', node, + options=["-j", "4", "--stream"]) + + # fill external directories with changed data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1_old, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2_old, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # delta backup with external directories using new binary + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}:{1}".format( + external_dir1_old, + external_dir2_old)]) + + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # RESTORE + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + external_dir1_new = self.get_tblspace_path(node, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node, 'external_dir2') + + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format(external_dir1_old, external_dir1_new), + "--external-mapping={0}={1}".format(external_dir2_old, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_merge_double(self): + """""" + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=5) + + # FULL backup + self.backup_node( + backup_dir, 'node', node, options=["-j", "4", "--stream"]) + + external_dir1_old = self.get_tblspace_path(node, 'external_dir1') + external_dir2_old = self.get_tblspace_path(node, 'external_dir2') + + pgbench = node.pgbench(options=['-T', '30', '-c', '1', '--no-vacuum']) + pgbench.wait() + + # FULL backup + backup_id = 
self.backup_node( + backup_dir, 'node', node, + options=["-j", "4", "--stream"]) + + # fill external directories with changed data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1_old, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2_old, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # delta backup with external directories + self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}:{1}".format( + external_dir1_old, + external_dir2_old)]) + + # delta backup with external directories + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type="delta", + options=[ + "-j", "4", "--stream", + "-E", "{0}:{1}".format( + external_dir1_old, + external_dir2_old)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + shutil.rmtree(external_dir1_old, ignore_errors=True) + shutil.rmtree(external_dir2_old, ignore_errors=True) + + # delta backup without external directories + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + # RESTORE + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + external_dir1_new = self.get_tblspace_path(node, 'external_dir1') + external_dir2_new = self.get_tblspace_path(node, 'external_dir2') + + self.restore_node( + backup_dir, 'node', node, + options=[ + "-j", "4", + "--external-mapping={0}={1}".format(external_dir1_old, external_dir1_new), + "--external-mapping={0}={1}".format(external_dir2_old, external_dir2_new)]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_restore_skip_external(self): + """ + Check that --skip-external-dirs works correctly + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir1 = self.get_tblspace_path(node, 'external_dir1') + external_dir2 = self.get_tblspace_path(node, 'external_dir2') + + node.pgbench_init(scale=3) + + # temp FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, options=["-j", "4", "--stream"]) + + # fill external directories with data + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir1, options=["-j", "4"]) + + self.restore_node( + backup_dir, 'node', node, + data_dir=external_dir2, options=["-j", "4"]) + + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # FULL backup with external directories + self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", "{0}:{1}".format( + external_dir1, + external_dir2)]) + + # delete first externals, so pgdata_compare + # will be capable of detecting redundant + # external files after restore + shutil.rmtree(external_dir1, ignore_errors=True) + shutil.rmtree(external_dir2, ignore_errors=True) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + # RESTORE + node.cleanup() + shutil.rmtree(node.base_dir, ignore_errors=True) + + self.restore_node( + backup_dir, 
'node', node, + options=[ + "-j", "4", "--skip-external-dirs"]) + + pgdata_restored = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_dir_is_symlink(self): + """ + Check that backup works correctly if external dir is symlink, + symlink pointing to external dir should be followed, + but restored as directory + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir = self.get_tblspace_path(node, 'external_dir') + + node.pgbench_init(scale=3) + + # temp FULL backup + backup_id = self.backup_node( + backup_dir, 'node', node, options=["-j", "4", "--stream"]) + + # fill some directory with data + core_dir = os.path.join(self.tmp_path, module_name, fname) + symlinked_dir = os.path.join(core_dir, 'symlinked') + + self.restore_node( + backup_dir, 'node', node, + data_dir=symlinked_dir, options=["-j", "4"]) + + # drop temp FULL backup + self.delete_pb(backup_dir, 'node', backup_id=backup_id) + + # create symlink to directory in external directory + os.symlink(symlinked_dir, external_dir) + + # FULL backup with external directories + backup_id = self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", "{0}".format( + external_dir)]) + + pgdata = self.pgdata_content( + node.base_dir, exclude_dirs=['logs']) + + node_restored = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node_restored')) + + # RESTORE + node_restored.cleanup() + + external_dir_new = self.get_tblspace_path( + node_restored, 'external_dir') + + self.restore_node( + backup_dir, 'node', node_restored, + options=[ + "-j", "4", "--external-mapping={0}={1}".format( + external_dir, external_dir_new)]) + + pgdata_restored = self.pgdata_content( + node_restored.base_dir, exclude_dirs=['logs']) + + self.compare_pgdata(pgdata, pgdata_restored) + + self.assertEqual( + external_dir, + self.show_pb( + backup_dir, 'node', + backup_id=backup_id)['external-dirs']) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.expectedFailure + # @unittest.skip("skip") + def test_external_dir_is_tablespace(self): + """ + Check that backup fails with error + if external directory points to tablespace + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + core_dir = os.path.join(self.tmp_path, module_name, fname) + shutil.rmtree(core_dir, ignore_errors=True) + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'max_wal_senders': '2', + 'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + external_dir = self.get_tblspace_path(node, 'external_dir') + + self.create_tblspace_in_node( + node, 'tblspace1', tblspc_path=external_dir) + + node.pgbench_init(scale=3, 
tablespace='tblspace1') + + # FULL backup with external directories + try: + backup_id = self.backup_node( + backup_dir, 'node', node, + options=[ + "-j", "4", "--stream", + "-E", "{0}".format( + external_dir)]) + # we should die here because exception is what we expect to happen + self.assertEqual( + 1, 0, + "Expecting Error because external dir points to the tablespace" + "\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'External directory path (-E option)' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # external directory contains a symlink to a file + # external directory contains a symlink to a directory + # external directory is a symlink + + # latest page backup without external_dir + + # multiple external directories + + # --external-dirs=none + + # --external-dirs points to a file + + # external directory in config and in command line + + # external directory contains multiple directories, some of which may be empty + + # forbid external-dirs to point to tablespace directories + # check that unchanged files are not copied by the next backup + + # merge + + # complex merge + diff --git a/tests/helpers/ptrack_helpers.py b/tests/helpers/ptrack_helpers.py index c2eb2561..2d2b333b 100644 --- a/tests/helpers/ptrack_helpers.py +++ b/tests/helpers/ptrack_helpers.py @@ -892,7 +892,7 @@ class ProbackupTest(object): def delete_expired( self, backup_dir, instance, options=[], old_binary=False): cmd_list = [ - 'delete', '--expired', '--wal', + 'delete', '-B', backup_dir, '--instance={0}'.format(instance) ] @@ -997,6 +997,29 @@ class ProbackupTest(object): ) master.reload() + def change_backup_status(self, backup_dir, instance, backup_id, status): + + control_file_path = os.path.join( + backup_dir, 'backups', instance, backup_id, 'backup.control') + + with open(control_file_path, 'r') as f: + actual_control = f.read() + + new_control_file = '' + for line in actual_control.splitlines(): + if line.startswith('status'): + line = 'status = {0}'.format(status) + new_control_file += line + new_control_file += '\n' + + with open(control_file_path, 'wt') as f: + f.write(new_control_file) + f.flush() + f.close() + + with open(control_file_path, 'r') as f: + actual_control = f.read() + + def wrong_wal_clean(self, node, wal_size): wals_dir = os.path.join(self.backup_dir(node), 'wal') wals = [ diff --git a/tests/logging.py b/tests/logging.py index 173a977a..210a6f4f 100644 --- a/tests/logging.py +++ b/tests/logging.py @@ -1,7 +1,7 @@ import unittest import os from .helpers.ptrack_helpers import ProbackupTest, ProbackupException - +import datetime module_name = 'logging' @@ -15,10 +15,8 @@ class LogTest(ProbackupTest, unittest.TestCase): fname = self.id().split('.')[3] node = self.make_simple_node( base_dir=os.path.join(module_name, fname, 'node'), - initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica'} - ) + initdb_params=['--data-checksums']) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) @@ -41,4 +39,36 @@ class LogTest(ProbackupTest, unittest.TestCase): gdb.continue_execution_until_exit() # Clean after yourself - self.del_test_dir(module_name, fname) \ No newline at end of file + self.del_test_dir(module_name, fname) + + def test_log_filename_strftime(self): + fname = self.id().split('.')[3] + node = 
self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + node.slow_start() + + self.set_config( + backup_dir, 'node', + options=['--log-rotation-age=1d']) + + self.backup_node( + backup_dir, 'node', node, + options=[ + '--stream', + '--log-level-file=VERBOSE', + '--log-filename=pg_probackup-%a.log']) + + day_of_week = datetime.datetime.today().strftime("%a") + + path = os.path.join( + backup_dir, 'log', 'pg_probackup-{0}.log'.format(day_of_week)) + + self.assertTrue(os.path.isfile(path)) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/merge.py b/tests/merge.py index 908e2751..366c6f34 100644 --- a/tests/merge.py +++ b/tests/merge.py @@ -4,6 +4,7 @@ import unittest import os from .helpers.ptrack_helpers import ProbackupTest, ProbackupException import shutil +from datetime import datetime, timedelta module_name = "merge" @@ -1202,11 +1203,7 @@ class MergeTest(ProbackupTest, unittest.TestCase): backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') node = self.make_simple_node( base_dir=os.path.join(module_name, fname, 'node'), - set_replication=True, initdb_params=['--data-checksums'], - pg_options={ - 'wal_level': 'replica' - } - ) + set_replication=True, initdb_params=['--data-checksums']) self.init_pb(backup_dir) self.add_instance(backup_dir, 'node', node) @@ -1256,9 +1253,92 @@ class MergeTest(ProbackupTest, unittest.TestCase): backup_id_deleted = self.show_pb(backup_dir, "node")[1]["id"] + # Try to continue failed MERGE + self.merge_backup(backup_dir, "node", backup_id) + # Clean after yourself + self.del_test_dir(module_name, fname) + + def test_continue_failed_merge_3(self): + """ + Check that failed MERGE can`t be continued after target backup deleting + Create FULL and 2 PAGE backups + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, initdb_params=['--data-checksums']) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Create test data + node.safe_psql("postgres", "create sequence t_seq") + node.safe_psql( + "postgres", + "create table t_heap as select i as id, nextval('t_seq')" + " as t_seq, md5(i::text) as text, md5(i::text)::tsvector" + " as tsvector from generate_series(0,100000) i" + ) + + # FULL backup + self.backup_node(backup_dir, 'node', node) + + # CREATE FEW PAGE BACKUP + i = 0 + + while i < 2: + + node.safe_psql( + "postgres", + "delete from t_heap" + ) + + node.safe_psql( + "postgres", + "vacuum t_heap" + ) + node.safe_psql( + "postgres", + "insert into t_heap select i as id, nextval('t_seq') as t_seq," + " md5(i::text) as text, md5(i::text)::tsvector as tsvector" + " from generate_series(100,200000) i" + ) + + # PAGE BACKUP + self.backup_node( + backup_dir, 'node', node, backup_type='page' + ) + i = i + 1 + + if self.paranoia: + pgdata = self.pgdata_content(node.data_dir) + + backup_id_merge = self.show_pb(backup_dir, "node")[2]["id"] + backup_id_delete = self.show_pb(backup_dir, "node")[1]["id"] + + print(self.show_pb(backup_dir, as_text=True, as_json=False)) + + gdb = self.merge_backup(backup_dir, "node", backup_id_merge, gdb=True) + + 
gdb.set_breakpoint('copy_file') + gdb.run_until_break() + gdb.continue_execution_until_break(2) + + gdb._execute('signal SIGKILL') + + print(self.show_pb(backup_dir, as_text=True, as_json=False)) + print(os.path.join(backup_dir, "backups", "node", backup_id_delete)) + + # DELETE PAGE1 + shutil.rmtree( + os.path.join(backup_dir, "backups", "node", backup_id_delete)) + # Try to continue failed MERGE try: - self.merge_backup(backup_dir, "node", backup_id) + self.merge_backup(backup_dir, "node", backup_id_merge) self.assertEqual( 1, 0, "Expecting Error because of backup corruption.\n " @@ -1266,8 +1346,8 @@ class MergeTest(ProbackupTest, unittest.TestCase): repr(self.output), self.cmd)) except ProbackupException as e: self.assertTrue( - "ERROR: Backup {0} has status: DELETING".format( - backup_id_deleted) in e.message, + "ERROR: Parent full backup for the given backup {0} was not found".format( + backup_id_merge) in e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(e.message), self.cmd)) @@ -1369,6 +1449,482 @@ class MergeTest(ProbackupTest, unittest.TestCase): self.del_test_dir(module_name, fname) + def test_crash_after_opening_backup_control_1(self): + """ + check that crashing after opening backup.control + for writing will not result in losing backup metadata + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # FULL stream backup + self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # DELTA archive backup + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + gdb = self.merge_backup(backup_dir, "node", backup_id, gdb=True) + gdb.set_breakpoint('write_backup_filelist') + gdb.run_until_break() + + gdb.set_breakpoint('write_backup') + gdb.continue_execution_until_break() + gdb.set_breakpoint('pgBackupWriteControl') + gdb.continue_execution_until_break() + + gdb._execute('signal SIGKILL') + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + self.assertEqual( + 'MERGING', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + 'MERGING', self.show_pb(backup_dir, 'node')[1]['status']) + + self.del_test_dir(module_name, fname) + + def test_crash_after_opening_backup_control_2(self): + """ + check that crashing after opening backup_content.control + for writing will not result in losing metadata about backup files + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Add data + node.pgbench_init(scale=3) + + # FULL backup + full_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # Change data + pgbench = node.pgbench(options=['-T', '20', '-c', '2']) + pgbench.wait() + + path = node.safe_psql( + 'postgres', + "select 
pg_relation_filepath('pgbench_accounts')").rstrip() + + fsm_path = path + '_fsm' + + node.safe_psql( + 'postgres', + 'vacuum pgbench_accounts') + + # DELTA backup + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + pgdata = self.pgdata_content(node.data_dir) + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + gdb = self.merge_backup(backup_dir, "node", backup_id, gdb=True) + gdb.set_breakpoint('write_backup_filelist') + gdb.run_until_break() + + gdb.set_breakpoint('print_file_list') + gdb.continue_execution_until_break() + + gdb._execute('signal SIGKILL') + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + self.assertEqual( + 'MERGING', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + 'MERGING', self.show_pb(backup_dir, 'node')[1]['status']) + + # In to_backup drop file that comes from from_backup + # emulate crash during previous merge + file_to_remove = os.path.join( + backup_dir, 'backups', + 'node', full_id, 'database', fsm_path) + + print(file_to_remove) + + os.remove(file_to_remove) + + # Continue failed merge + self.merge_backup(backup_dir, "node", backup_id) + + node.cleanup() + + # restore merged backup + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + + self.compare_pgdata(pgdata, pgdata_restored) + + self.del_test_dir(module_name, fname) + + def test_losing_file_after_failed_merge(self): + """ + check that a file lost from the FULL backup because of a failed + merge can be recovered by continuing the merge + """ + fname = self.id().split('.')[3] + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'autovacuum': 'off'}) + + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Add data + node.pgbench_init(scale=1) + + # FULL backup + full_id = self.backup_node( + backup_dir, 'node', node, options=['--stream']) + + # Change data + node.safe_psql( + 'postgres', + "update pgbench_accounts set aid = aid + 1005000") + + path = node.safe_psql( + 'postgres', + "select pg_relation_filepath('pgbench_accounts')").rstrip() + + node.safe_psql( + 'postgres', + "VACUUM pgbench_accounts") + + vm_path = path + '_vm' + + # DELTA backup + backup_id = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + pgdata = self.pgdata_content(node.data_dir) + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + gdb = self.merge_backup(backup_dir, "node", backup_id, gdb=True) + gdb.set_breakpoint('write_backup_filelist') + gdb.run_until_break() + + gdb.set_breakpoint('print_file_list') + gdb.continue_execution_until_break() + + gdb._execute('signal SIGKILL') + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + self.assertEqual( + 'MERGING', self.show_pb(backup_dir, 'node')[0]['status']) + + self.assertEqual( + 'MERGING', self.show_pb(backup_dir, 'node')[1]['status']) + + # In to_backup drop file that comes from from_backup + # emulate crash during previous merge + file_to_remove = os.path.join( + backup_dir, 'backups', + 'node', full_id, 'database', vm_path) + + os.remove(file_to_remove) + + # Try to continue failed MERGE + #print(backup_id) + #exit(1) + self.merge_backup(backup_dir, "node", backup_id) 
+ + self.assertEqual( + 'OK', self.show_pb(backup_dir, 'node')[0]['status']) + + node.cleanup() + + self.restore_node(backup_dir, 'node', node) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_merge_backup_from_future(self): + """ + take FULL backup, take PAGE backup from future, + try to merge page with FULL + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'wal_level': 'replica', + 'max_wal_senders': '2', + 'autovacuum': 'off'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL + self.backup_node(backup_dir, 'node', node) + + node.pgbench_init(scale=3) + + # Take PAGE from future + backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page') + + with open( + os.path.join( + backup_dir, 'backups', 'node', + backup_id, "backup.control"), "a") as conf: + conf.write("start-time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() + timedelta(days=3))) + + # rename directory + new_id = self.show_pb(backup_dir, 'node')[1]['id'] + + os.rename( + os.path.join(backup_dir, 'backups', 'node', backup_id), + os.path.join(backup_dir, 'backups', 'node', new_id)) + + pgbench = node.pgbench(options=['-T', '3', '-c', '2', '--no-vacuum']) + pgbench.wait() + + backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page') + pgdata = self.pgdata_content(node.data_dir) + + node.cleanup() + self.merge_backup(backup_dir, 'node', backup_id=backup_id) + + self.restore_node(backup_dir, 'node', node, backup_id=backup_id) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_merge_multiple_descendants(self): + """ + PAGEb3 + | PAGEa3 + PAGEb2 / + | PAGEa2 / + PAGEb1 \ / + | PAGEa1 + FULLb | + FULLa + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + + backup_id_b = self.backup_node(backup_dir, 'node', node) + + # Change FULLb backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change FULLb backup status to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + # Change PAGEa1 backup status to OK + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # Change PAGEb1 backup status to ERROR + 
self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEb1 backup status to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + + # Change PAGEa2 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb2 OK + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEb2 and PAGEb1 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b2, 'ERROR') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + + # PAGEb2 ERROR + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + page_id_a3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa3 OK + # PAGEb2 ERROR + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEa3 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a3, 'ERROR') + + # Change PAGEb2 status to OK + self.change_backup_status(backup_dir, 'node', page_id_b2, 'OK') + + page_id_b3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb3 OK + # PAGEa3 ERROR + # PAGEb2 OK + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEa3, PAGEa2 and PAGEb1 status to OK + self.change_backup_status(backup_dir, 'node', page_id_a3, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + + # PAGEb3 OK + # PAGEa3 OK + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Check that page_id_a3 and page_id_a2 are both direct descendants of page_id_a1 + self.assertEqual( + self.show_pb(backup_dir, 'node', backup_id=page_id_a3)['parent-backup-id'], + page_id_a1) + + self.assertEqual( + self.show_pb(backup_dir, 'node', backup_id=page_id_a2)['parent-backup-id'], + page_id_a1) + + self.merge_backup( + backup_dir, 'node', page_id_a2, + options=['--merge-expired', '--log-level-console=log']) + + try: + self.merge_backup( + backup_dir, 'node', page_id_a3, + options=['--merge-expired', '--log-level-console=log']) + self.assertEqual( + 1, 0, + "Expecting Error because of parent FULL backup is missing.\n " + "Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "ERROR: Parent full backup for the given " + "backup {0} was not found".format( + page_id_a3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format( + repr(e.message), self.cmd)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # 1. always use parent link when merging (intermediates may be from different chain) # 2. 
page backup we are merging with may disappear after failed merge, # it should not be possible to continue merge after that diff --git a/tests/page.py b/tests/page.py index a0a554de..b0eb79bf 100644 --- a/tests/page.py +++ b/tests/page.py @@ -711,7 +711,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): self.assertTrue( 'INFO: Wait for LSN' in e.message and 'in archived WAL segment' in e.message and - 'could not read WAL record at' in e.message and + 'Could not read WAL record at' in e.message and 'WAL segment "{0}" is absent\n'.format( file) in e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format( @@ -737,7 +737,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): self.assertTrue( 'INFO: Wait for LSN' in e.message and 'in archived WAL segment' in e.message and - 'could not read WAL record at' in e.message and + 'Could not read WAL record at' in e.message and 'WAL segment "{0}" is absent\n'.format( file) in e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format( @@ -829,7 +829,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): self.assertTrue( 'INFO: Wait for LSN' in e.message and 'in archived WAL segment' in e.message and - 'could not read WAL record at' in e.message and + 'Could not read WAL record at' in e.message and 'incorrect resource manager data checksum in record at' in e.message and 'Possible WAL corruption. Error has occured during reading WAL segment "{0}"'.format( file) in e.message, @@ -855,7 +855,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): self.assertTrue( 'INFO: Wait for LSN' in e.message and 'in archived WAL segment' in e.message and - 'could not read WAL record at' in e.message and + 'Could not read WAL record at' in e.message and 'incorrect resource manager data checksum in record at' in e.message and 'Possible WAL corruption. Error has occured during reading WAL segment "{0}"'.format( file) in e.message, @@ -952,7 +952,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): self.assertTrue( 'INFO: Wait for LSN' in e.message and 'in archived WAL segment' in e.message and - 'could not read WAL record at' in e.message and + 'Could not read WAL record at' in e.message and 'WAL file is from different database system: WAL file database system identifier is' in e.message and 'pg_control database system identifier is' in e.message and 'Possible WAL corruption. Error has occured during reading WAL segment "{0}"'.format( @@ -979,7 +979,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase): self.assertTrue( 'INFO: Wait for LSN' in e.message and 'in archived WAL segment' in e.message and - 'could not read WAL record at' in e.message and + 'Could not read WAL record at' in e.message and 'WAL file is from different database system: WAL file database system identifier is' in e.message and 'pg_control database system identifier is' in e.message and 'Possible WAL corruption. 
Error has occured during reading WAL segment "{0}"'.format( diff --git a/tests/restore_test.py b/tests/restore.py similarity index 96% rename from tests/restore_test.py rename to tests/restore.py index 4519fe07..3a5ec616 100644 --- a/tests/restore_test.py +++ b/tests/restore.py @@ -5,6 +5,7 @@ import subprocess from datetime import datetime import sys from time import sleep +from datetime import datetime, timedelta module_name = 'restore' @@ -1724,3 +1725,60 @@ class RestoreTest(ProbackupTest, unittest.TestCase): # Clean after yourself self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_restore_backup_from_future(self): + """restore a PAGE backup taken after a backup with start-time faked into the future""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={ + 'wal_level': 'replica', + 'max_wal_senders': '2'}) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL + self.backup_node(backup_dir, 'node', node) + + node.pgbench_init(scale=3) + #pgbench = node.pgbench(options=['-T', '20', '-c', '2']) + #pgbench.wait() + + # Take PAGE from future + backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page') + + with open( + os.path.join( + backup_dir, 'backups', 'node', + backup_id, "backup.control"), "a") as conf: + conf.write("start-time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() + timedelta(days=3))) + + # rename directory + new_id = self.show_pb(backup_dir, 'node')[1]['id'] + + os.rename( + os.path.join(backup_dir, 'backups', 'node', backup_id), + os.path.join(backup_dir, 'backups', 'node', new_id)) + + pgbench = node.pgbench(options=['-T', '3', '-c', '2', '--no-vacuum']) + pgbench.wait() + + backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page') + pgdata = self.pgdata_content(node.data_dir) + + node.cleanup() + self.restore_node(backup_dir, 'node', node, backup_id=backup_id) + + pgdata_restored = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata, pgdata_restored) + + # Clean after yourself + self.del_test_dir(module_name, fname) diff --git a/tests/retention.py b/tests/retention.py new file mode 100644 index 00000000..e1104b3a --- /dev/null +++ b/tests/retention.py @@ -0,0 +1,1247 @@ +import os +import unittest +from datetime import datetime, timedelta +from .helpers.ptrack_helpers import ProbackupTest + + +module_name = 'retention' + + +class RetentionTest(ProbackupTest, unittest.TestCase): + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_retention_redundancy_1(self): + """purge backups using redundancy-based retention policy""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'wal_level': 'replica'} + ) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + with open(os.path.join( + backup_dir, 'backups', 'node', + "pg_probackup.conf"), "a") as conf: + conf.write("retention-redundancy = 1\n") + + # Make backups to be purged + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type="page") + # Make 
backups to be kept + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type="page") + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) + + # Purge backups + log = self.delete_expired( + backup_dir, 'node', options=['--expired', '--wal']) + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) + + # Check that WAL segments were deleted + min_wal = None + max_wal = None + for line in log.splitlines(): + if line.startswith("INFO: removed min WAL segment"): + min_wal = line[31:-1] + elif line.startswith("INFO: removed max WAL segment"): + max_wal = line[31:-1] + + if not min_wal: + self.assertTrue(False, "min_wal is empty") + + if not max_wal: + self.assertTrue(False, "max_wal is not set") + + for wal_name in os.listdir(os.path.join(backup_dir, 'wal', 'node')): + if not wal_name.endswith(".backup"): + # wal_name_b = wal_name.encode('ascii') + self.assertEqual(wal_name[8:] > min_wal[8:], True) + self.assertEqual(wal_name[8:] > max_wal[8:], True) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + #@unittest.skip("skip") + def test_retention_window_2(self): + """purge backups using window-based retention policy""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums'], + pg_options={'wal_level': 'replica'} + ) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + with open( + os.path.join( + backup_dir, + 'backups', + 'node', + "pg_probackup.conf"), "a") as conf: + conf.write("retention-redundancy = 1\n") + conf.write("retention-window = 1\n") + + # Make backups to be purged + self.backup_node(backup_dir, 'node', node) + self.backup_node(backup_dir, 'node', node, backup_type="page") + # Make backup to be kept + self.backup_node(backup_dir, 'node', node) + + backups = os.path.join(backup_dir, 'backups', 'node') + days_delta = 5 + for backup in os.listdir(backups): + if backup == 'pg_probackup.conf': + continue + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=days_delta))) + days_delta -= 1 + + # Make backup to be kept + self.backup_node(backup_dir, 'node', node, backup_type="page") + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) + + # Purge backups + self.delete_expired(backup_dir, 'node', options=['--expired']) + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + #@unittest.skip("skip") + def test_retention_window_3(self): + """purge all backups using window-based retention policy""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + + # Take FULL BACKUP + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + # Take second FULL BACKUP + backup_id_2 = self.backup_node(backup_dir, 'node', node) + + # Take third FULL BACKUP + backup_id_3 = self.backup_node(backup_dir, 'node', node) + + + backups = 
os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup == 'pg_probackup.conf': + continue + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + # Purge backups + self.delete_expired( + backup_dir, 'node', options=['--retention-window=1', '--expired']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 0) + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + # count wal files in ARCHIVE + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_retention_window_4(self): + """purge all backups using window-based retention policy""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + + # Take FULL BACKUPs + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + backup_id_2 = self.backup_node(backup_dir, 'node', node) + + backup_id_3 = self.backup_node(backup_dir, 'node', node) + + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup == 'pg_probackup.conf': + continue + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + self.delete_pb(backup_dir, 'node', backup_id_2) + self.delete_pb(backup_dir, 'node', backup_id_3) + + # Purge backups + self.delete_expired( + backup_dir, 'node', + options=['--retention-window=1', '--expired', '--wal']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 0) + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + # count wal files in ARCHIVE + wals_dir = os.path.join(backup_dir, 'wal', 'node') + # n_wals = len(os.listdir(wals_dir)) + + # self.assertTrue(n_wals > 0) + + # self.delete_expired( + # backup_dir, 'node', + # options=['--retention-window=1', '--expired', '--wal']) + + # count again + n_wals = len(os.listdir(wals_dir)) + self.assertTrue(n_wals == 0) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_window_expire_interleaved_incremental_chains(self): + """complicated case of interleaved backup chains""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + + # Take FULL BACKUPs + + backup_id_a = self.backup_node(backup_dir, 'node', node) + backup_id_b = self.backup_node(backup_dir, 'node', node) + + # Change FULL B backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # FULLb ERROR + # FULLa OK + # Take PAGEa1 backup + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + # Change FULL B backup status to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change 
PAGEa1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + # Now we start to play with first generation of PAGE backups + # Change PAGEb1 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + + # Change PAGEa1 status to OK + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + # Change PAGEa2 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + + # Change PAGEb1 status to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change PAGEa2 status to OK + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Purge backups + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup in [page_id_a2, page_id_b2, 'pg_probackup.conf']: + continue + + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + + self.delete_expired( + backup_dir, 'node', + options=['--retention-window=1', '--expired']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 6) + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_redundancy_expire_interleaved_incremental_chains(self): + """complicated case of interleaved backup chains""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + + # Take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + backup_id_b = self.backup_node(backup_dir, 'node', node) + + # Change FULL B backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # FULLb ERROR + # FULLa OK + # Take PAGEa1 backup + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + # Change FULL B backup status to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + # Now we start to play with first generation of PAGE backups + # Change PAGEb1 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + + # Change PAGEa1 status to OK + 
self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + # Change PAGEa2 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + + # Change PAGEb1 status to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change PAGEa2 status to OK + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + self.delete_expired( + backup_dir, 'node', + options=['--retention-redundancy=1', '--expired']) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 3) + + print(self.show_pb( + backup_dir, 'node', as_json=False, as_text=True)) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_window_merge_interleaved_incremental_chains(self): + """complicated case of interleaved backup chains""" + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + # Take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + backup_id_b = self.backup_node(backup_dir, 'node', node) + + # Change FULL B backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + # FULLb ERROR + # FULLa OK + # Take PAGEa1 backup + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + # Change FULL B backup status to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + # Now we start to play with first generation of PAGE backups + # Change PAGEb1 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + + # Change PAGEa1 status to OK + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + # Change PAGEa2 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + + # Change PAGEb1 status to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Change PAGEa2 status to OK + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Purge backups + backups = os.path.join(backup_dir, 
'backups', 'node') + for backup in os.listdir(backups): + if backup in [page_id_a2, page_id_b2, 'pg_probackup.conf']: + continue + + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + output = self.delete_expired( + backup_dir, 'node', + options=['--retention-window=1', '--expired', '--merge-expired']) + + self.assertIn( + "Merge incremental chain between FULL backup {0} and backup {1}".format( + backup_id_a, page_id_a2), + output) + + self.assertIn( + "Merging backup {0} with backup {1}".format( + page_id_a1, backup_id_a), output) + + self.assertIn( + "Rename {0} to {1}".format( + backup_id_a, page_id_a1), output) + + self.assertIn( + "Merging backup {0} with backup {1}".format( + page_id_a2, page_id_a1), output) + + self.assertIn( + "Rename {0} to {1}".format( + page_id_a1, page_id_a2), output) + + self.assertIn( + "Merge incremental chain between FULL backup {0} and backup {1}".format( + backup_id_b, page_id_b2), + output) + + self.assertIn( + "Merging backup {0} with backup {1}".format( + page_id_b1, backup_id_b), output) + + self.assertIn( + "Rename {0} to {1}".format( + backup_id_b, page_id_b1), output) + + self.assertIn( + "Merging backup {0} with backup {1}".format( + page_id_b2, page_id_b1), output) + + self.assertIn( + "Rename {0} to {1}".format( + page_id_b1, page_id_b2), output) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_window_merge_interleaved_incremental_chains_1(self): + """ + PAGEb3 + PAGEb2 + PAGEb1 + PAGEa1 + FULLb + FULLa + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=3) + + # Take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + pgbench = node.pgbench(options=['-t', '10', '-c', '2']) + pgbench.wait() + + backup_id_b = self.backup_node(backup_dir, 'node', node) + pgbench = node.pgbench(options=['-t', '10', '-c', '2']) + pgbench.wait() + + # Change FULL B backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgdata_a1 = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench(options=['-t', '10', '-c', '2']) + pgbench.wait() + + # PAGEa1 OK + # FULLb ERROR + # FULLa OK + # Change FULL B backup status to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench(options=['-t', '10', '-c', '2']) + pgbench.wait() + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench(options=['-t', '10', '-c', '2']) + pgbench.wait() + + page_id_b3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgdata_b3 = self.pgdata_content(node.data_dir) + + pgbench = node.pgbench(options=['-t', 
'10', '-c', '2']) + pgbench.wait() + + # PAGEb3 OK + # PAGEb2 OK + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + # Change PAGEa1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # PAGEb3 OK + # PAGEb2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Purge backups + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup in [page_id_a1, page_id_b3, 'pg_probackup.conf']: + continue + + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + output = self.delete_expired( + backup_dir, 'node', + options=['--retention-window=1', '--expired', '--merge-expired']) + + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['id'], + page_id_b3) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[0]['id'], + page_id_a1) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['backup-mode'], + 'FULL') + + self.assertEqual( + self.show_pb(backup_dir, 'node')[0]['backup-mode'], + 'FULL') + + node.cleanup() + + # Data correctness of PAGEa3 + self.restore_node(backup_dir, 'node', node, backup_id=page_id_a1) + pgdata_restored_a1 = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata_a1, pgdata_restored_a1) + + node.cleanup() + + # Data correctness of PAGEb3 + self.restore_node(backup_dir, 'node', node, backup_id=page_id_b3) + pgdata_restored_b3 = self.pgdata_content(node.data_dir) + self.compare_pgdata(pgdata_b3, pgdata_restored_b3) + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_window_merge_multiple_descendants(self): + """ + PAGEb3 + | PAGEa3 + -----------------------------retention window + PAGEb2 / + | PAGEa2 / should be deleted + PAGEb1 \ / + | PAGEa1 + FULLb | + FULLa + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=3) + + # Take FULL BACKUPs + backup_id_a = self.backup_node(backup_dir, 'node', node) + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + backup_id_b = self.backup_node(backup_dir, 'node', node) + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + # Change FULLb backup status to ERROR + self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR') + + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + # Change FULLb backup status to OK + self.change_backup_status(backup_dir, 'node', backup_id_b, 'OK') + + # Change PAGEa1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a1, 'ERROR') + + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb1 OK + # PAGEa1 ERROR + # FULLb OK + # FULLa OK + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + # Change PAGEa1 backup status to OK + self.change_backup_status(backup_dir, 'node', page_id_a1, 'OK') + + # Change 
PAGEb1 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + # PAGEa2 OK + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEb1 backup status to OK + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + + # Change PAGEa2 backup status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a2, 'ERROR') + + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + # PAGEb2 OK + # PAGEa2 ERROR + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEb2 and PAGEb1 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_b2, 'ERROR') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'ERROR') + + # PAGEb2 ERROR + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + page_id_a3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + # PAGEa3 OK + # PAGEb2 ERROR + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEa3 status to ERROR + self.change_backup_status(backup_dir, 'node', page_id_a3, 'ERROR') + + # Change PAGEb2 status to OK + self.change_backup_status(backup_dir, 'node', page_id_b2, 'OK') + + page_id_b3 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # PAGEb3 OK + # PAGEa3 ERROR + # PAGEb2 OK + # PAGEa2 ERROR + # PAGEb1 ERROR + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Change PAGEa3, PAGEa2 and PAGEb1 status to OK + self.change_backup_status(backup_dir, 'node', page_id_a3, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK') + self.change_backup_status(backup_dir, 'node', page_id_b1, 'OK') + + # PAGEb3 OK + # PAGEa3 OK + # PAGEb2 OK + # PAGEa2 OK + # PAGEb1 OK + # PAGEa1 OK + # FULLb OK + # FULLa OK + + # Check that page_id_a3 and page_id_a2 are both direct descendants of page_id_a1 + self.assertEqual( + self.show_pb(backup_dir, 'node', backup_id=page_id_a3)['parent-backup-id'], + page_id_a1) + + self.assertEqual( + self.show_pb(backup_dir, 'node', backup_id=page_id_a2)['parent-backup-id'], + page_id_a1) + + print("Backups {0} and {1} are children of {2}".format( + page_id_a3, page_id_a2, page_id_a1)) + + # Purge backups + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup in [page_id_a3, page_id_b3, 'pg_probackup.conf']: + continue + + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + output = self.delete_expired( + backup_dir, 'node', + options=[ + '--retention-window=1', '--expired', + '--merge-expired', '--log-level-console=log']) + + print(output) + + self.assertEqual(len(self.show_pb(backup_dir, 'node')), 3) + + # Merging chain A + self.assertIn( + "Merge incremental chain between FULL backup {0} and backup {1}".format( + backup_id_a, page_id_a3), + output) + + self.assertIn( + "Merging backup {0} with backup {1}".format( + page_id_a1, backup_id_a), output) + + self.assertIn( + "INFO: Rename {0} to 
{1}".format( + backup_id_a, page_id_a1), output) + + self.assertIn( + "WARNING: Backup {0} has multiple valid descendants. " + "Automatic merge is not possible.".format( + page_id_a1), output) + + # Merge chain B + self.assertIn( + "Merge incremental chain between FULL backup {0} and backup {1}".format( + backup_id_b, page_id_b3), + output) + + self.assertIn( + "Merging backup {0} with backup {1}".format( + page_id_b1, backup_id_b), output) + + self.assertIn( + "INFO: Rename {0} to {1}".format( + backup_id_b, page_id_b1), output) + + self.assertIn( + "Merging backup {0} with backup {1}".format( + page_id_b2, page_id_b1), output) + + self.assertIn( + "INFO: Rename {0} to {1}".format( + page_id_b1, page_id_b2), output) + + self.assertIn( + "Merging backup {0} with backup {1}".format( + page_id_b3, page_id_b2), output) + + self.assertIn( + "INFO: Rename {0} to {1}".format( + page_id_b2, page_id_b3), output) + + # this backup deleted because it is not guarded by retention + self.assertIn( + "INFO: Delete: {0}".format( + page_id_a1), output) + + + self.assertEqual( + self.show_pb(backup_dir, 'node')[2]['id'], + page_id_b3) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['id'], + page_id_a3) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[0]['id'], + page_id_a1) + + self.assertEqual( + self.show_pb(backup_dir, 'node')[2]['backup-mode'], + 'FULL') + + self.assertEqual( + self.show_pb(backup_dir, 'node')[1]['backup-mode'], + 'PAGE') + + self.assertEqual( + self.show_pb(backup_dir, 'node')[0]['backup-mode'], + 'FULL') + + # Clean after yourself + self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_window_chains(self): + """ + PAGE + -------window + PAGE + PAGE + FULL + PAGE + PAGE + FULL + """ + fname = self.id().split('.')[3] + node = self.make_simple_node( + base_dir=os.path.join(module_name, fname, 'node'), + initdb_params=['--data-checksums']) + + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.slow_start() + + node.pgbench_init(scale=3) + + # Chain A + backup_id_a = self.backup_node(backup_dir, 'node', node) + page_id_a1 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + page_id_a2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + # Chain B + backup_id_b = self.backup_node(backup_dir, 'node', node) + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + page_id_b1 = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + page_id_b2 = self.backup_node( + backup_dir, 'node', node, backup_type='page') + + pgbench = node.pgbench(options=['-T', '10', '-c', '2']) + pgbench.wait() + + page_id_b3 = self.backup_node( + backup_dir, 'node', node, backup_type='delta') + + pgdata = self.pgdata_content(node.data_dir) + + # Purge backups + backups = os.path.join(backup_dir, 'backups', 'node') + for backup in os.listdir(backups): + if backup in [page_id_b3, 'pg_probackup.conf']: + continue + + with open( + os.path.join( + backups, backup, "backup.control"), "a") as conf: + conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( + datetime.now() - timedelta(days=3))) + + output = self.delete_expired( + backup_dir, 'node', + options=[ + '--retention-window=1', '--expired', + '--merge-expired', '--log-level-console=log']) + + 
+        self.assertEqual(len(self.show_pb(backup_dir, 'node')), 1)
+
+        node.cleanup()
+
+        self.restore_node(backup_dir, 'node', node)
+
+        pgdata_restored = self.pgdata_content(node.data_dir)
+        self.compare_pgdata(pgdata, pgdata_restored)
+
+        # Clean after yourself
+        self.del_test_dir(module_name, fname)
+
+
+    # @unittest.skip("skip")
+    def test_window_chains_1(self):
+        """
+        PAGE
+        -------window
+        PAGE
+        PAGE
+        FULL
+        PAGE
+        PAGE
+        FULL
+        """
+        fname = self.id().split('.')[3]
+        node = self.make_simple_node(
+            base_dir=os.path.join(module_name, fname, 'node'),
+            initdb_params=['--data-checksums'])
+
+        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
+        self.init_pb(backup_dir)
+        self.add_instance(backup_dir, 'node', node)
+        self.set_archiving(backup_dir, 'node', node)
+        node.slow_start()
+
+        node.pgbench_init(scale=3)
+
+        # Chain A
+        backup_id_a = self.backup_node(backup_dir, 'node', node)
+        page_id_a1 = self.backup_node(
+            backup_dir, 'node', node, backup_type='page')
+
+        page_id_a2 = self.backup_node(
+            backup_dir, 'node', node, backup_type='page')
+
+        # Chain B
+        backup_id_b = self.backup_node(backup_dir, 'node', node)
+
+        page_id_b1 = self.backup_node(
+            backup_dir, 'node', node, backup_type='delta')
+
+        page_id_b2 = self.backup_node(
+            backup_dir, 'node', node, backup_type='page')
+
+        page_id_b3 = self.backup_node(
+            backup_dir, 'node', node, backup_type='delta')
+
+        pgdata = self.pgdata_content(node.data_dir)
+
+        # Purge backups
+        backups = os.path.join(backup_dir, 'backups', 'node')
+        for backup in os.listdir(backups):
+            if backup in [page_id_b3, 'pg_probackup.conf']:
+                continue
+
+            with open(
+                    os.path.join(
+                        backups, backup, "backup.control"), "a") as conf:
+                conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format(
+                    datetime.now() - timedelta(days=3)))
+
+        output = self.delete_expired(
+            backup_dir, 'node',
+            options=[
+                '--retention-window=1',
+                '--merge-expired', '--log-level-console=log'])
+
+        self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4)
+
+        self.assertIn(
+            "There are no backups to delete by retention policy",
+            output)
+
+        self.assertIn(
+            "Retention merging finished",
+            output)
+
+        output = self.delete_expired(
+            backup_dir, 'node',
+            options=[
+                '--retention-window=1',
+                '--expired', '--log-level-console=log'])
+
+        self.assertEqual(len(self.show_pb(backup_dir, 'node')), 1)
+
+        self.assertIn(
+            "There are no backups to merge by retention policy",
+            output)
+
+        self.assertIn(
+            "Purging finished",
+            output)
+
+        # Clean after yourself
+        self.del_test_dir(module_name, fname)
+
+    # @unittest.skip("skip")
+    def test_window_error_backups(self):
+        """
+        PAGE ERROR
+        -------window
+        PAGE ERROR
+        PAGE ERROR
+        PAGE ERROR
+        FULL ERROR
+        FULL
+        -------redundancy
+        """
+        fname = self.id().split('.')[3]
+        node = self.make_simple_node(
+            base_dir=os.path.join(module_name, fname, 'node'),
+            initdb_params=['--data-checksums'])
+
+        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
+        self.init_pb(backup_dir)
+        self.add_instance(backup_dir, 'node', node)
+        self.set_archiving(backup_dir, 'node', node)
+        node.slow_start()
+
+        node.pgbench_init(scale=3)
+
+        # Take FULL BACKUPs
+        backup_id_a1 = self.backup_node(backup_dir, 'node', node)
+        page_id_a2 = self.backup_node(
+            backup_dir, 'node', node, backup_type='page')
+
+        # Change FULLb backup status to ERROR
+        #self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR')
diff --git a/tests/retention_test.py b/tests/retention_test.py
deleted file mode 100644
index 34c02658..00000000
---
a/tests/retention_test.py +++ /dev/null @@ -1,178 +0,0 @@ -import os -import unittest -from datetime import datetime, timedelta -from .helpers.ptrack_helpers import ProbackupTest - - -module_name = 'retention' - - -class RetentionTest(ProbackupTest, unittest.TestCase): - - # @unittest.skip("skip") - # @unittest.expectedFailure - def test_retention_redundancy_1(self): - """purge backups using redundancy-based retention policy""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir=os.path.join(module_name, fname, 'node'), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.slow_start() - - with open(os.path.join( - backup_dir, 'backups', 'node', - "pg_probackup.conf"), "a") as conf: - conf.write("retention-redundancy = 1\n") - - # Make backups to be purged - self.backup_node(backup_dir, 'node', node) - self.backup_node(backup_dir, 'node', node, backup_type="page") - # Make backups to be keeped - self.backup_node(backup_dir, 'node', node) - self.backup_node(backup_dir, 'node', node, backup_type="page") - - self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) - - # Purge backups - log = self.delete_expired(backup_dir, 'node') - self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) - - # Check that WAL segments were deleted - min_wal = None - max_wal = None - for line in log.splitlines(): - if line.startswith("INFO: removed min WAL segment"): - min_wal = line[31:-1] - elif line.startswith("INFO: removed max WAL segment"): - max_wal = line[31:-1] - - if not min_wal: - self.assertTrue(False, "min_wal is empty") - - if not max_wal: - self.assertTrue(False, "max_wal is not set") - - for wal_name in os.listdir(os.path.join(backup_dir, 'wal', 'node')): - if not wal_name.endswith(".backup"): - # wal_name_b = wal_name.encode('ascii') - self.assertEqual(wal_name[8:] > min_wal[8:], True) - self.assertEqual(wal_name[8:] > max_wal[8:], True) - - # Clean after yourself - self.del_test_dir(module_name, fname) - -# @unittest.skip("123") - def test_retention_window_2(self): - """purge backups using window-based retention policy""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir=os.path.join(module_name, fname, 'node'), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.slow_start() - - with open( - os.path.join( - backup_dir, - 'backups', - 'node', - "pg_probackup.conf"), "a") as conf: - conf.write("retention-redundancy = 1\n") - conf.write("retention-window = 1\n") - - # Make backups to be purged - self.backup_node(backup_dir, 'node', node) - self.backup_node(backup_dir, 'node', node, backup_type="page") - # Make backup to be keeped - self.backup_node(backup_dir, 'node', node) - - backups = os.path.join(backup_dir, 'backups', 'node') - days_delta = 5 - for backup in os.listdir(backups): - if backup == 'pg_probackup.conf': - continue - with open( - os.path.join( - backups, backup, "backup.control"), "a") as conf: - conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( - datetime.now() - timedelta(days=days_delta))) - days_delta -= 1 - - # Make backup to be keeped - 
self.backup_node(backup_dir, 'node', node, backup_type="page") - - self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4) - - # Purge backups - self.delete_expired(backup_dir, 'node') - self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) - - # Clean after yourself - self.del_test_dir(module_name, fname) - -# @unittest.skip("123") - def test_retention_wal(self): - """purge backups using window-based retention policy""" - fname = self.id().split('.')[3] - node = self.make_simple_node( - base_dir=os.path.join(module_name, fname, 'node'), - initdb_params=['--data-checksums'], - pg_options={'wal_level': 'replica'} - ) - backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') - self.init_pb(backup_dir) - self.add_instance(backup_dir, 'node', node) - self.set_archiving(backup_dir, 'node', node) - node.slow_start() - - node.safe_psql( - "postgres", - "create table t_heap as select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,100500) i") - - # Take FULL BACKUP - self.backup_node(backup_dir, 'node', node) - node.safe_psql( - "postgres", - "insert into t_heap select i as id, md5(i::text) as text, " - "md5(repeat(i::text,10))::tsvector as tsvector " - "from generate_series(0,100500) i") - - self.backup_node(backup_dir, 'node', node) - - backups = os.path.join(backup_dir, 'backups', 'node') - days_delta = 5 - for backup in os.listdir(backups): - if backup == 'pg_probackup.conf': - continue - with open( - os.path.join( - backups, backup, "backup.control"), "a") as conf: - conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format( - datetime.now() - timedelta(days=days_delta))) - days_delta -= 1 - - # Make backup to be keeped - self.backup_node(backup_dir, 'node', node, backup_type="page") - - self.assertEqual(len(self.show_pb(backup_dir, 'node')), 3) - - # Purge backups - self.delete_expired( - backup_dir, 'node', options=['--retention-window=2']) - self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2) - - # Clean after yourself - self.del_test_dir(module_name, fname) diff --git a/tests/snapfs.py b/tests/snapfs.py index a8acc835..3ea05e8e 100644 --- a/tests/snapfs.py +++ b/tests/snapfs.py @@ -9,8 +9,8 @@ module_name = 'snapfs' class SnapFSTest(ProbackupTest, unittest.TestCase): - # @unittest.skip("skip") # @unittest.expectedFailure + @unittest.skipUnless(ProbackupTest.enterprise, 'skip') def test_snapfs_simple(self): """standart backup modes with ARCHIVE WAL method""" fname = self.id().split('.')[3] diff --git a/tests/validate_test.py b/tests/validate.py similarity index 97% rename from tests/validate_test.py rename to tests/validate.py index 8ad30459..16a2a03c 100644 --- a/tests/validate_test.py +++ b/tests/validate.py @@ -61,7 +61,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): '{0} blknum 1, empty page'.format(file_path) in f.read(), 'Failed to detect nullified block') - self.validate_pb(backup_dir) + self.validate_pb(backup_dir, options=["-j", "4"]) node.cleanup() self.restore_node(backup_dir, 'node', node) @@ -106,10 +106,10 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate to real time self.assertIn( - "INFO: backup validation completed successfully", + "INFO: Backup validation completed successfully", self.validate_pb( backup_dir, 'node', - options=["--time={0}".format(target_time)]), + options=["--time={0}".format(target_time), "-j", "4"]), '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(self.output), self.cmd)) @@ -118,7 +118,7 @@ class 
ValidateTest(ProbackupTest, unittest.TestCase): try: self.validate_pb( backup_dir, 'node', options=["--time={0}".format( - unreal_time_1)]) + unreal_time_1), "-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of validation to unreal time.\n " @@ -136,7 +136,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): try: self.validate_pb( backup_dir, 'node', - options=["--time={0}".format(unreal_time_2)]) + options=["--time={0}".format(unreal_time_2), "-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of validation to unreal time.\n " @@ -144,7 +144,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): repr(self.output), self.cmd)) except ProbackupException as e: self.assertTrue( - 'ERROR: not enough WAL records to time' in e.message, + 'ERROR: Not enough WAL records to time' in e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(e.message), self.cmd)) @@ -156,11 +156,13 @@ class ValidateTest(ProbackupTest, unittest.TestCase): con.commit() target_xid = res[0][0] self.switch_wal_segment(node) + time.sleep(5) self.assertIn( - "INFO: backup validation completed successfully", + "INFO: Backup validation completed successfully", self.validate_pb( - backup_dir, 'node', options=["--xid={0}".format(target_xid)]), + backup_dir, 'node', options=["--xid={0}".format(target_xid), + "-j", "4"]), '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(self.output), self.cmd)) @@ -168,7 +170,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase): unreal_xid = int(target_xid) + 1000 try: self.validate_pb( - backup_dir, 'node', options=["--xid={0}".format(unreal_xid)]) + backup_dir, 'node', options=["--xid={0}".format(unreal_xid), + "-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of validation to unreal xid.\n " @@ -176,12 +179,13 @@ class ValidateTest(ProbackupTest, unittest.TestCase): repr(self.output), self.cmd)) except ProbackupException as e: self.assertTrue( - 'ERROR: not enough WAL records to xid' in e.message, + 'ERROR: Not enough WAL records to xid' in e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(e.message), self.cmd)) # Validate with backup ID - output = self.validate_pb(backup_dir, 'node', backup_id) + output = self.validate_pb(backup_dir, 'node', backup_id, + options=["-j", "4"]) self.assertIn( "INFO: Validating backup {0}".format(backup_id), output, @@ -267,7 +271,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Simple validate try: self.validate_pb( - backup_dir, 'node', backup_id=backup_id_2) + backup_dir, 'node', backup_id=backup_id_2, options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of data files corruption.\n " @@ -370,7 +374,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate PAGE1 try: self.validate_pb( - backup_dir, 'node', backup_id=backup_id_2) + backup_dir, 'node', backup_id=backup_id_2, options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of data files corruption.\n " @@ -469,7 +473,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate PAGE1 try: self.validate_pb( - backup_dir, 'node', backup_id=backup_id_2) + backup_dir, 'node', backup_id=backup_id_2, options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because backup has status ERROR.\n " @@ -556,7 +560,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate instance try: - self.validate_pb(backup_dir) + self.validate_pb(backup_dir, options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because backup has status ERROR.\n " @@ -695,7 
+699,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): try: self.validate_pb( backup_dir, 'node', - backup_id=backup_id_4) + backup_id=backup_id_4, options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of data files corruption.\n" @@ -895,7 +899,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase): self.validate_pb( backup_dir, 'node', options=[ - '-i', backup_id_4, '--xid={0}'.format(target_xid)]) + '-i', backup_id_4, '--xid={0}'.format(target_xid), + "-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of data files corruption.\n " @@ -1036,8 +1041,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate Instance try: - self.validate_pb( - backup_dir, 'node') + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of data files corruption.\n " @@ -1185,7 +1189,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate Instance try: - self.validate_pb(backup_dir, 'node') + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: @@ -1286,7 +1290,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate Instance try: - self.validate_pb(backup_dir, 'node') + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of data files corruption.\n " @@ -1344,7 +1348,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Simple validate try: - self.validate_pb(backup_dir, 'node') + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of wal segments corruption.\n" @@ -1414,7 +1418,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): 'node', backup_id, options=[ - "--xid={0}".format(target_xid)]) + "--xid={0}".format(target_xid), "-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of wal segments corruption.\n" @@ -1471,7 +1475,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): file = file[:-3] try: - self.validate_pb(backup_dir, 'node') + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of wal segment disappearance.\n" @@ -1495,7 +1499,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Run validate again try: - self.validate_pb(backup_dir, 'node', backup_id) + self.validate_pb(backup_dir, 'node', backup_id, options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of backup corruption.\n" @@ -1579,7 +1583,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): 'node', backup_id, options=[ - "--xid={0}".format(target_xid)]) + "--xid={0}".format(target_xid), "-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of wal segments corruption.\n" @@ -1587,9 +1591,9 @@ class ValidateTest(ProbackupTest, unittest.TestCase): repr(self.output), self.cmd)) except ProbackupException as e: self.assertTrue( - 'ERROR: not enough WAL records to xid' in e.message and - 'WARNING: recovery can be done up to time' in e.message and - "ERROR: not enough WAL records to xid {0}\n".format( + 'ERROR: Not enough WAL records to xid' in e.message and + 'WARNING: Recovery can be done up to time' in e.message and + "ERROR: Not enough WAL records to xid {0}\n".format( target_xid), '\n Unexpected Error Message: {0}\n CMD: {1}'.format( repr(e.message), self.cmd)) @@ -1633,7 +1637,7 @@ class ValidateTest(ProbackupTest, 
unittest.TestCase): try: self.validate_pb( backup_dir, 'node', - options=["--time={0}".format(recovery_time)]) + options=["--time={0}".format(recovery_time), "-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of wal segment disappearance.\n " @@ -1673,7 +1677,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase): backup_dir, 'node', backup_id)['recovery-time'] self.validate_pb( - backup_dir, 'node', options=["--time={0}".format(recovery_time)]) + backup_dir, 'node', options=["--time={0}".format(recovery_time), + "-j", "4"]) # Clean after yourself self.del_test_dir(module_name, fname) @@ -1830,7 +1835,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(file, file_new) try: - self.validate_pb(backup_dir) + self.validate_pb(backup_dir, options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of data file dissapearance.\n " @@ -1868,7 +1873,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(file_new, file) try: - self.validate_pb(backup_dir) + self.validate_pb(backup_dir, options=["-j", "4"]) except ProbackupException as e: self.assertIn( 'WARNING: Some backups are not valid'.format( @@ -1933,7 +1938,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(file, file_new) try: - self.validate_pb(backup_dir) + self.validate_pb(backup_dir, options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of data file dissapearance.\n " @@ -1973,7 +1978,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(file, file_new) try: - self.validate_pb(backup_dir) + self.validate_pb(backup_dir, options=["-j", "4"]) except ProbackupException as e: self.assertIn( 'WARNING: Some backups are not valid'.format( @@ -2047,7 +2052,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(file, file_new) try: - self.validate_pb(backup_dir, 'node', validate_id) + self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of data file dissapearance.\n " @@ -2094,7 +2100,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): self.backup_node(backup_dir, 'node', node, backup_type='page') try: - self.validate_pb(backup_dir, 'node') + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of data file dissapearance.\n " @@ -2162,7 +2168,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # revalidate again try: - self.validate_pb(backup_dir, 'node', validate_id) + self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of data file dissapearance.\n " @@ -2235,7 +2242,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Fix CORRUPT os.rename(file_new, file) - output = self.validate_pb(backup_dir, 'node', validate_id) + output = self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) self.assertIn( 'WARNING: Backup {0} has status: ORPHAN'.format(validate_id), @@ -2397,7 +2405,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(file, file_new) try: - self.validate_pb(backup_dir) + self.validate_pb(backup_dir, options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of data file dissapearance.\n " @@ -2439,7 +2447,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(old_directory, new_directory) try: - self.validate_pb(backup_dir) + self.validate_pb(backup_dir, options=["-j", "4"]) except ProbackupException as e: self.assertIn( 'WARNING: Some backups are not 
valid', e.message, @@ -2483,7 +2491,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # second time must be provided with ID of missing backup try: - self.validate_pb(backup_dir) + self.validate_pb(backup_dir, options=["-j", "4"]) except ProbackupException as e: self.assertIn( 'WARNING: Some backups are not valid', e.message, @@ -2528,7 +2536,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): self.assertTrue(self.show_pb(backup_dir, 'node')[1]['status'] == 'OK') self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') - output = self.validate_pb(backup_dir) + output = self.validate_pb(backup_dir, options=["-j", "4"]) self.assertIn( 'INFO: All backups are valid', @@ -2704,7 +2712,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(old_directory, new_directory) try: - self.validate_pb(backup_dir, 'node', validate_id) + self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of backup dissapearance.\n " @@ -2744,7 +2753,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase): self.assertTrue(self.show_pb(backup_dir, 'node')[0]['status'] == 'OK') try: - self.validate_pb(backup_dir, 'node', validate_id) + self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of backup dissapearance.\n " @@ -2773,7 +2783,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(new_directory, old_directory) # Revalidate backup chain - self.validate_pb(backup_dir, 'node', validate_id) + self.validate_pb(backup_dir, 'node', validate_id, options=["-j", "4"]) self.assertTrue(self.show_pb(backup_dir, 'node')[11]['status'] == 'OK') self.assertTrue(self.show_pb(backup_dir, 'node')[10]['status'] == 'OK') @@ -2851,7 +2861,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(full_old_directory, full_new_directory) try: - self.validate_pb(backup_dir, 'node', validate_id) + self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of backup dissapearance.\n " @@ -2894,7 +2905,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(full_new_directory, full_old_directory) # Revalidate backup chain - self.validate_pb(backup_dir, 'node', validate_id) + self.validate_pb(backup_dir, 'node', validate_id, options=["-j", "4"]) self.assertTrue(self.show_pb(backup_dir, 'node')[11]['status'] == 'OK') self.assertTrue(self.show_pb(backup_dir, 'node')[10]['status'] == 'OK') @@ -2974,7 +2985,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(full_old_directory, full_new_directory) try: - self.validate_pb(backup_dir, 'node', validate_id) + self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of backup dissapearance.\n " @@ -3017,7 +3029,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Revalidate backup chain try: - self.validate_pb(backup_dir, 'node', validate_id) + self.validate_pb(backup_dir, 'node', validate_id, + options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of backup dissapearance.\n " @@ -3082,7 +3095,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(full_new_directory, full_old_directory) # Revalidate chain - self.validate_pb(backup_dir, 'node', validate_id) + self.validate_pb(backup_dir, 'node', validate_id, options=["-j", "4"]) self.assertTrue(self.show_pb(backup_dir, 'node')[11]['status'] == 'OK') 
self.assertTrue(self.show_pb(backup_dir, 'node')[10]['status'] == 'OK') @@ -3158,7 +3171,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.rename(full_old_directory, full_new_directory) try: - self.validate_pb(backup_dir, 'node') + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of backup dissapearance.\n " @@ -3207,7 +3220,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Revalidate backup chain try: - self.validate_pb(backup_dir, 'node') + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of backup dissapearance.\n " @@ -3316,7 +3329,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase): # Validate backup try: - self.validate_pb(backup_dir, 'node') + self.validate_pb(backup_dir, 'node', options=["-j", "4"]) self.assertEqual( 1, 0, "Expecting Error because of pg_control change.\n "