diff --git a/bin/pgbackrest b/bin/pgbackrest index 97a04a2c6..19876f079 100755 --- a/bin/pgbackrest +++ b/bin/pgbackrest @@ -109,6 +109,14 @@ eval exitSafe(new pgBackRest::Archive()->process()); } + ################################################################################################################################ + # Process check command + ################################################################################################################################ + if (commandTest(CMD_CHECK)) + { + exitSafe(new pgBackRest::Archive()->check()); + } + ################################################################################################################################ # Process start/stop commands ################################################################################################################################ diff --git a/doc/lib/BackRestDoc/Common/DocRender.pm b/doc/lib/BackRestDoc/Common/DocRender.pm index 713c11fe0..c193ef6b1 100644 --- a/doc/lib/BackRestDoc/Common/DocRender.pm +++ b/doc/lib/BackRestDoc/Common/DocRender.pm @@ -66,6 +66,7 @@ my $oRenderTag = 'path' => ['', ''], 'cmd' => ['', ''], 'br-option' => ['', ''], + 'pg-setting' => ['', ''], 'param' => ['', ''], 'setting' => ['', ''], 'code' => ['', ''], diff --git a/doc/xml/backlog.xml b/doc/xml/backlog.xml index 75ff0f100..c65227635 100644 --- a/doc/xml/backlog.xml +++ b/doc/xml/backlog.xml @@ -32,14 +32,6 @@

If PostgreSQL crashes during a backup it may not be able to recover if the backup label is present. Copy and delete right after start_backup(). Stop backup will want to delete it so it might be necessary to copy it back or at least touch a file that PostgreSQL can delete. Check after the backup is complete to make sure it's really gone.

-
- Ability to test that WAL archiving is working - -

Github Issue

- -

Add a new command, archive-test, that will execute pg_switch_xlog and check that the xlog makes it to the archive.

-
-
Abandon threads and go to processes diff --git a/doc/xml/reference.xml b/doc/xml/reference.xml index 4731d1763..8457f9c96 100644 --- a/doc/xml/reference.xml +++ b/doc/xml/reference.xml @@ -72,8 +72,16 @@ The general section defines settings that are shared between multiple operations. - + + + Archive timeout. + + Set maximum time, in seconds, to wait for WAL segments to reach the archive. The timeout applies to the check command and to the backup command when waiting for WAL segments required to make the backup consistent to be archived. + + 30 + + Buffer size for file operations. @@ -125,7 +133,7 @@ Database query timeout. - Sets the timeout for queries against the database. This includes the pg_start_backup() and pg_stop_backup() functions which can each take a substantial amount of time. Because of this the timeout should be kept high unless you know that these functions will return quickly (i.e. if you have set startfast=y and you know that the database cluster will not generate many WAL segments during the backup). + Sets the timeout, in seconds, for queries against the database. This includes the pg_start_backup() and pg_stop_backup() functions which can each take a substantial amount of time. Because of this the timeout should be kept high unless you know that these functions will return quickly (i.e. if you have set startfast=y and you know that the database cluster will not generate many WAL segments during the backup). 600 @@ -203,7 +211,6 @@ The backup section defines settings related to backup. - @@ -225,6 +232,7 @@ y + Backup host when operating remotely via SSH. @@ -620,6 +628,24 @@ + + + Check the configuration. + + The check command validates that pgBackRest and the archive_command setting are configured correctly for archiving and backups. It detects misconfigurations, particularly in archiving, that result in incomplete backups because required WAL segments did not reach the archive. The command can be run on the database or the backup host. 
+ + Note that pg_create_restore_point('pgBackRest Archive Check') and pg_switch_xlog() are called to force PostgreSQL to archive a WAL segment. Restore points are only supported in PostgreSQL >= 9.1 so for older versions the check command may fail if there has been no write activity since the last log rotation. + + + + + {[backrest-exe]} --stanza=db check + + + + + + Expire backups that exceed retention. diff --git a/doc/xml/release.xml b/doc/xml/release.xml index c2b0ab12a..31aac8d6e 100644 --- a/doc/xml/release.xml +++ b/doc/xml/release.xml @@ -85,6 +85,20 @@ + + + + + + + + + +

Added check command to validate that pgBackRest is configured correctly for archiving and backups.

+
+
+
+ diff --git a/doc/xml/user-guide.xml b/doc/xml/user-guide.xml index f4e5ab70e..a724b29a3 100644 --- a/doc/xml/user-guide.xml +++ b/doc/xml/user-guide.xml @@ -412,6 +412,33 @@ --> + +
+ + +
+ Check the Configuration + + + + + Check the configuration + + + {[project-exe]} {[dash]}-stanza={[postgres-cluster-demo]} {[dash]}-log-level-console=info check + successfully stored in the archive at + + + + + + Example of an invalid configuration + + + {[project-exe]} {[dash]}-stanza={[postgres-cluster-demo]} --archive-timeout=.1 check + could not find WAL segment|did not reach the archive + +
@@ -1251,6 +1278,25 @@

Commands are run the same as on a single host configuration except that the backup and expire commands are run from the backup host and all other commands are run from the database host.

+ +

Check that the configuration is correct on both the database and backup hosts. More information about the check command can be found in Check the Configuration.

+ + + Check the configuration + + + {[project-exe]} {[dash]}-stanza={[postgres-cluster-demo]} check + + + + + Check the configuration + + + {[project-exe]} {[dash]}-stanza={[postgres-cluster-demo]} check + + + diff --git a/lib/pgBackRest/Archive.pm b/lib/pgBackRest/Archive.pm index aaa53f468..56b125189 100644 --- a/lib/pgBackRest/Archive.pm +++ b/lib/pgBackRest/Archive.pm @@ -11,6 +11,7 @@ use Exporter qw(import); our @EXPORT = qw(); use Fcntl qw(SEEK_CUR O_RDONLY O_WRONLY O_CREAT); use File::Basename qw(dirname basename); +use Scalar::Util qw(blessed); use lib dirname($0); use pgBackRest::Common::Exception; @@ -235,7 +236,7 @@ sub walFileName # If waiting and no WAL segment was found then throw an error if (@stryWalFileName == 0 && defined($iWaitSeconds)) { - confess &log(ERROR, "could not find WAL segment ${strWalSegment} after " . waitInterval($oWait) . ' second(s)'); + confess &log(ERROR, "could not find WAL segment ${strWalSegment} after ${iWaitSeconds} second(s)", ERROR_ARCHIVE_TIMEOUT); } # Return from function and log return values if any @@ -511,7 +512,7 @@ sub pushProcess $self->push($ARGV[1], $bArchiveAsync); - # Fork is async archiving is enabled + # Fork if async archiving is enabled if ($bArchiveAsync) { # Fork and disable the async archive flag if this is the parent process @@ -1035,4 +1036,134 @@ sub range ); } +#################################################################################################################################### +# check +# +# Validates the database configuration and checks that the archive logs can be read by backup. This will alert the user to any +# misconfiguration, particularly of archiving, that would result in the inability of a backup to complete (e.g waiting at the end +# until it times out because it could not find the WAL file). 
+#################################################################################################################################### +sub check +{ + my $self = shift; + + # Assign function parameters, defaults, and log debug info + my $strOperation = logDebugParam(__PACKAGE__ . '->check'); + + # Initialize default file object + $self->{oFile} = new pgBackRest::File + ( + optionGet(OPTION_STANZA), + optionGet(OPTION_REPO_PATH), + optionRemoteType(), + protocolGet() + ); + + # Initialize the database object + $self->{oDb} = new pgBackRest::Db(); + + # Validate the database configuration + $self->{oDb}->configValidate($self->{oFile}, optionGet(OPTION_DB_PATH)); + + # Force archiving + my $strWalSegment = $self->{oDb}->xlogSwitch(); + + # Get the timeout and error message to display - if it is 0 we are testing + my $iArchiveTimeout = optionGet(OPTION_ARCHIVE_TIMEOUT); + + # Initialize the result variables + my $iResult = 0; + my $strResultMessage = undef; + + # Record the start time to wait for the archive.info file to be written + my $oWait = waitInit($iArchiveTimeout); + + my $strArchiveId = undef; + my $strArchiveFile = undef; + + # Turn off console logging to control when to display the error + logLevelSet(undef, OFF); + + # Wait for the archive.info to be written. If it does not get written within the timout period then report the last error. 
+ do + { + eval + { + $strArchiveId = $self->getCheck($self->{oFile}); + + # Clear any previous errors if we've found the archive.info + $iResult = 0; + }; + + if ($@) + { + my $oMessage = $@; + + # If this is a backrest error then capture the last code and message else confess + if (blessed($oMessage) && $oMessage->isa('pgBackRest::Common::Exception')) + { + $iResult = $oMessage->code(); + $strResultMessage = $oMessage->message(); + } + else + { + confess $oMessage; + } + } + } while (!defined($strArchiveId) && waitMore($oWait)); + + # If able to get the archive id then check the archived WAL file with the time remaining + if ($iResult == 0) + { + eval + { + $strArchiveFile = $self->walFileName($self->{oFile}, $strArchiveId, $strWalSegment, false, $iArchiveTimeout); + }; + + # If this is a backrest error then capture the last code and message else confess + if ($@) + { + my $oMessage = $@; + + # If a backrest exception then return the code else confess + if (blessed($oMessage) && $oMessage->isa('pgBackRest::Common::Exception')) + { + $iResult = $oMessage->code(); + $strResultMessage = $oMessage->message(); + } + else + { + confess $oMessage; + } + } + } + + # Reset the console logging + logLevelSet(undef, optionGet(OPTION_LOG_LEVEL_CONSOLE)); + + # Display results + if ($iResult == 0) + { + &log(INFO, + "WAL segment ${strWalSegment} successfully stored in the archive at '" . + $self->{oFile}->pathGet(PATH_BACKUP_ARCHIVE, "$strArchiveId/${strArchiveFile}") . "'"); + } + else + { + &log(ERROR, $strResultMessage, $iResult); + &log(WARN, + "WAL segment ${strWalSegment} did not reach the archive:\n" . + "HINT: Check the archive_command to ensure that all options are correct (especialy --stanza).\n" . 
+ "HINT: Check the PostreSQL server log for errors."); + } + + # Return from function and log return values if any + return logDebugReturn + ( + $strOperation, + {name => 'iResult', value => $iResult, trace => true} + ); + +} + 1; diff --git a/lib/pgBackRest/Backup.pm b/lib/pgBackRest/Backup.pm index 93ecda226..3bb574307 100644 --- a/lib/pgBackRest/Backup.pm +++ b/lib/pgBackRest/Backup.pm @@ -855,7 +855,7 @@ sub process # Create the modification time for the archive logs my $lModificationTime = time(); - # After the backup has been stopped, need to make a copy of the archive logs need to make the db consistent + # After the backup has been stopped, need to make a copy of the archive logs to make the db consistent logDebugMisc($strOperation, "retrieve archive logs ${strArchiveStart}:${strArchiveStop}"); my $oArchive = new pgBackRest::Archive(); my $strArchiveId = $oArchive->getCheck($self->{oFile}); @@ -863,7 +863,7 @@ sub process foreach my $strArchive (@stryArchive) { - my $strArchiveFile = $oArchive->walFileName($self->{oFile}, $strArchiveId, $strArchive, false, 600); + my $strArchiveFile = $oArchive->walFileName($self->{oFile}, $strArchiveId, $strArchive, false, optionGet(OPTION_ARCHIVE_TIMEOUT)); if (optionGet(OPTION_BACKUP_ARCHIVE_COPY)) { diff --git a/lib/pgBackRest/Common/Exception.pm b/lib/pgBackRest/Common/Exception.pm index e39bee91b..e375766ea 100644 --- a/lib/pgBackRest/Common/Exception.pm +++ b/lib/pgBackRest/Common/Exception.pm @@ -132,6 +132,8 @@ use constant ERROR_DB_MISSING => ERROR_MIN push @EXPORT, qw(ERROR_DB_MISSING); use constant ERROR_DB_INVALID => ERROR_MINIMUM + 56; push @EXPORT, qw(ERROR_DB_INVALID); +use constant ERROR_ARCHIVE_TIMEOUT => ERROR_MINIMUM + 57; + push @EXPORT, qw(ERROR_ARCHIVE_TIMEOUT); use constant ERROR_INVALID_VALUE => ERROR_MAXIMUM - 1; push @EXPORT, qw(ERROR_INVALID_VALUE); diff --git a/lib/pgBackRest/Common/Wait.pm b/lib/pgBackRest/Common/Wait.pm index aee40211b..2ba58f903 100644 --- a/lib/pgBackRest/Common/Wait.pm +++ 
b/lib/pgBackRest/Common/Wait.pm @@ -17,6 +17,12 @@ use Time::HiRes qw(gettimeofday usleep); use lib dirname($0) . '/../lib'; use pgBackRest::Common::Log; +#################################################################################################################################### +# Wait constants +#################################################################################################################################### +use constant WAIT_TIME_MINIMUM => .1; + push @EXPORT, qw(WAIT_TIME_MINIMUM); + #################################################################################################################################### # waitRemainder #################################################################################################################################### diff --git a/lib/pgBackRest/Config/Config.pm b/lib/pgBackRest/Config/Config.pm index f7fa39aa5..02b9f3d26 100644 --- a/lib/pgBackRest/Config/Config.pm +++ b/lib/pgBackRest/Config/Config.pm @@ -17,6 +17,7 @@ use lib dirname($0) . 
'/../lib'; use pgBackRest::Common::Exception; use pgBackRest::Common::Ini; use pgBackRest::Common::Log; +use pgBackRest::Common::Wait; use pgBackRest::Protocol::Common; use pgBackRest::Protocol::RemoteMaster; use pgBackRest::Version; @@ -45,6 +46,9 @@ use constant CMD_ARCHIVE_PUSH => 'archive- use constant CMD_BACKUP => 'backup'; push @EXPORT, qw(CMD_BACKUP); $oCommandHash{&CMD_BACKUP} = true; +use constant CMD_CHECK => 'check'; + push @EXPORT, qw(CMD_CHECK); + $oCommandHash{&CMD_CHECK} = true; use constant CMD_EXPIRE => 'expire'; push @EXPORT, qw(CMD_EXPIRE); $oCommandHash{&CMD_EXPIRE} = true; @@ -239,6 +243,8 @@ use constant OPTION_TEST_POINT => 'test-poi # GENERAL Section #----------------------------------------------------------------------------------------------------------------------------------- +use constant OPTION_ARCHIVE_TIMEOUT => 'archive-timeout'; + push @EXPORT, qw(OPTION_ARCHIVE_TIMEOUT); use constant OPTION_BUFFER_SIZE => 'buffer-size'; push @EXPORT, qw(OPTION_BUFFER_SIZE); use constant OPTION_CONFIG_REMOTE => 'config-remote'; @@ -380,6 +386,13 @@ use constant OPTION_DEFAULT_TEST_NO_FORK => false; # GENERAL Section #----------------------------------------------------------------------------------------------------------------------------------- +use constant OPTION_DEFAULT_ARCHIVE_TIMEOUT => 60; + push @EXPORT, qw(OPTION_DEFAULT_ARCHIVE_TIMEOUT); +use constant OPTION_DEFAULT_ARCHIVE_TIMEOUT_MIN => WAIT_TIME_MINIMUM; + push @EXPORT, qw(OPTION_DEFAULT_ARCHIVE_TIMEOUT_MIN); +use constant OPTION_DEFAULT_ARCHIVE_TIMEOUT_MAX => 86400; + push @EXPORT, qw(OPTION_DEFAULT_ARCHIVE_TIMEOUT_MAX); + use constant OPTION_DEFAULT_BUFFER_SIZE => 4194304; push @EXPORT, qw(OPTION_DEFAULT_BUFFER_SIZE); use constant OPTION_DEFAULT_BUFFER_SIZE_MIN => 16384; @@ -406,6 +419,11 @@ use constant OPTION_DEFAULT_COMPRESS_LEVEL_NETWORK_MAX => 9; use constant OPTION_DEFAULT_DB_TIMEOUT => 1800; push @EXPORT, qw(OPTION_DEFAULT_DB_TIMEOUT); +use constant 
OPTION_DEFAULT_DB_TIMEOUT_MIN => WAIT_TIME_MINIMUM; + push @EXPORT, qw(OPTION_DEFAULT_DB_TIMEOUT_MIN); +use constant OPTION_DEFAULT_DB_TIMEOUT_MAX => 86400 * 7; + push @EXPORT, qw(OPTION_DEFAULT_DB_TIMEOUT_MAX); + use constant OPTION_DEFAULT_CONFIG => '/etc/' . BACKREST_EXE . '.conf'; push @EXPORT, qw(OPTION_DEFAULT_CONFIG); use constant OPTION_DEFAULT_LOCK_PATH => '/tmp/' . BACKREST_EXE; @@ -493,6 +511,12 @@ use constant OPTION_DEFAULT_DB_USER => 'postgres #################################################################################################################################### # Option Rule Hash +# +# pgbackrest will throw an error if: +# 1) an option is provided when executing the command that is not listed in the OPTION_RULE_COMMAND section of the Option Rule Hash +# 2) or an option is not provided when executing the command and it is listed in the OPTION_RULE_COMMAND section as "true" +# If an OPTION_RULE_COMMAND is set to "false" then pgbackrest will not throw an error if the option is missing and also will not throw an +# error if it exists. 
#################################################################################################################################### my %oOptionRule = ( @@ -508,6 +532,7 @@ my %oOptionRule = &CMD_ARCHIVE_GET => true, &CMD_ARCHIVE_PUSH => true, &CMD_BACKUP => true, + &CMD_CHECK => true, &CMD_EXPIRE => true, &CMD_INFO => true, &CMD_REMOTE => true, @@ -609,6 +634,10 @@ my %oOptionRule = { &OPTION_RULE_REQUIRED => true }, + &CMD_CHECK => + { + &OPTION_RULE_REQUIRED => true + }, &CMD_EXPIRE => { &OPTION_RULE_REQUIRED => true @@ -852,6 +881,19 @@ my %oOptionRule = # GENERAL Section #------------------------------------------------------------------------------------------------------------------------------- + &OPTION_ARCHIVE_TIMEOUT => + { + &OPTION_RULE_SECTION => CONFIG_SECTION_GLOBAL, + &OPTION_RULE_TYPE => OPTION_TYPE_FLOAT, + &OPTION_RULE_DEFAULT => OPTION_DEFAULT_ARCHIVE_TIMEOUT, + &OPTION_RULE_ALLOW_RANGE => [OPTION_DEFAULT_ARCHIVE_TIMEOUT_MIN, OPTION_DEFAULT_ARCHIVE_TIMEOUT_MAX], + &OPTION_RULE_COMMAND => + { + &CMD_BACKUP => true, + &CMD_CHECK => true, + }, + }, + &OPTION_BUFFER_SIZE => { &OPTION_RULE_SECTION => CONFIG_SECTION_GLOBAL, @@ -863,6 +905,7 @@ my %oOptionRule = &CMD_ARCHIVE_GET => true, &CMD_ARCHIVE_PUSH => true, &CMD_BACKUP => true, + &CMD_CHECK => true, &CMD_EXPIRE => false, &CMD_INFO => true, &CMD_REMOTE => true, @@ -873,13 +916,15 @@ my %oOptionRule = &OPTION_DB_TIMEOUT => { &OPTION_RULE_SECTION => CONFIG_SECTION_GLOBAL, - &OPTION_RULE_TYPE => OPTION_TYPE_INTEGER, + &OPTION_RULE_TYPE => OPTION_TYPE_FLOAT, &OPTION_RULE_DEFAULT => OPTION_DEFAULT_DB_TIMEOUT, + &OPTION_RULE_ALLOW_RANGE => [OPTION_DEFAULT_DB_TIMEOUT_MIN, OPTION_DEFAULT_DB_TIMEOUT_MAX], &OPTION_RULE_COMMAND => { &CMD_ARCHIVE_GET => true, &CMD_ARCHIVE_PUSH => true, &CMD_BACKUP => true, + &CMD_CHECK => true, &CMD_EXPIRE => false, &CMD_INFO => true, &CMD_REMOTE => true, @@ -913,6 +958,7 @@ my %oOptionRule = &CMD_ARCHIVE_GET => true, &CMD_ARCHIVE_PUSH => true, &CMD_BACKUP => true, + 
&CMD_CHECK => true, &CMD_EXPIRE => false, &CMD_INFO => true, &CMD_REMOTE => true, @@ -931,6 +977,7 @@ my %oOptionRule = &CMD_ARCHIVE_GET => true, &CMD_ARCHIVE_PUSH => true, &CMD_BACKUP => true, + &CMD_CHECK => true, &CMD_EXPIRE => false, &CMD_INFO => true, &CMD_REMOTE => true, @@ -948,6 +995,7 @@ my %oOptionRule = &CMD_ARCHIVE_GET => true, &CMD_ARCHIVE_PUSH => true, &CMD_BACKUP => true, + &CMD_CHECK => true, &CMD_INFO => true, &CMD_RESTORE => true }, @@ -963,6 +1011,7 @@ my %oOptionRule = &CMD_ARCHIVE_GET => true, &CMD_ARCHIVE_PUSH => true, &CMD_BACKUP => true, + &CMD_CHECK => true, &CMD_INFO => false, &CMD_EXPIRE => false, &CMD_REMOTE => true, @@ -1001,6 +1050,7 @@ my %oOptionRule = &CMD_ARCHIVE_GET => true, &CMD_ARCHIVE_PUSH => true, &CMD_BACKUP => true, + &CMD_CHECK => true, &CMD_INFO => true, &CMD_REMOTE => true, &CMD_RESTORE => true, @@ -1020,6 +1070,7 @@ my %oOptionRule = &CMD_ARCHIVE_GET => true, &CMD_ARCHIVE_PUSH => true, &CMD_BACKUP => true, + &CMD_CHECK => true, &CMD_INFO => true, &CMD_REMOTE => true, &CMD_RESTORE => true, @@ -1082,6 +1133,7 @@ my %oOptionRule = &CMD_ARCHIVE_GET => true, &CMD_ARCHIVE_PUSH => true, &CMD_BACKUP => true, + &CMD_CHECK => true, &CMD_INFO => true, &CMD_RESTORE => true } @@ -1109,6 +1161,7 @@ my %oOptionRule = &CMD_ARCHIVE_GET => true, &CMD_ARCHIVE_PUSH => true, &CMD_BACKUP => true, + &CMD_CHECK => true, &CMD_EXPIRE => true, &CMD_INFO => true, &CMD_RESTORE => true, @@ -1137,6 +1190,7 @@ my %oOptionRule = &CMD_ARCHIVE_GET => true, &CMD_ARCHIVE_PUSH => true, &CMD_BACKUP => true, + &CMD_CHECK => true, &CMD_EXPIRE => true, &CMD_INFO => true, &CMD_RESTORE => true, @@ -1216,6 +1270,7 @@ my %oOptionRule = { &CMD_ARCHIVE_GET => true, &CMD_ARCHIVE_PUSH => true, + &CMD_CHECK => true, &CMD_INFO => true, &CMD_RESTORE => true }, @@ -1230,6 +1285,7 @@ my %oOptionRule = { &CMD_ARCHIVE_GET => true, &CMD_ARCHIVE_PUSH => true, + &CMD_CHECK => true, &CMD_INFO => true, &CMD_RESTORE => true }, @@ -1467,7 +1523,11 @@ my %oOptionRule = { 
&OPTION_RULE_REQUIRED => false }, - &CMD_BACKUP => true + &CMD_BACKUP => true, + &CMD_CHECK => + { + &OPTION_RULE_REQUIRED => false + } }, }, @@ -1479,6 +1539,7 @@ my %oOptionRule = &OPTION_RULE_COMMAND => { &CMD_BACKUP => true, + &CMD_CHECK => true, &CMD_REMOTE => true } }, @@ -1491,6 +1552,7 @@ my %oOptionRule = &OPTION_RULE_COMMAND => { &CMD_BACKUP => true, + &CMD_CHECK => true, &CMD_REMOTE => true } }, @@ -1502,7 +1564,8 @@ my %oOptionRule = &OPTION_RULE_DEFAULT => OPTION_DEFAULT_DB_USER, &OPTION_RULE_COMMAND => { - &CMD_BACKUP => true + &CMD_BACKUP => true, + &CMD_CHECK => true, }, &OPTION_RULE_REQUIRED => false, &OPTION_RULE_DEPEND => diff --git a/lib/pgBackRest/Config/ConfigHelpData.pm b/lib/pgBackRest/Config/ConfigHelpData.pm index 71ba5ad2d..5d1713ba0 100644 --- a/lib/pgBackRest/Config/ConfigHelpData.pm +++ b/lib/pgBackRest/Config/ConfigHelpData.pm @@ -90,6 +90,19 @@ my $oConfigHelpData = "\${stanza} is the backup stanza." }, + # ARCHIVE-TIMEOUT Option Help + #--------------------------------------------------------------------------------------------------------------------------- + 'archive-timeout' => + { + section => 'general', + summary => + "Archive timeout.", + description => + "Set maximum time, in seconds, to wait for WAL segments to reach the archive. The timeout applies to the check " . + "command and to the backup command when waiting for WAL segments required to make the backup consistent to " . + "be archived." + }, + # BACKUP-HOST Option Help #--------------------------------------------------------------------------------------------------------------------------- 'backup-host' => @@ -277,10 +290,10 @@ my $oConfigHelpData = summary => "Database query timeout.", description => - "Sets the timeout for queries against the database. This includes the pg_start_backup() and pg_stop_backup() " . - "functions which can each take a substantial amount of time. Because of this the timeout should be kept " . 
- "high unless you know that these functions will return quickly (i.e. if you have set startfast=y and you " . - "know that the database cluster will not generate many WAL segments during the backup)." + "Sets the timeout, in seconds, for queries against the database. This includes the pg_start_backup() and " . + "pg_stop_backup() functions which can each take a substantial amount of time. Because of this the timeout " . + "should be kept high unless you know that these functions will return quickly (i.e. if you have set " . + "startfast=y and you know that the database cluster will not generate many WAL segments during the backup)." }, # DB-USER Option Help @@ -734,6 +747,7 @@ my $oConfigHelpData = { 'archive-check' => 'section', 'archive-copy' => 'section', + 'archive-timeout' => 'section', 'buffer-size' => 'section', 'cmd-remote' => 'section', 'compress' => 'section', @@ -817,6 +831,47 @@ my $oConfigHelpData = } }, + # CHECK Command Help + #--------------------------------------------------------------------------------------------------------------------------- + 'check' => + { + summary => + "Check the configuration.", + description => + "The check command validates that pgBackRest and the archive_command setting are configured correctly for " . + "archiving and backups. It detects misconfigurations, particularly in archiving, that result in incomplete " . + "backups because required WAL segments did not reach the archive. The command can be run on the database or " . + "the backup host.\n" . + "\n" . + "Note that pg_create_restore_point('pgBackRest Archive Check') and pg_switch_xlog() are called to force " . + "PostgreSQL to archive a WAL segment. Restore points are only supported in PostgreSQL >= 9.1 so for older " . 
+ "versions the check command may fail if there has been no write activity since the last log rotation.", + + option => + { + 'archive-timeout' => 'section', + 'backup-host' => 'section', + 'backup-user' => 'section', + 'buffer-size' => 'section', + 'cmd-remote' => 'section', + 'compress-level' => 'section', + 'compress-level-network' => 'section', + 'config' => 'default', + 'config-remote' => 'section', + 'db-path' => 'section', + 'db-port' => 'section', + 'db-socket-path' => 'section', + 'db-timeout' => 'section', + 'db-user' => 'section', + 'log-level-console' => 'section', + 'log-level-file' => 'section', + 'log-path' => 'section', + 'neutral-umask' => 'section', + 'repo-path' => 'section', + 'stanza' => 'default' + } + }, + # EXPIRE Command Help #--------------------------------------------------------------------------------------------------------------------------- 'expire' => diff --git a/lib/pgBackRest/Db.pm b/lib/pgBackRest/Db.pm index 98a0758a7..25047c3ff 100644 --- a/lib/pgBackRest/Db.pm +++ b/lib/pgBackRest/Db.pm @@ -627,20 +627,8 @@ sub backupStart {name => 'bStartFast'} ); - # Get the version from the control file - my ($strDbVersion) = $self->info($oFile, $strDbPath); - - # Get version and db path from the database - my ($fCompareDbVersion, $strCompareDbPath) = $self->versionGet(); - - # Error if the version from the control file and the configured db-path do not match the values obtained from the database - if (!($strDbVersion == $fCompareDbVersion && $strDbPath eq $strCompareDbPath)) - { - confess &log(ERROR, - "version '${fCompareDbVersion}' and db-path '${strCompareDbPath}' queried from cluster does not match" . - " version '${strDbVersion}' and db-path '${strDbPath}' read from '${strDbPath}/" . DB_FILE_PGCONTROL . "'\n" . 
- "HINT: the db-path and db-port settings likely reference different clusters", ERROR_DB_MISMATCH); - } + # Validate the database configuration + $self->configValidate($oFile, $strDbPath); # Only allow start-fast option for version >= 8.4 if ($self->{strDbVersion} < PG_VERSION_84 && $bStartFast) @@ -649,20 +637,6 @@ sub backupStart $bStartFast = false; } - # Error if archive_mode = always (support has not been added yet) - if ($self->executeSql('show archive_mode') eq 'always') - { - confess &log(ERROR, "archive_mode=always not supported", ERROR_FEATURE_NOT_SUPPORTED); - } - - # Check if archive_command is set - my $strArchiveCommand = $self->executeSql('show archive_command'); - - if (index($strArchiveCommand, BACKREST_EXE) == -1) - { - confess &log(ERROR, 'archive_command must contain \'' . BACKREST_EXE . '\'', ERROR_ARCHIVE_COMMAND_INVALID); - } - # Acquire the backup advisory lock to make sure that backups are not running from multiple backup servers against the same # database cluster. This lock helps make the stop-auto option safe. if (!$self->executeSqlOne('select pg_try_advisory_lock(' . DB_BACKUP_ADVISORY_LOCK . ')')) @@ -760,4 +734,93 @@ sub backupStop ); } +#################################################################################################################################### +# configValidate +# +# Validate the database configuration and archiving. +#################################################################################################################################### +sub configValidate +{ + my $self = shift; + + # Assign function parameters, defaults, and log debug info + my + ( + $strOperation, + $oFile, + $strDbPath + ) = + logDebugParam + ( + __PACKAGE__ . 
'->configValidate', \@_, + {name => 'oFile'}, + {name => 'strDbPath'} + ); + + # Get the version from the control file + my ($strDbVersion) = $self->info($oFile, $strDbPath); + + # Get version and db path from the database + my ($fCompareDbVersion, $strCompareDbPath) = $self->versionGet(); + + # Error if the version from the control file and the configured db-path do not match the values obtained from the database + if (!($strDbVersion == $fCompareDbVersion && $strDbPath eq $strCompareDbPath)) + { + confess &log(ERROR, + "version '${fCompareDbVersion}' and db-path '${strCompareDbPath}' queried from cluster does not match" . + " version '${strDbVersion}' and db-path '${strDbPath}' read from '${strDbPath}/" . DB_FILE_PGCONTROL . "'\n" . + "HINT: the db-path and db-port settings likely reference different clusters", ERROR_DB_MISMATCH); + } + + # Error if archive_mode = always (support has not been added yet) + if ($self->executeSql('show archive_mode') eq 'always') + { + confess &log(ERROR, "archive_mode=always not supported", ERROR_FEATURE_NOT_SUPPORTED); + } + + # Check if archive_command is set + my $strArchiveCommand = $self->executeSql('show archive_command'); + + if (index($strArchiveCommand, BACKREST_EXE) == -1) + { + confess &log(ERROR, 'archive_command must contain \'' . BACKREST_EXE . '\'', ERROR_ARCHIVE_COMMAND_INVALID); + } + + return logDebugReturn + ( + $strOperation + ); +} + +#################################################################################################################################### +# xlogSwitch +# +# Forces a switch to the next transaction log in order to archive the current log. +#################################################################################################################################### +sub xlogSwitch +{ + my $self = shift; + + # Assign function parameters, defaults, and log debug info + my $strOperation = logDebugParam(__PACKAGE__ . 
'->xlogSwitch'); + + # Create a restore point to ensure current xlog will be archived. For versions <= 9.0 activity will need to be generated by + # the user if there have been no writes since the last xlog switch. + if ($self->{strDbVersion} >= PG_VERSION_91) + { + $self->executeSql("select pg_create_restore_point('pgBackRest Archive Check');"); + } + + my $strWalFileName = $self->executeSqlRow('select pg_xlogfile_name from pg_xlogfile_name(pg_switch_xlog());'); + + &log(INFO, "switch xlog ${strWalFileName}"); + + # Return from function and log return values if any + return logDebugReturn + ( + $strOperation, + {name => 'strXlogFileName', value => $strWalFileName} + ); +} + 1; diff --git a/lib/pgBackRest/Version.pm b/lib/pgBackRest/Version.pm index a3959ffb7..1a0fa4ed8 100644 --- a/lib/pgBackRest/Version.pm +++ b/lib/pgBackRest/Version.pm @@ -26,7 +26,7 @@ use constant BACKREST_EXE => lc(BACKRE # repositories or manifests can be read - that's the job of the format number. #----------------------------------------------------------------------------------------------------------------------------------- our # 'our' keyword is on a separate line to make the ExtUtils::MakeMaker parser happy. -$VERSION = '1.02'; +$VERSION = '1.03dev'; push @EXPORT, qw($VERSION); diff --git a/test/lib/pgBackRestTest/BackupCommonTest.pm b/test/lib/pgBackRestTest/BackupCommonTest.pm index e1747d0f6..e6bb495ea 100644 --- a/test/lib/pgBackRestTest/BackupCommonTest.pm +++ b/test/lib/pgBackRestTest/BackupCommonTest.pm @@ -292,6 +292,7 @@ sub BackRestTestBackup_ClusterStart my $bHotStandby = shift; my $bArchive = shift; my $bArchiveAlways = shift; + my $bArchiveInvalid = shift; # Set default $iPort = defined($iPort) ? $iPort : BackRestTestCommon_DbPortGet(); @@ -307,7 +308,8 @@ sub BackRestTestBackup_ClusterStart } # Create the archive command - my $strArchive = BackRestTestCommon_CommandMainAbsGet() . ' --stanza=' . BackRestTestCommon_StanzaGet() . 
+ my $strArchive = BackRestTestCommon_CommandMainAbsGet() . ' --stanza=' . + (defined($bArchiveInvalid) ? 'bogus' : BackRestTestCommon_StanzaGet()) . ' --config=' . BackRestTestCommon_DbPathGet() . '/pgbackrest.conf archive-push %p'; # Start the cluster @@ -389,6 +391,7 @@ sub BackRestTestBackup_ClusterCreate my $iPort = shift; my $bArchive = shift; my $strXlogPath = shift; + my $bArchiveInvalid = shift; # Defaults $strPath = defined($strPath) ? $strPath : BackRestTestCommon_DbCommonPathGet(); @@ -399,7 +402,7 @@ sub BackRestTestBackup_ClusterCreate '/initdb' . (BackRestTestCommon_DbVersion() >= PG_VERSION_92 ? ' --xlogdir=${strXlogPath}' : '') . " --pgdata=${strPath} --auth=trust"); - BackRestTestBackup_ClusterStart($strPath, $iPort, undef, $bArchive); + BackRestTestBackup_ClusterStart($strPath, $iPort, undef, $bArchive, undef, $bArchiveInvalid); # Connect user session BackRestTestBackup_PgConnect(); @@ -1306,6 +1309,31 @@ sub BackRestTestBackup_Backup return BackRestTestBackup_BackupEnd(); } +#################################################################################################################################### +# BackRestTestBackup_Check +#################################################################################################################################### +push @EXPORT, qw(BackRestTestBackup_Check); + +sub BackRestTestBackup_Check +{ + my $strStanza = shift; + my $bRemote = shift; + my $iArchiveTimeout = shift; + my $strComment = shift; + my $iExpectedExitStatus = shift; + + $strComment = "check" . (defined($strStanza) ? " ${strStanza}" : '') . " (" . $strComment . ")"; + &log(INFO, " $strComment"); + + my $strCommand = ($bRemote ? BackRestTestCommon_CommandMainAbsGet() : BackRestTestCommon_CommandMainGet()) . + ' --config=' . ($bRemote ? BackRestTestCommon_RepoPathGet() : BackRestTestCommon_DbPathGet()) . 
+ "/pgbackrest.conf --archive-timeout=${iArchiveTimeout} --stanza=${strStanza} check --log-level-console=detail"; + + executeTest($strCommand, + {bRemote => $bRemote, strComment => $strComment, iExpectedExitStatus => $iExpectedExitStatus, + oLogTest => $oBackupLogTest}); +} + #################################################################################################################################### # BackRestTestBackup_Info #################################################################################################################################### diff --git a/test/lib/pgBackRestTest/BackupTest.pm b/test/lib/pgBackRestTest/BackupTest.pm index 47323680d..87499618c 100755 --- a/test/lib/pgBackRestTest/BackupTest.pm +++ b/test/lib/pgBackRestTest/BackupTest.pm @@ -1717,7 +1717,8 @@ sub BackRestTestBackup_Test # Create the cluster if ($bCreate) { - BackRestTestBackup_ClusterCreate(); + # For the 'fail on missing archive.info file' test, the archive.info file must not be found so set archive invalid + BackRestTestBackup_ClusterCreate(undef, undef, undef, undef, true); $bCreate = false; } @@ -1754,16 +1755,68 @@ sub BackRestTestBackup_Test # Test invalid archive command #----------------------------------------------------------------------------------------------------------------------- $strType = BACKUP_TYPE_FULL; - $strComment = 'fail on invalid archive_command'; + + # NOTE: This must run before the success test since that will create the archive.info file + $strComment = 'fail on missing archive.info file'; + + BackRestTestBackup_Check($strStanza, $bRemote, 0.1, $strComment, ERROR_FILE_MISSING); + + # Clean up the archive_timeout error from the postgresql log by stopping the cluster and removing the log file before + # running the next test + BackRestTestBackup_ClusterStop(undef, undef, true); + BackRestTestCommon_FileRemove(BackRestTestCommon_DbCommonPathGet() . 
'/postgresql.log'); # Check archive_command_not_set error - BackRestTestBackup_ClusterStop(); + $strComment = 'fail on invalid archive_command'; BackRestTestBackup_ClusterStart(undef, undef, undef, false); - BackRestTestBackup_Backup($strType, $strStanza, $strComment, {iExpectedExitStatus => ERROR_ARCHIVE_COMMAND_INVALID}); + BackRestTestBackup_Backup( + $strType, $strStanza, $strComment, + {iExpectedExitStatus => ERROR_ARCHIVE_COMMAND_INVALID}); + + BackRestTestBackup_Check($strStanza, $bRemote, 0.1, $strComment, ERROR_ARCHIVE_COMMAND_INVALID); + + # If running the remote tests then also need to run check locally + if ($bRemote) + { + BackRestTestBackup_Check($strStanza, false, 0.1, "${strComment} - remote", ERROR_ARCHIVE_COMMAND_INVALID); + } + + # Clean up the archive_command error from the postgresql log by stopping the cluster and removing the log file before + # running the next test + BackRestTestBackup_ClusterStop(undef, undef, true); + BackRestTestCommon_FileRemove(BackRestTestCommon_DbCommonPathGet() . '/postgresql.log'); + + # Providing a sufficient archive-timeout, verify that the check command runs successfully. 
+ $strComment = 'verify success'; + + BackRestTestBackup_ClusterStart(); + BackRestTestBackup_Check($strStanza, $bRemote, 5, $strComment, 0); + + # If running the remote tests then also need to run check locally + if ($bRemote) + { + BackRestTestBackup_Check($strStanza, false, 5, "${strComment} - remote", 0); + } + + # Check archive_timeout error + $strComment = 'fail on archive timeout'; + BackRestTestBackup_ClusterStop(); + BackRestTestBackup_ClusterStart(undef, undef, undef, undef, undef, true); + BackRestTestBackup_Check($strStanza, $bRemote, 0.1, $strComment, ERROR_ARCHIVE_TIMEOUT); + + # If running the remote tests then also need to run check locally + if ($bRemote) + { + BackRestTestBackup_Check($strStanza, false, 0.1, "${strComment} - remote", ERROR_ARCHIVE_TIMEOUT); + } + + # Clean up the archive_timeout error from the postgresql log by stopping the cluster and removing the log file + # before running the next test + BackRestTestBackup_ClusterStop(undef, undef, true); + BackRestTestCommon_FileRemove(BackRestTestCommon_DbCommonPathGet() . '/postgresql.log'); # Reset the cluster to a normal state so the next test will work - BackRestTestBackup_ClusterStop(); BackRestTestBackup_ClusterStart(); # Full backup