From 41b27c0456793694316daa882ffbd2209fd249c3 Mon Sep 17 00:00:00 2001 From: David Steele Date: Wed, 22 Jun 2016 18:01:18 -0400 Subject: [PATCH] Protocol timeout option and keep-alive fixes. * Fixed an issue where keep-alives could be starved out by lots of small files during multi-threaded operation and were completely absent during single-threaded operation when resuming from a previous incomplete backup. Reported by Janice Parkinson. * Added the protocol-timeout option. Previously protocol-timeout was set as db-timeout + 30 seconds. * Failure to shutdown remotes at the end of the backup no longer throws an exception. A warning is still generated that recommends a higher protocol-timeout. --- bin/pgbackrest | 5 +- doc/xml/reference.xml | 9 +++ doc/xml/release.xml | 21 +++++++ lib/pgBackRest/Backup.pm | 20 +++++- lib/pgBackRest/Common/Exit.pm | 21 +------ lib/pgBackRest/Common/Log.pm | 2 + lib/pgBackRest/Config/Config.pm | 69 +++++++++++++++------ lib/pgBackRest/Config/ConfigHelpData.pm | 23 +++++-- lib/pgBackRest/Protocol/CommonMaster.pm | 43 ++++++++++--- test/expect/backup-synthetic-005.log | 25 +++++--- test/lib/pgBackRestTest/BackupCommonTest.pm | 5 +- test/lib/pgBackRestTest/BackupTest.pm | 14 ++++- test/lib/pgBackRestTest/ConfigTest.pm | 15 ++++- 13 files changed, 203 insertions(+), 69 deletions(-) diff --git a/bin/pgbackrest b/bin/pgbackrest index 69db7c8f1..07292b1db 100755 --- a/bin/pgbackrest +++ b/bin/pgbackrest @@ -78,7 +78,7 @@ eval optionGet(OPTION_BUFFER_SIZE), optionGet(OPTION_COMPRESS_LEVEL), optionGet(OPTION_COMPRESS_LEVEL_NETWORK), - protocolTimeoutGet() + optionGet(OPTION_PROTOCOL_TIMEOUT) ); # ??? Would like to remove this check and allow a test lock @@ -226,6 +226,9 @@ eval } } + # Process exit test point + &log(TEST, TEST_PROCESS_EXIT); + lockRelease(); exitSafe(0); }; diff --git a/doc/xml/reference.xml b/doc/xml/reference.xml index 8457f9c96..38049378a 100644 --- a/doc/xml/reference.xml +++ b/doc/xml/reference.xml @@ -178,6 +178,15 @@ /backup/db/spool + + + Protocol timeout. + + Sets the timeout, in seconds, that the master or remote process will wait for a new message to be received on the protocol layer. This prevents processes from waiting indefinitely for a message. The protocol-timeout option must be greater than the db-timeout option. + + 630 + + Repository path where WAL segments and backups stored. diff --git a/doc/xml/release.xml b/doc/xml/release.xml index fb41b8e1d..8ef199c54 100644 --- a/doc/xml/release.xml +++ b/doc/xml/release.xml @@ -57,6 +57,11 @@ Dwaligon + + Janice Parkinson + jpabt + + Michael Renner terrorobe @@ -92,6 +97,14 @@ + + + + + +

Fixed an issue where keep-alives could be starved out by lots of small files during multi-threaded operation and were completely absent during single-threaded operation when resuming from a previous incomplete backup.

+
+ @@ -111,6 +124,14 @@

Added check command to validate that is configured correctly for archiving and backups.

+ + +

Added the protocol-timeout option. Previously protocol-timeout was set as db-timeout + 30 seconds.

+
+ + +

Failure to shutdown remotes at the end of the backup no longer throws an exception. A warning is still generated that recommends a higher protocol-timeout.

+
diff --git a/lib/pgBackRest/Backup.pm b/lib/pgBackRest/Backup.pm index 3bb574307..bd59e7f5e 100644 --- a/lib/pgBackRest/Backup.pm +++ b/lib/pgBackRest/Backup.pm @@ -448,6 +448,10 @@ sub processManifest $lManifestSaveCurrent = backupManifestUpdate($oBackupManifest, $$oFileCopy{repo_file}, $bCopied, $lCopySize, $lRepoSize, $strCopyChecksum, $lManifestSaveSize, $lManifestSaveCurrent); + + # A keep-alive is required here because if there are a large number of resumed files that need to be checksummed + # then the remote might timeout while waiting for a command. + protocolGet()->keepAlive(); } } } @@ -481,10 +485,12 @@ sub processManifest # Complete thread queues my $bDone = false; + my $bKeepAlive = false; do { $bDone = threadGroupComplete(); + $bKeepAlive = false; # Read the messages that are passed back from the backup threads while (my $oMessage = $oResultQueue->dequeue_nb()) @@ -494,10 +500,18 @@ sub processManifest $lManifestSaveCurrent = backupManifestUpdate($oBackupManifest, $$oMessage{repo_file}, $$oMessage{copied}, $$oMessage{size}, $$oMessage{repo_size}, $$oMessage{checksum}, $lManifestSaveSize, $lManifestSaveCurrent); + + # A keep-alive is required inside the loop because a flood of thread messages could prevent the keep-alive + # outside the loop from running in a timely fashion. + protocolGet()->keepAlive(); + $bKeepAlive = true; } - # Keep the protocol layer from timing out - protocolGet()->keepAlive(); + # This keep-alive only runs if the keep-alive in the loop did not run + if (!$bKeepAlive) + { + protocolGet()->keepAlive(); + } } while (!$bDone); } @@ -679,7 +693,7 @@ sub process $oDatabaseMap = $self->{oDb}->databaseMapGet(); } - # Buid the manifest + # Build the manifest $oBackupManifest->build($self->{oFile}, optionGet(OPTION_DB_PATH), $oLastManifest, optionGet(OPTION_ONLINE), $oTablespaceMap, $oDatabaseMap); &log(TEST, TEST_MANIFEST_BUILD); diff --git a/lib/pgBackRest/Common/Exit.pm b/lib/pgBackRest/Common/Exit.pm index 1262441ee..3838616d9 100644 --- a/lib/pgBackRest/Common/Exit.pm +++ b/lib/pgBackRest/Common/Exit.pm @@ -114,25 +114,8 @@ sub exitSafe } } - # Don't fail if protocol cannot be destroyed - eval - { - protocolDestroy(); - }; - - if ($@ && defined($iExitCode)) - { - my $oMessage = $@; - - if (blessed($oMessage) && $oMessage->isa('pgBackRest::Common::Exception')) - { - &log(WARN, 'unable to shutdown protocol (' . $oMessage->code() . '): ' . $oMessage->message()); - - exit $oMessage->code(); - } - - &log(WARN, "unable to shutdown protocol: $oMessage"); - } + # Close the remote + protocolDestroy(); # Don't fail if the lock can't be released eval diff --git a/lib/pgBackRest/Common/Log.pm b/lib/pgBackRest/Common/Log.pm index 9f9acb704..c56ab08a1 100644 --- a/lib/pgBackRest/Common/Log.pm +++ b/lib/pgBackRest/Common/Log.pm @@ -94,6 +94,8 @@ use constant TEST_BACKUP_START => 'BACKUP-S push @EXPORT, qw(TEST_BACKUP_START); use constant TEST_KEEP_ALIVE => 'KEEP_ALIVE'; push @EXPORT, qw(TEST_KEEP_ALIVE); +use constant TEST_PROCESS_EXIT => 'TEST_PROCESS_EXIT'; + push @EXPORT, qw(TEST_PROCESS_EXIT); use constant TEST_ARCHIVE_PUSH_ASYNC_START => 'ARCHIVE-PUSH-ASYNC-START'; push @EXPORT, qw(TEST_ARCHIVE_PUSH_ASYNC_START); diff --git a/lib/pgBackRest/Config/Config.pm b/lib/pgBackRest/Config/Config.pm index 57959b224..5c3da4078 100644 --- a/lib/pgBackRest/Config/Config.pm +++ b/lib/pgBackRest/Config/Config.pm @@ -259,6 +259,8 @@ use constant OPTION_COMPRESS_LEVEL_NETWORK => 'compress push @EXPORT, qw(OPTION_COMPRESS_LEVEL_NETWORK); use constant OPTION_NEUTRAL_UMASK => 'neutral-umask'; push @EXPORT, qw(OPTION_NEUTRAL_UMASK); +use constant OPTION_PROTOCOL_TIMEOUT => 'protocol-timeout'; + push @EXPORT, qw(OPTION_PROTOCOL_TIMEOUT); use constant OPTION_THREAD_MAX => 'thread-max'; push @EXPORT, qw(OPTION_THREAD_MAX); use constant OPTION_THREAD_TIMEOUT => 'thread-timeout'; @@ -424,6 +426,13 @@ use constant OPTION_DEFAULT_DB_TIMEOUT_MIN => WAIT_TIME use constant OPTION_DEFAULT_DB_TIMEOUT_MAX => 86400 * 7; push @EXPORT, qw(OPTION_DEFAULT_DB_TIMEOUT_MAX); +use constant OPTION_DEFAULT_PROTOCOL_TIMEOUT => OPTION_DEFAULT_DB_TIMEOUT + 30; + push @EXPORT, qw(OPTION_DEFAULT_PROTOCOL_TIMEOUT); +use constant OPTION_DEFAULT_PROTOCOL_TIMEOUT_MIN => OPTION_DEFAULT_DB_TIMEOUT_MIN; + push @EXPORT, qw(OPTION_DEFAULT_PROTOCOL_TIMEOUT_MIN); +use constant OPTION_DEFAULT_PROTOCOL_TIMEOUT_MAX => OPTION_DEFAULT_DB_TIMEOUT_MAX; + push @EXPORT, qw(OPTION_DEFAULT_PROTOCOL_TIMEOUT_MAX); + use constant OPTION_DEFAULT_CONFIG => '/etc/' . BACKREST_EXE . '.conf'; push @EXPORT, qw(OPTION_DEFAULT_CONFIG); use constant OPTION_DEFAULT_LOCK_PATH => '/tmp/' . BACKREST_EXE; @@ -921,14 +930,14 @@ my %oOptionRule = &OPTION_RULE_ALLOW_RANGE => [OPTION_DEFAULT_DB_TIMEOUT_MIN, OPTION_DEFAULT_DB_TIMEOUT_MAX], &OPTION_RULE_COMMAND => { - &CMD_ARCHIVE_GET => true, - &CMD_ARCHIVE_PUSH => true, + &CMD_ARCHIVE_GET => false, + &CMD_ARCHIVE_PUSH => false, &CMD_BACKUP => true, &CMD_CHECK => true, &CMD_EXPIRE => false, - &CMD_INFO => true, + &CMD_INFO => false, &CMD_REMOTE => true, - &CMD_RESTORE => true + &CMD_RESTORE => false } }, @@ -1060,6 +1069,25 @@ my %oOptionRule = }, }, + &OPTION_PROTOCOL_TIMEOUT => + { + &OPTION_RULE_SECTION => CONFIG_SECTION_GLOBAL, + &OPTION_RULE_TYPE => OPTION_TYPE_FLOAT, + &OPTION_RULE_DEFAULT => OPTION_DEFAULT_PROTOCOL_TIMEOUT, + &OPTION_RULE_ALLOW_RANGE => [OPTION_DEFAULT_PROTOCOL_TIMEOUT_MIN, OPTION_DEFAULT_PROTOCOL_TIMEOUT_MAX], + &OPTION_RULE_COMMAND => + { + &CMD_ARCHIVE_GET => true, + &CMD_ARCHIVE_PUSH => true, + &CMD_BACKUP => true, + &CMD_CHECK => true, + &CMD_EXPIRE => false, + &CMD_INFO => true, + &CMD_REMOTE => true, + &CMD_RESTORE => true + } + }, + &OPTION_REPO_PATH => { &OPTION_RULE_SECTION => CONFIG_SECTION_GLOBAL, @@ -1687,6 +1715,16 @@ sub configLoad umask(0000); } + # Protocol timeout should be greater than db timeout + if (optionTest(OPTION_DB_TIMEOUT) && optionTest(OPTION_PROTOCOL_TIMEOUT) && + optionGet(OPTION_PROTOCOL_TIMEOUT) <= optionGet(OPTION_DB_TIMEOUT)) + { + confess &log(ERROR, + "'" . optionGet(OPTION_PROTOCOL_TIMEOUT) . "' is not valid for '" . OPTION_PROTOCOL_TIMEOUT . "' option\n" . + "HINT: 'protocol-timeout' option should be greater than 'db-timeout' option.", + ERROR_OPTION_INVALID_VALUE); + } + # Check if the backup host is remote if (optionTest(OPTION_BACKUP_HOST)) { @@ -2461,7 +2499,7 @@ sub protocolGet optionGet(OPTION_BUFFER_SIZE), commandTest(CMD_EXPIRE) ? OPTION_DEFAULT_COMPRESS_LEVEL : optionGet(OPTION_COMPRESS_LEVEL), commandTest(CMD_EXPIRE) ? OPTION_DEFAULT_COMPRESS_LEVEL_NETWORK : optionGet(OPTION_COMPRESS_LEVEL_NETWORK), - protocolTimeoutGet() + optionGet(OPTION_PROTOCOL_TIMEOUT) ); } @@ -2483,7 +2521,7 @@ sub protocolGet commandTest(CMD_EXPIRE) ? OPTION_DEFAULT_COMPRESS_LEVEL_NETWORK : optionGet(OPTION_COMPRESS_LEVEL_NETWORK), optionRemoteTypeTest(DB) ? optionGet(OPTION_DB_HOST) : optionGet(OPTION_BACKUP_HOST), optionRemoteTypeTest(DB) ? optionGet(OPTION_DB_USER) : optionGet(OPTION_BACKUP_USER), - protocolTimeoutGet() + optionGet(OPTION_PROTOCOL_TIMEOUT) ); if (!defined($bStore) || $bStore) @@ -2496,19 +2534,6 @@ sub protocolGet push @EXPORT, qw(protocolGet); -#################################################################################################################################### -# protocolTimeoutGet -# -# Get the protocol time - for the moment this is based on the db timeout + 30 seconds. -#################################################################################################################################### -sub protocolTimeoutGet -{ - return optionGet(OPTION_DB_TIMEOUT) >= OPTION_DEFAULT_DB_TIMEOUT ? - optionGet(OPTION_DB_TIMEOUT) + 30 : optionGet(OPTION_DB_TIMEOUT); -} - -push @EXPORT, qw(protocolTimeoutGet); - #################################################################################################################################### # protocolDestroy # @@ -2516,11 +2541,15 @@ push @EXPORT, qw(protocolTimeoutGet); #################################################################################################################################### sub protocolDestroy { + my $iExitStatus = 0; + if (defined($oProtocol)) { - $oProtocol->close(); + $iExitStatus = $oProtocol->close(); undef($oProtocol); } + + return $iExitStatus; } push @EXPORT, qw(protocolDestroy); diff --git a/lib/pgBackRest/Config/ConfigHelpData.pm b/lib/pgBackRest/Config/ConfigHelpData.pm index 1f37f1311..6424ad95f 100644 --- a/lib/pgBackRest/Config/ConfigHelpData.pm +++ b/lib/pgBackRest/Config/ConfigHelpData.pm @@ -439,6 +439,19 @@ my $oConfigHelpData = "the command line." }, + # PROTOCOL-TIMEOUT Option Help + #--------------------------------------------------------------------------------------------------------------------------- + 'protocol-timeout' => + { + section => 'general', + summary => + "Protocol timeout.", + description => + "Sets the timeout, in seconds, that the master or remote process will wait for a new message to be received on " . + "the protocol layer. This prevents processes from waiting indefinitely for a message. The protocol-timeout " . + "option must be greater than the db-timeout option." + }, + # RECOVERY-OPTION Option Help #--------------------------------------------------------------------------------------------------------------------------- 'recovery-option' => @@ -686,12 +699,12 @@ my $oConfigHelpData = 'config' => 'default', 'config-remote' => 'section', 'db-path' => 'section', - 'db-timeout' => 'section', 'lock-path' => 'section', 'log-level-console' => 'section', 'log-level-file' => 'section', 'log-path' => 'section', 'neutral-umask' => 'section', + 'protocol-timeout' => 'section', 'repo-path' => 'section', 'stanza' => 'default' } @@ -721,12 +734,12 @@ my $oConfigHelpData = 'config' => 'default', 'config-remote' => 'section', 'db-path' => 'section', - 'db-timeout' => 'section', 'lock-path' => 'section', 'log-level-console' => 'section', 'log-level-file' => 'section', 'log-path' => 'section', 'neutral-umask' => 'section', + 'protocol-timeout' => 'section', 'repo-path' => 'section', 'spool-path' => 'section', 'stanza' => 'default' @@ -803,6 +816,7 @@ my $oConfigHelpData = "archive-check is automatically disabled for the backup." }, + 'protocol-timeout' => 'section', 'repo-path' => 'section', 'resume' => 'section', 'retention-archive' => 'section', @@ -869,6 +883,7 @@ my $oConfigHelpData = 'log-level-file' => 'section', 'log-path' => 'section', 'neutral-umask' => 'section', + 'protocol-timeout' => 'section', 'repo-path' => 'section', 'stanza' => 'default' } @@ -941,7 +956,6 @@ my $oConfigHelpData = 'compress-level-network' => 'section', 'config' => 'default', 'config-remote' => 'section', - 'db-timeout' => 'section', 'lock-path' => 'section', 'log-level-console' => 'section', 'log-level-file' => 'section', @@ -960,6 +974,7 @@ my $oConfigHelpData = "* json - Exhaustive machine-readable backup information in JSON format." }, + 'protocol-timeout' => 'section', 'repo-path' => 'section', 'stanza' => 'default' } @@ -987,7 +1002,6 @@ my $oConfigHelpData = 'config-remote' => 'section', 'db-include' => 'section', 'db-path' => 'section', - 'db-timeout' => 'section', # DELTA Option Help #------------------------------------------------------------------------------------------------------------------- @@ -1031,6 +1045,7 @@ my $oConfigHelpData = 'log-level-file' => 'section', 'log-path' => 'section', 'neutral-umask' => 'section', + 'protocol-timeout' => 'section', 'recovery-option' => 'section', 'repo-path' => 'section', diff --git a/lib/pgBackRest/Protocol/CommonMaster.pm b/lib/pgBackRest/Protocol/CommonMaster.pm index 9f1d45510..7c785a743 100644 --- a/lib/pgBackRest/Protocol/CommonMaster.pm +++ b/lib/pgBackRest/Protocol/CommonMaster.pm @@ -10,6 +10,7 @@ use Carp qw(confess); use File::Basename qw(dirname); use Time::HiRes qw(gettimeofday); +use Scalar::Util qw(blessed); use lib dirname($0) . '/../lib'; use pgBackRest::Common::Exception; @@ -92,22 +93,48 @@ sub close { my $self = shift; + # Exit status defaults to success + my $iExitStatus = 0; + # Only send the exit command if the process is running if (defined($self->{io}) && defined($self->{io}->pIdGet())) { &log(TRACE, "sending exit command to process"); - $self->cmdWrite('exit'); + eval + { + $self->cmdWrite('exit'); + }; + + if ($@) + { + my $oMessage = $@; + my $strError = 'unable to shutdown protocol'; + my $strHint = 'HINT: the process completed successfully but protocol-timeout may need to be increased.'; + + if (blessed($oMessage) && $oMessage->isa('pgBackRest::Common::Exception')) + { + $iExitStatus = $oMessage->code(); + } + else + { + if (!defined($oMessage)) + { + $oMessage = 'unknown error'; + } + + $iExitStatus = ERROR_UNKNOWN; + } + + &log(WARN, + $strError . ($iExitStatus == ERROR_UNKNOWN ? '' : ' [' . $oMessage->code() . ']') . ': ' . + ($iExitStatus == ERROR_UNKNOWN ? $oMessage : $oMessage->message()) . "\n${strHint}"); + } undef($self->{io}); - - # &log(TRACE, "waiting for remote process"); - # if (!$self->waitPid(5, false)) - # { - # &log(TRACE, "killed remote process"); - # kill('KILL', $self->{pId}); - # } } + + return $iExitStatus; } #################################################################################################################################### diff --git a/test/expect/backup-synthetic-005.log b/test/expect/backup-synthetic-005.log index 5f37ea317..53833c1f9 100644 --- a/test/expect/backup-synthetic-005.log +++ b/test/expect/backup-synthetic-005.log @@ -26,13 +26,13 @@ full backup (error on link to a link) INFO: backup stop full backup -> [BACKREST_BIN] --config=[TEST_PATH]/backrest/pgbackrest.conf --no-online --manifest-save-threshold=3 --db-timeout=2 --type=full --stanza=db backup +> [BACKREST_BIN] --config=[TEST_PATH]/backrest/pgbackrest.conf --no-online --manifest-save-threshold=3 --protocol-timeout=2 --db-timeout=1 --type=full --stanza=db backup ------------------------------------------------------------------------------------------------------------------------------------ - INFO: backup start: --cmd-remote=[BACKREST_BIN] --no-compress --config=[TEST_PATH]/backrest/pgbackrest.conf --config-remote=[TEST_PATH]/db/pgbackrest.conf --db-host=127.0.0.1 --db-path=[TEST_PATH]/db/common --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=2 --db-user=vagrant --lock-path=[TEST_PATH]/backrest/lock --log-level-console=debug --log-level-file=trace --log-path=[TEST_PATH]/backrest/log --manifest-save-threshold=3 --no-online --repo-path=[TEST_PATH]/backrest --stanza=db --start-fast --type=full + INFO: backup start: --cmd-remote=[BACKREST_BIN] --no-compress --config=[TEST_PATH]/backrest/pgbackrest.conf --config-remote=[TEST_PATH]/db/pgbackrest.conf --db-host=127.0.0.1 --db-path=[TEST_PATH]/db/common --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=1 --db-user=vagrant --lock-path=[TEST_PATH]/backrest/lock --log-level-console=debug --log-level-file=trace --log-path=[TEST_PATH]/backrest/log --manifest-save-threshold=3 --no-online --protocol-timeout=2 --repo-path=[TEST_PATH]/backrest --stanza=db --start-fast --type=full DEBUG: Common:::Lock::lockAquire(): bFailOnNoLock = , bRemote = , iProcessIdx = [undef], strLockType = backup DEBUG: Common:::Lock::lockAquire=>: bResult = true - DEBUG: Protocol::RemoteMaster->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 2, strCommand = [BACKREST_BIN] --command=backup --config=[TEST_PATH]/db/pgbackrest.conf --db-host=127.0.0.1 --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=2 --repo-path=[TEST_PATH]/backrest --stanza=db remote, strHost = 127.0.0.1, strUser = [USER-1] - DEBUG: Protocol::CommonMaster->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 2, strCommand = ssh -o Compression=no -o PasswordAuthentication=no vagrant@127.0.0.1 '[BACKREST_BIN] --command=backup --config=[TEST_PATH]/db/pgbackrest.conf --db-host=127.0.0.1 --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=2 --repo-path=[TEST_PATH]/backrest --stanza=db remote', strName = remote + DEBUG: Protocol::RemoteMaster->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 2, strCommand = [BACKREST_BIN] --command=backup --config=[TEST_PATH]/db/pgbackrest.conf --db-host=127.0.0.1 --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=1 --protocol-timeout=2 --repo-path=[TEST_PATH]/backrest --stanza=db remote, strHost = 127.0.0.1, strUser = [USER-1] + DEBUG: Protocol::CommonMaster->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 2, strCommand = ssh -o Compression=no -o PasswordAuthentication=no vagrant@127.0.0.1 '[BACKREST_BIN] --command=backup --config=[TEST_PATH]/db/pgbackrest.conf --db-host=127.0.0.1 --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=1 --protocol-timeout=2 --repo-path=[TEST_PATH]/backrest --stanza=db remote', strName = remote DEBUG: File->new(): iThreadIdx = [undef], oProtocol = [object], strBackupPath = [TEST_PATH]/backrest, strDefaultFileMode = <0640>, strDefaultPathMode = <0750>, strRemote = db, strStanza = db DEBUG: File->pathCreate(): bCreateParents = true, bIgnoreExists = true, strMode = <0750>, strPath = backup.history, strPathType = backup:cluster DEBUG: BackupInfo->new(): bValidate = , strBackupClusterPath = [TEST_PATH]/backrest/backup/db @@ -105,7 +105,7 @@ full backup DEBUG: File->remove=>: bRemoved = false DEBUG: File->linkCreate(): bHard = , bPathCreate = , bRelative = true, strDestinationFile = latest, strDestinationPathType = backup:cluster, strSourceFile = [BACKUP-FULL-1], strSourcePathType = backup:cluster INFO: backup stop - INFO: expire start: --no-compress --config=[TEST_PATH]/backrest/pgbackrest.conf --db-host=127.0.0.1 --db-timeout=2 --lock-path=[TEST_PATH]/backrest/lock --log-level-console=debug --log-level-file=trace --log-path=[TEST_PATH]/backrest/log --repo-path=[TEST_PATH]/backrest --stanza=db + INFO: expire start: --no-compress --config=[TEST_PATH]/backrest/pgbackrest.conf --db-host=127.0.0.1 --db-timeout=1 --lock-path=[TEST_PATH]/backrest/lock --log-level-console=debug --log-level-file=trace --log-path=[TEST_PATH]/backrest/log --protocol-timeout=2 --repo-path=[TEST_PATH]/backrest --stanza=db DEBUG: File->new(): iThreadIdx = [undef], oProtocol = [object], strBackupPath = [TEST_PATH]/backrest, strDefaultFileMode = <0640>, strDefaultPathMode = <0750>, strRemote = db, strStanza = db DEBUG: BackupInfo->new(): bValidate = , strBackupClusterPath = [TEST_PATH]/backrest/backup/db DEBUG: BackupCommon::backupRegExpGet(): bAnchor = , bDifferential = true, bFull = true, bIncremental = true @@ -263,13 +263,13 @@ db-version="9.3" 1={"db-catalog-version":201306121,"db-control-version":937,"db-system-id":6156904820763115222,"db-version":"9.3"} full backup (protocol timeout) -> [BACKREST_BIN] --config=[TEST_PATH]/backrest/pgbackrest.conf --no-online --db-timeout=1 --type=full --stanza=db backup --test --test-delay=1 --test-point=backup-start=y +> [BACKREST_BIN] --config=[TEST_PATH]/backrest/pgbackrest.conf --no-online --protocol-timeout=1 --db-timeout=.1 --type=full --stanza=db backup --test --test-delay=1 --test-point=backup-start=y ------------------------------------------------------------------------------------------------------------------------------------ - INFO: backup start: --cmd-remote=[BACKREST_BIN] --no-compress --config=[TEST_PATH]/backrest/pgbackrest.conf --config-remote=[TEST_PATH]/db/pgbackrest.conf --db-host=127.0.0.1 --db-path=[TEST_PATH]/db/common --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=1 --db-user=vagrant --lock-path=[TEST_PATH]/backrest/lock --log-level-console=debug --log-level-file=trace --log-path=[TEST_PATH]/backrest/log --no-online --repo-path=[TEST_PATH]/backrest --stanza=db --start-fast --test --test-delay=1 --test-point=backup-start=y --type=full + INFO: backup start: --cmd-remote=[BACKREST_BIN] --no-compress --config=[TEST_PATH]/backrest/pgbackrest.conf --config-remote=[TEST_PATH]/db/pgbackrest.conf --db-host=127.0.0.1 --db-path=[TEST_PATH]/db/common --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=.1 --db-user=vagrant --lock-path=[TEST_PATH]/backrest/lock --log-level-console=debug --log-level-file=trace --log-path=[TEST_PATH]/backrest/log --no-online --protocol-timeout=1 --repo-path=[TEST_PATH]/backrest --stanza=db --start-fast --test --test-delay=1 --test-point=backup-start=y --type=full DEBUG: Common:::Lock::lockAquire(): bFailOnNoLock = , bRemote = , iProcessIdx = [undef], strLockType = backup DEBUG: Common:::Lock::lockAquire=>: bResult = true - DEBUG: Protocol::RemoteMaster->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 1, strCommand = [BACKREST_BIN] --command=backup --config=[TEST_PATH]/db/pgbackrest.conf --db-host=127.0.0.1 --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=1 --repo-path=[TEST_PATH]/backrest --stanza=db remote, strHost = 127.0.0.1, strUser = [USER-1] - DEBUG: Protocol::CommonMaster->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 1, strCommand = ssh -o Compression=no -o PasswordAuthentication=no vagrant@127.0.0.1 '[BACKREST_BIN] --command=backup --config=[TEST_PATH]/db/pgbackrest.conf --db-host=127.0.0.1 --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=1 --repo-path=[TEST_PATH]/backrest --stanza=db remote', strName = remote + DEBUG: Protocol::RemoteMaster->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 1, strCommand = [BACKREST_BIN] --command=backup --config=[TEST_PATH]/db/pgbackrest.conf --db-host=127.0.0.1 --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=.1 --protocol-timeout=1 --repo-path=[TEST_PATH]/backrest --stanza=db remote, strHost = 127.0.0.1, strUser = [USER-1] + DEBUG: Protocol::CommonMaster->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 1, strCommand = ssh -o Compression=no -o PasswordAuthentication=no vagrant@127.0.0.1 '[BACKREST_BIN] --command=backup --config=[TEST_PATH]/db/pgbackrest.conf --db-host=127.0.0.1 --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=.1 --protocol-timeout=1 --repo-path=[TEST_PATH]/backrest --stanza=db remote', strName = remote DEBUG: File->new(): iThreadIdx = [undef], oProtocol = [object], strBackupPath = [TEST_PATH]/backrest, strDefaultFileMode = <0640>, strDefaultPathMode = <0750>, strRemote = db, strStanza = db DEBUG: File->pathCreate(): bCreateParents = true, bIgnoreExists = true, strMode = <0750>, strPath = backup.history, strPathType = backup:cluster DEBUG: BackupInfo->new(): bValidate = , strBackupClusterPath = [TEST_PATH]/backrest/backup/db @@ -3742,3 +3742,10 @@ info bogus } } ] + +diff backup (protocol shutdown timeout) +> [BACKREST_BIN] --config=[TEST_PATH]/backrest/pgbackrest.conf --no-online --protocol-timeout=1 --db-timeout=.5 --log-level-console=warn --type=diff --stanza=db backup --test --test-delay=1 --test-point=test_process_exit=y +------------------------------------------------------------------------------------------------------------------------------------ + ERROR: [141]: remote process terminated: ERROR [141]: unable to read line after 1 seconds + WARN: unable to shutdown protocol [141]: remote process terminated: ERROR [141]: unable to read line after 1 seconds + HINT: the process completed successfully but protocol-timeout may need to be increased. diff --git a/test/lib/pgBackRestTest/BackupCommonTest.pm b/test/lib/pgBackRestTest/BackupCommonTest.pm index ccaea2b72..965dfbbe5 100644 --- a/test/lib/pgBackRestTest/BackupCommonTest.pm +++ b/test/lib/pgBackRestTest/BackupCommonTest.pm @@ -1236,6 +1236,7 @@ push @EXPORT, qw(BackRestTestBackup_BackupEnd); sub BackRestTestBackup_BackupEnd { my $oExpectedManifestRef = shift; + my $bSupplemental = shift; my $iExitStatus = $oExecuteBackup->end(); @@ -1258,7 +1259,7 @@ sub BackRestTestBackup_BackupEnd BackRestTestBackup_BackupCompare($oBackupFile, $bBackupRemote, $strBackup, $oExpectedManifestRef); } - if (defined($oBackupLogTest)) + if (defined($oBackupLogTest) && (!defined($bSupplemental) || $bSupplemental)) { $oBackupLogTest->supplementalAdd(BackRestTestCommon_DbPathGet() . "/pgbackrest.conf", $bBackupRemote); @@ -1290,7 +1291,7 @@ sub BackRestTestBackup_BackupSynthetic my $oParam = shift; BackRestTestBackup_BackupBegin($strType, $strStanza, $strComment, $oParam); - return BackRestTestBackup_BackupEnd($oExpectedManifestRef); + return BackRestTestBackup_BackupEnd($oExpectedManifestRef, $$oParam{bSupplemental}); } #################################################################################################################################### diff --git a/test/lib/pgBackRestTest/BackupTest.pm b/test/lib/pgBackRestTest/BackupTest.pm index f484bc1a8..1d97be99b 100755 --- a/test/lib/pgBackRestTest/BackupTest.pm +++ b/test/lib/pgBackRestTest/BackupTest.pm @@ -963,7 +963,7 @@ sub BackRestTestBackup_Test if ($bNeutralTest && $bRemote) { - $strOptionalParam .= ' --db-timeout=2'; + $strOptionalParam .= ' --protocol-timeout=2 --db-timeout=1'; if ($iThreadMax > 1) { @@ -1019,7 +1019,7 @@ sub BackRestTestBackup_Test { BackRestTestBackup_BackupSynthetic( $strType, $strStanza, \%oManifest, 'protocol timeout', - {strOptionalParam => '--db-timeout=1', + {strOptionalParam => '--protocol-timeout=1 --db-timeout=.1', strTest => TEST_BACKUP_START, fTestDelay => 1, iExpectedExitStatus => ERROR_PROTOCOL_TIMEOUT}); @@ -1640,6 +1640,16 @@ sub BackRestTestBackup_Test executeTest('ls -1R ' . BackRestTestCommon_RepoPathGet() . "/backup/${strStanza}/" . PATH_BACKUP_HISTORY, {oLogTest => $oLogTest, bRemote => $bRemote}); } + + # Test protocol shutdown timeout + #----------------------------------------------------------------------------------------------------------------------- + if ($bNeutralTest && $bRemote) + { + BackRestTestBackup_BackupSynthetic( + $strType, $strStanza, \%oManifest, 'protocol shutdown timeout', + {strOptionalParam => '--protocol-timeout=1 --db-timeout=.5 --log-level-console=warn', + strTest => TEST_PROCESS_EXIT, fTestDelay => 1, bSupplemental => false}); + } } } } diff --git a/test/lib/pgBackRestTest/ConfigTest.pm b/test/lib/pgBackRestTest/ConfigTest.pm index 953938f69..29ffcbc3c 100755 --- a/test/lib/pgBackRestTest/ConfigTest.pm +++ b/test/lib/pgBackRestTest/ConfigTest.pm @@ -153,7 +153,8 @@ sub configLoadExpect } elsif ($iExpectedError == ERROR_OPTION_INVALID_VALUE) { - $strError = "'${strErrorParam1}' is not valid for '${strErrorParam2}' option"; + $strError = "'${strErrorParam1}' is not valid for '${strErrorParam2}' option" . + (defined($strErrorParam3) ? "\nHINT: ${strErrorParam3}." : ''); } elsif ($iExpectedError == ERROR_OPTION_INVALID_RANGE) { @@ -470,6 +471,18 @@ sub BackRestTestConfig_Test configLoadExpect($oOption, CMD_BACKUP, ERROR_OPTION_INVALID_VALUE, BOGUS, OPTION_RETENTION_ARCHIVE_TYPE); } + if (BackRestTestCommon_Run(++$iRun, CMD_BACKUP . ' invalid value ' . OPTION_PROTOCOL_TIMEOUT)) + { + optionSetTest($oOption, OPTION_STANZA, $strStanza); + optionSetTest($oOption, OPTION_DB_PATH, '/db'); + optionSetTest($oOption, OPTION_DB_TIMEOUT, 5); + optionSetTest($oOption, OPTION_PROTOCOL_TIMEOUT, 4); + + configLoadExpect( + $oOption, CMD_BACKUP, ERROR_OPTION_INVALID_VALUE, 4, OPTION_PROTOCOL_TIMEOUT, + "'protocol-timeout' option should be greater than 'db-timeout' option"); + } + if (BackRestTestCommon_Run(++$iRun, CMD_BACKUP . ' valid value ' . OPTION_RETENTION_ARCHIVE_TYPE)) { optionSetTest($oOption, OPTION_STANZA, $strStanza);