1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2024-12-14 10:13:05 +02:00

Improve protocol error handling.

In particular, "stop" errors are no longer reported as "unexpected".
This commit is contained in:
David Steele 2017-10-21 14:11:33 -04:00
parent 1f120f3fce
commit 44c7c0475a
8 changed files with 99 additions and 48 deletions

View File

@ -28,7 +28,7 @@ use pgBackRest::Protocol::Helper;
#################################################################################################################################### ####################################################################################################################################
# Run in eval block to catch errors # Run in eval block to catch errors
#################################################################################################################################### ####################################################################################################################################
local $EVAL_ERROR = undef; eval eval
{ {
################################################################################################################################ ################################################################################################################################
# Load command line parameters and config # Load command line parameters and config
@ -100,17 +100,19 @@ local $EVAL_ERROR = undef; eval
my $oRemote = new pgBackRest::Protocol::Remote::Minion(cfgOption(CFGOPT_BUFFER_SIZE), cfgOption(CFGOPT_PROTOCOL_TIMEOUT)); my $oRemote = new pgBackRest::Protocol::Remote::Minion(cfgOption(CFGOPT_BUFFER_SIZE), cfgOption(CFGOPT_PROTOCOL_TIMEOUT));
# Acquire a remote lock (except for commands that are read-only or local processes) # Acquire a remote lock (except for commands that are read-only or local processes)
my $strLockName;
if (!(cfgOptionTest(CFGOPT_COMMAND, cfgCommandName(CFGCMD_ARCHIVE_GET)) || if (!(cfgOptionTest(CFGOPT_COMMAND, cfgCommandName(CFGCMD_ARCHIVE_GET)) ||
cfgOptionTest(CFGOPT_COMMAND, cfgCommandName(CFGCMD_INFO)) || cfgOptionTest(CFGOPT_COMMAND, cfgCommandName(CFGCMD_INFO)) ||
cfgOptionTest(CFGOPT_COMMAND, cfgCommandName(CFGCMD_RESTORE)) || cfgOptionTest(CFGOPT_COMMAND, cfgCommandName(CFGCMD_RESTORE)) ||
cfgOptionTest(CFGOPT_COMMAND, cfgCommandName(CFGCMD_CHECK)) || cfgOptionTest(CFGOPT_COMMAND, cfgCommandName(CFGCMD_CHECK)) ||
cfgOptionTest(CFGOPT_COMMAND, cfgCommandName(CFGCMD_LOCAL)))) cfgOptionTest(CFGOPT_COMMAND, cfgCommandName(CFGCMD_LOCAL))))
{ {
lockAcquire(cfgOption(CFGOPT_COMMAND), undef, true); $strLockName = cfgOption(CFGOPT_COMMAND);
} }
# Process remote requests # Process remote requests
exitSafe($oRemote->process()); exitSafe($oRemote->process($strLockName));
} }
################################################################################################################################ ################################################################################################################################
@ -280,7 +282,10 @@ local $EVAL_ERROR = undef; eval
#################################################################################################################################### ####################################################################################################################################
or do or do
{ {
exitSafe(undef, $EVAL_ERROR); # Perl 5.10 seems to have a problem propogating errors up through a large call stack, so in the case that the error arrives
# blank just use the last logged error instead. Don't do this in all cases because newer Perls seem to work fine and there are
# other errors that could be arriving in $EVAL_ERROR.
exitSafe(undef, defined($EVAL_ERROR) && length($EVAL_ERROR) > 0 ? $EVAL_ERROR : logErrorLast());
}; };
# uncoverable statement - errors should be handled in the do block above # uncoverable statement - errors should be handled in the do block above

View File

@ -42,6 +42,10 @@
</release-feature-list> </release-feature-list>
<release-refactor-list> <release-refactor-list>
<release-item>
<p>Improve protocol error handling. In particular, <quote>stop</quote> errors are no longer reported as <quote>unexpected</quote>.</p>
</release-item>
<release-item> <release-item>
<p>Update C naming conventions.</p> <p>Update C naming conventions.</p>
</release-item> </release-item>

View File

@ -1855,7 +1855,7 @@
<execute user="backrest" err-expect="62" output="y"> <execute user="backrest" err-expect="62" output="y">
<exe-cmd>{[project-exe]} {[dash]}-stanza={[postgres-cluster-demo]} backup</exe-cmd> <exe-cmd>{[project-exe]} {[dash]}-stanza={[postgres-cluster-demo]} backup</exe-cmd>
<exe-highlight>process .* terminated unexpectedly.*: stop file exists for all stanzas</exe-highlight> <exe-highlight>\: stop file exists for all stanzas</exe-highlight>
</execute> </execute>
</execute-list> </execute-list>
@ -1896,7 +1896,7 @@
<execute user="backrest" err-expect="62" output="y"> <execute user="backrest" err-expect="62" output="y">
<exe-cmd>{[project-exe]} {[dash]}-stanza={[postgres-cluster-demo]} backup</exe-cmd> <exe-cmd>{[project-exe]} {[dash]}-stanza={[postgres-cluster-demo]} backup</exe-cmd>
<exe-highlight>process .* terminated unexpectedly.*: stop file exists for stanza demo</exe-highlight> <exe-highlight>\: stop file exists for stanza demo</exe-highlight>
</execute> </execute>
</execute-list> </execute-list>

View File

@ -112,7 +112,7 @@ sub error
while (defined(my $strLine = $oIoError->readLine(true, false))) while (defined(my $strLine = $oIoError->readLine(true, false)))
{ {
$strError .= "${strLine}\n"; $strError .= (defined($strError) ? "\n" : '') . $strLine;
} }
delete($self->{iProcessId}); delete($self->{iProcessId});

View File

@ -84,6 +84,9 @@ my $bLogDisable = 0;
# Allow errors to be logged as warnings # Allow errors to be logged as warnings
my $bLogWarnOnError = 0; my $bLogWarnOnError = 0;
# Store the last logged error
my $oErrorLast;
# Test globals # Test globals
my $bTest = false; my $bTest = false;
my $fTestDelay; my $fTestDelay;
@ -588,7 +591,7 @@ sub logErrorResult
my $strMessage = shift; my $strMessage = shift;
my $strResult = shift; my $strResult = shift;
confess &log(ERROR, $strMessage . (defined($strResult) ? ": $strResult" : ''), $iCode); confess &log(ERROR, $strMessage . (defined($strResult) ? ': ' . trim($strResult) : ''), $iCode);
} }
push @EXPORT, qw(logErrorResult); push @EXPORT, qw(logErrorResult);
@ -776,18 +779,29 @@ sub log
} }
} }
# Throw a typed exception if code is defined # Return a typed exception if code is defined
if (defined($iCode)) if (defined($iCode))
{ {
return new pgBackRest::Common::Exception($strLevel, $iCode, $strMessage, longmess(), $rExtra); $oErrorLast = new pgBackRest::Common::Exception($strLevel, $iCode, $strMessage, longmess(), $rExtra);
return $oErrorLast;
} }
# Return the message test so it can be used in a confess # Return the message so it can be used in a confess
return $strMessage; return $strMessage;
} }
push @EXPORT, qw(log); push @EXPORT, qw(log);
####################################################################################################################################
# logErrorLast - get the last logged error
####################################################################################################################################
sub logErrorLast
{
return $oErrorLast;
}
push @EXPORT, qw(logErrorLast);
#################################################################################################################################### ####################################################################################################################################
# testSet # testSet
# #

View File

@ -55,9 +55,6 @@ sub new
# Create JSON object # Create JSON object
$self->{oJSON} = JSON::PP->new()->allow_nonref(); $self->{oJSON} = JSON::PP->new()->allow_nonref();
# Check greeting to be sure the protocol matches
$self->greetingRead();
# Setup the keepalive timer # Setup the keepalive timer
$self->{fKeepAliveTimeout} = $self->io()->timeout() / 2 > 120 ? 120 : $self->io()->timeout() / 2; $self->{fKeepAliveTimeout} = $self->io()->timeout() / 2 > 120 ? 120 : $self->io()->timeout() / 2;
$self->{fKeepAliveTime} = gettimeofday(); $self->{fKeepAliveTime} = gettimeofday();
@ -65,6 +62,9 @@ sub new
# Set the error prefix used when raising error messages # Set the error prefix used when raising error messages
$self->{strErrorPrefix} = 'raised on ' . $self->{strId} . ' host'; $self->{strErrorPrefix} = 'raised on ' . $self->{strId} . ' host';
# Check greeting to be sure the protocol matches
$self->greetingRead();
# Return from function and log return values if any # Return from function and log return values if any
return logDebugReturn return logDebugReturn
( (

View File

@ -14,6 +14,7 @@ use JSON::PP;
use pgBackRest::Common::Exception; use pgBackRest::Common::Exception;
use pgBackRest::Common::Ini; use pgBackRest::Common::Ini;
use pgBackRest::Common::Lock;
use pgBackRest::Common::Log; use pgBackRest::Common::Log;
use pgBackRest::Common::String; use pgBackRest::Common::String;
use pgBackRest::Protocol::Base::Master; use pgBackRest::Protocol::Base::Master;
@ -133,50 +134,79 @@ sub cmdRead
sub process sub process
{ {
my $self = shift; my $self = shift;
my $strLockName = shift;
# Reset stderr log level so random errors do not get output # Reset stderr log level so random errors do not get output
logLevelSet(undef, undef, OFF); logLevelSet(undef, undef, OFF);
# A permanent error will be returned from every command once it has been set. In some cases this is a more graceful way to
# exit than a hard error.
my $oPermanentError;
# Loop until the exit command is received # Loop until the exit command is received
eval eval
{ {
# Aquire a lock if a lock name is defined. This is done here so any errors will be transmitted through the protocol layer
# and cause a graceful shutdown rather than a remote abort.
if (defined($strLockName))
{
eval
{
lockAcquire($strLockName, undef, true);
return true;
}
or do
{
$oPermanentError = $EVAL_ERROR;
}
}
while (true) while (true)
{ {
my ($strCommand, $rParam) = $self->cmdRead(); my ($strCommand, $rParam) = $self->cmdRead();
last if ($strCommand eq OP_EXIT); last if ($strCommand eq OP_EXIT);
eval # If permanent error is set then always return it
if (defined($oPermanentError))
{ {
# Check for the command in the map and run it if found $self->errorWrite($oPermanentError);
if (defined($self->{hCommandMap}{$strCommand}))
{
$self->outputWrite($self->{hCommandMap}{$strCommand}->($rParam));
}
# Run the standard NOOP command. This this can be overridden in hCommandMap to implement a custom NOOP.
elsif ($strCommand eq OP_NOOP)
{
protocolKeepAlive();
$self->outputWrite();
}
else
{
confess "invalid command: ${strCommand}";
}
# Run the post command if defined
if (defined($self->{hCommandMap}{&OP_POST}))
{
$self->{hCommandMap}{&OP_POST}->();
}
return true;
} }
# Process errors # Else process as usual
or do else
{ {
$self->errorWrite($EVAL_ERROR); eval
}; {
# Check for the command in the map and run it if found
if (defined($self->{hCommandMap}{$strCommand}))
{
$self->outputWrite($self->{hCommandMap}{$strCommand}->($rParam));
}
# Run the standard NOOP command. This this can be overridden in hCommandMap to implement a custom NOOP.
elsif ($strCommand eq OP_NOOP)
{
protocolKeepAlive();
$self->outputWrite();
}
else
{
confess "invalid command: ${strCommand}";
}
# Run the post command if defined
if (defined($self->{hCommandMap}{&OP_POST}))
{
$self->{hCommandMap}{&OP_POST}->();
}
return true;
}
# Process errors
or do
{
$self->errorWrite($EVAL_ERROR);
};
}
} }
return true; return true;

View File

@ -513,7 +513,6 @@ P00 TEST: PgBaCkReStTeSt-BACKUP-START-PgBaCkReStTeSt
P00 DEBUG: Protocol::Helper::protocolGet(): bCache = <true>, iProcessIdx = [undef], iRemoteIdx = 1, strBackRestBin = [undef], strCommand = <backup>, strRemoteType = db P00 DEBUG: Protocol::Helper::protocolGet(): bCache = <true>, iProcessIdx = [undef], iRemoteIdx = 1, strBackRestBin = [undef], strCommand = <backup>, strRemoteType = db
P00 DEBUG: Protocol::Helper::protocolGet: found cached protocol P00 DEBUG: Protocol::Helper::protocolGet: found cached protocol
P00 ERROR: [042]: process 'db-master remote' terminated unexpectedly [042]: unable to read line after 1 second(s) from stdio P00 ERROR: [042]: process 'db-master remote' terminated unexpectedly [042]: unable to read line after 1 second(s) from stdio
P00 DEBUG: Common::Exit::exitSafe(): iExitCode = [undef], oException = [object], strSignal = [undef] P00 DEBUG: Common::Exit::exitSafe(): iExitCode = [undef], oException = [object], strSignal = [undef]
P00 DEBUG: Protocol::Helper::protocolDestroy(): bComplete = false, iRemoteIdx = [undef], strRemoteType = [undef] P00 DEBUG: Protocol::Helper::protocolDestroy(): bComplete = false, iRemoteIdx = [undef], strRemoteType = [undef]
P00 DEBUG: Protocol::Helper::protocolDestroy: found cached protocol: iRemoteIdx = 1, strRemoteType = db P00 DEBUG: Protocol::Helper::protocolDestroy: found cached protocol: iRemoteIdx = 1, strRemoteType = db
@ -611,7 +610,6 @@ P00 TEST: PgBaCkReStTeSt-BACKUP-START-PgBaCkReStTeSt
P00 DEBUG: Protocol::Helper::protocolGet(): bCache = <true>, iProcessIdx = [undef], iRemoteIdx = 1, strBackRestBin = [undef], strCommand = <backup>, strRemoteType = db P00 DEBUG: Protocol::Helper::protocolGet(): bCache = <true>, iProcessIdx = [undef], iRemoteIdx = 1, strBackRestBin = [undef], strCommand = <backup>, strRemoteType = db
P00 DEBUG: Protocol::Helper::protocolGet: found cached protocol P00 DEBUG: Protocol::Helper::protocolGet: found cached protocol
P00 ERROR: [063]: process 'db-master remote' terminated unexpectedly [063]: terminated on signal [SIGTERM] P00 ERROR: [063]: process 'db-master remote' terminated unexpectedly [063]: terminated on signal [SIGTERM]
P00 DEBUG: Common::Exit::exitSafe(): iExitCode = [undef], oException = [object], strSignal = [undef] P00 DEBUG: Common::Exit::exitSafe(): iExitCode = [undef], oException = [object], strSignal = [undef]
P00 DEBUG: Protocol::Helper::protocolDestroy(): bComplete = false, iRemoteIdx = [undef], strRemoteType = [undef] P00 DEBUG: Protocol::Helper::protocolDestroy(): bComplete = false, iRemoteIdx = [undef], strRemoteType = [undef]
P00 DEBUG: Protocol::Helper::protocolDestroy: found cached protocol: iRemoteIdx = 1, strRemoteType = db P00 DEBUG: Protocol::Helper::protocolDestroy: found cached protocol: iRemoteIdx = 1, strRemoteType = db
@ -657,8 +655,8 @@ P00 DEBUG: Protocol::Helper::protocolGet(): bCache = <true>, iProcessIdx =
P00 DEBUG: Protocol::Helper::protocolGet: create (cached) remote protocol P00 DEBUG: Protocol::Helper::protocolGet: create (cached) remote protocol
P00 DEBUG: Protocol::Remote::Master->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 60, iSshPort = [undef], strCommand = [BACKREST-BIN] --buffer-size=4194304 --command=backup --compress-level=6 --compress-level-network=3 --config=[TEST_PATH]/db-master/pgbackrest.conf --db-timeout=45 --db1-path=[TEST_PATH]/db-master/db/base --protocol-timeout=60 --repo-path=[TEST_PATH]/backup/repo --stanza=db --type=db remote, strCommandSSH = ssh, strHost = db-master, strUser = [USER-1] P00 DEBUG: Protocol::Remote::Master->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 60, iSshPort = [undef], strCommand = [BACKREST-BIN] --buffer-size=4194304 --command=backup --compress-level=6 --compress-level-network=3 --config=[TEST_PATH]/db-master/pgbackrest.conf --db-timeout=45 --db1-path=[TEST_PATH]/db-master/db/base --protocol-timeout=60 --repo-path=[TEST_PATH]/backup/repo --stanza=db --type=db remote, strCommandSSH = ssh, strHost = db-master, strUser = [USER-1]
P00 DEBUG: Protocol::Command::Master->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 60, strCommand = ssh -o LogLevel=error -o Compression=no -o PasswordAuthentication=no [USER-1]@db-master '[BACKREST-BIN] --buffer-size=4194304 --command=backup --compress-level=6 --compress-level-network=3 --config=[TEST_PATH]/db-master/pgbackrest.conf --db-timeout=45 --db1-path=[TEST_PATH]/db-master/db/base --protocol-timeout=60 --repo-path=[TEST_PATH]/backup/repo --stanza=db --type=db remote', strId = 'db-master remote', strName = remote P00 DEBUG: Protocol::Command::Master->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 60, strCommand = ssh -o LogLevel=error -o Compression=no -o PasswordAuthentication=no [USER-1]@db-master '[BACKREST-BIN] --buffer-size=4194304 --command=backup --compress-level=6 --compress-level-network=3 --config=[TEST_PATH]/db-master/pgbackrest.conf --db-timeout=45 --db1-path=[TEST_PATH]/db-master/db/base --protocol-timeout=60 --repo-path=[TEST_PATH]/backup/repo --stanza=db --type=db remote', strId = 'db-master remote', strName = remote
P00 ERROR: [062]: process 'db-master remote' terminated unexpectedly [062]: stop file exists for all stanzas P00 ERROR: [062]: raised on 'db-master remote' host: stop file exists for all stanzas
P00 DEBUG: Protocol::Command::Master->close=>: iExitStatus = 0
P00 DEBUG: Common::Exit::exitSafe(): iExitCode = [undef], oException = [object], strSignal = [undef] P00 DEBUG: Common::Exit::exitSafe(): iExitCode = [undef], oException = [object], strSignal = [undef]
P00 DEBUG: Protocol::Helper::protocolDestroy(): bComplete = false, iRemoteIdx = [undef], strRemoteType = [undef] P00 DEBUG: Protocol::Helper::protocolDestroy(): bComplete = false, iRemoteIdx = [undef], strRemoteType = [undef]
P00 DEBUG: Protocol::Helper::protocolDestroy=>: iExitStatus = 0 P00 DEBUG: Protocol::Helper::protocolDestroy=>: iExitStatus = 0
@ -728,8 +726,8 @@ P00 DEBUG: Protocol::Helper::protocolGet(): bCache = <true>, iProcessIdx =
P00 DEBUG: Protocol::Helper::protocolGet: create (cached) remote protocol P00 DEBUG: Protocol::Helper::protocolGet: create (cached) remote protocol
P00 DEBUG: Protocol::Remote::Master->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 60, iSshPort = [undef], strCommand = [BACKREST-BIN] --buffer-size=4194304 --command=backup --compress-level=6 --compress-level-network=3 --config=[TEST_PATH]/db-master/pgbackrest.conf --db-timeout=45 --db1-path=[TEST_PATH]/db-master/db/base --protocol-timeout=60 --repo-path=[TEST_PATH]/backup/repo --stanza=db --type=db remote, strCommandSSH = ssh, strHost = db-master, strUser = [USER-1] P00 DEBUG: Protocol::Remote::Master->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 60, iSshPort = [undef], strCommand = [BACKREST-BIN] --buffer-size=4194304 --command=backup --compress-level=6 --compress-level-network=3 --config=[TEST_PATH]/db-master/pgbackrest.conf --db-timeout=45 --db1-path=[TEST_PATH]/db-master/db/base --protocol-timeout=60 --repo-path=[TEST_PATH]/backup/repo --stanza=db --type=db remote, strCommandSSH = ssh, strHost = db-master, strUser = [USER-1]
P00 DEBUG: Protocol::Command::Master->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 60, strCommand = ssh -o LogLevel=error -o Compression=no -o PasswordAuthentication=no [USER-1]@db-master '[BACKREST-BIN] --buffer-size=4194304 --command=backup --compress-level=6 --compress-level-network=3 --config=[TEST_PATH]/db-master/pgbackrest.conf --db-timeout=45 --db1-path=[TEST_PATH]/db-master/db/base --protocol-timeout=60 --repo-path=[TEST_PATH]/backup/repo --stanza=db --type=db remote', strId = 'db-master remote', strName = remote P00 DEBUG: Protocol::Command::Master->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 60, strCommand = ssh -o LogLevel=error -o Compression=no -o PasswordAuthentication=no [USER-1]@db-master '[BACKREST-BIN] --buffer-size=4194304 --command=backup --compress-level=6 --compress-level-network=3 --config=[TEST_PATH]/db-master/pgbackrest.conf --db-timeout=45 --db1-path=[TEST_PATH]/db-master/db/base --protocol-timeout=60 --repo-path=[TEST_PATH]/backup/repo --stanza=db --type=db remote', strId = 'db-master remote', strName = remote
P00 ERROR: [062]: process 'db-master remote' terminated unexpectedly [062]: stop file exists for stanza db P00 ERROR: [062]: raised on 'db-master remote' host: stop file exists for stanza db
P00 DEBUG: Protocol::Command::Master->close=>: iExitStatus = 0
P00 DEBUG: Common::Exit::exitSafe(): iExitCode = [undef], oException = [object], strSignal = [undef] P00 DEBUG: Common::Exit::exitSafe(): iExitCode = [undef], oException = [object], strSignal = [undef]
P00 DEBUG: Protocol::Helper::protocolDestroy(): bComplete = false, iRemoteIdx = [undef], strRemoteType = [undef] P00 DEBUG: Protocol::Helper::protocolDestroy(): bComplete = false, iRemoteIdx = [undef], strRemoteType = [undef]
P00 DEBUG: Protocol::Helper::protocolDestroy=>: iExitStatus = 0 P00 DEBUG: Protocol::Helper::protocolDestroy=>: iExitStatus = 0