1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2026-05-22 10:15:16 +02:00

Fixed an issue where longer-running backups/restores would time out when remote and threaded.

Keepalives are now used to make sure the remote for the main process does not time out while the thread remotes do all the work.  The error messages for timeouts were also improved to make debugging easier.
This commit is contained in:
David Steele
2015-12-24 10:32:25 -05:00
parent b0a6954671
commit ba098d7b91
14 changed files with 167 additions and 29 deletions
+30 -2
View File
@@ -848,12 +848,40 @@ sub BackRestTestBackup_Test
# Full backup
#-----------------------------------------------------------------------------------------------------------------------
my $strType = 'full';
my $strOptionalParam = '--manifest-save-threshold=3';
my $strTestPoint;
if ($bNeutralTest && $bRemote)
{
$strOptionalParam .= ' --db-timeout=2';
if ($iThreadMax > 1)
{
$strTestPoint = TEST_KEEP_ALIVE;
}
}
BackRestTestBackup_ManifestLinkCreate(\%oManifest, 'base', 'link-test', '/test');
BackRestTestBackup_ManifestPathCreate(\%oManifest, 'base', 'path-test');
my $strFullBackup = BackRestTestBackup_BackupSynthetic($strType, $strStanza, \%oManifest, undef,
{strOptionalParam => '--manifest-save-threshold=3'});
my $strFullBackup = BackRestTestBackup_BackupSynthetic(
$strType, $strStanza, \%oManifest, undef,
{strOptionalParam => $strOptionalParam, strTest => $strTestPoint, fTestDelay => 0});
# Test protocol timeout
#-----------------------------------------------------------------------------------------------------------------------
if ($bNeutralTest && $bRemote)
{
BackRestTestBackup_BackupSynthetic(
$strType, $strStanza, \%oManifest, 'protocol timeout',
{strOptionalParam => '--db-timeout=1',
strTest => TEST_BACKUP_START,
fTestDelay => 1,
iExpectedExitStatus => ERROR_PROTOCOL_TIMEOUT});
# Remove the aborted backup so the next backup is not a resume
BackRestTestCommon_PathRemove(BackRestTestCommon_RepoPathGet() . "/temp/${strStanza}.tmp", $bRemote);
}
# Stop operations and make sure the correct error occurs
#-----------------------------------------------------------------------------------------------------------------------
+7 -2
View File
@@ -214,9 +214,14 @@ sub endRetry
}
# This is a hack to make regression tests pass even when threaded backup/restore sometimes return 255
if ($self->{iExpectedExitStatus} == -1 && ($iExitStatus == 0 || $iExitStatus == 255))
if ($self->{iExpectedExitStatus} == -1)
{
return 0;
if ($iExitStatus == 0 || $iExitStatus == 255)
{
return 0;
}
$self->{iExpectedExitStatus} = 0;
}
if ($iExitStatus != 0 || ($self->{iExpectedExitStatus} != 0 && $iExitStatus != $self->{iExpectedExitStatus}))
+42 -5
View File
@@ -2,14 +2,14 @@ run 005 - rmt 1, cmp 0, hardlink 0
==================================
full backup
> [BACKREST_BIN] --config=[TEST_PATH]/backrest/pg_backrest.conf --no-start-stop --manifest-save-threshold=3 --type=full --stanza=db backup
> [BACKREST_BIN] --config=[TEST_PATH]/backrest/pg_backrest.conf --no-start-stop --manifest-save-threshold=3 --db-timeout=2 --type=full --stanza=db backup
------------------------------------------------------------------------------------------------------------------------------------
INFO: backup start: --cmd-remote=[BACKREST_BIN] --no-compress --config=[TEST_PATH]/backrest/pg_backrest.conf --config-remote=[TEST_PATH]/db/pg_backrest.conf --db-host=127.0.0.1 --db-path=[TEST_PATH]/db/common --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-user=vagrant --log-level-console=debug --log-level-file=trace --manifest-save-threshold=3 --no-start-stop --repo-path=[TEST_PATH]/backrest --repo-remote-path=[TEST_PATH]/backrest --stanza=db --start-fast --type=full
INFO: backup start: --cmd-remote=[BACKREST_BIN] --no-compress --config=[TEST_PATH]/backrest/pg_backrest.conf --config-remote=[TEST_PATH]/db/pg_backrest.conf --db-host=127.0.0.1 --db-path=[TEST_PATH]/db/common --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=2 --db-user=vagrant --log-level-console=debug --log-level-file=trace --manifest-save-threshold=3 --no-start-stop --repo-path=[TEST_PATH]/backrest --repo-remote-path=[TEST_PATH]/backrest --stanza=db --start-fast --type=full
DEBUG: Common:::Lock::lockAquire(): bFailOnNoLock = <true>, bRemote = <false>, iProcessIdx = [undef], strLockType = backup
DEBUG: Common:::Lock::lockStopTest(): strRepoPath = [TEST_PATH]/backrest
DEBUG: Common:::Lock::lockAquire=>: bResult = true
DEBUG: Protocol::RemoteMaster->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 1830, strCommand = [BACKREST_BIN] --command=backup --config=[TEST_PATH]/db/pg_backrest.conf --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --repo-remote-path=[TEST_PATH]/backrest --stanza=db remote, strHost = 127.0.0.1, strUser = [USER-1]
DEBUG: Protocol::CommonMaster->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 1830, strCommand = ssh -o Compression=no -o PasswordAuthentication=no vagrant@127.0.0.1 '[BACKREST_BIN] --command=backup --config=[TEST_PATH]/db/pg_backrest.conf --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --repo-remote-path=[TEST_PATH]/backrest --stanza=db remote', strName = remote
DEBUG: Protocol::RemoteMaster->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 2, strCommand = [BACKREST_BIN] --command=backup --config=[TEST_PATH]/db/pg_backrest.conf --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=2 --repo-remote-path=[TEST_PATH]/backrest --stanza=db remote, strHost = 127.0.0.1, strUser = [USER-1]
DEBUG: Protocol::CommonMaster->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 2, strCommand = ssh -o Compression=no -o PasswordAuthentication=no vagrant@127.0.0.1 '[BACKREST_BIN] --command=backup --config=[TEST_PATH]/db/pg_backrest.conf --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=2 --repo-remote-path=[TEST_PATH]/backrest --stanza=db remote', strName = remote
DEBUG: File->new(): iThreadIdx = [undef], oProtocol = [object], strBackupPath = [TEST_PATH]/backrest, strDefaultFileMode = <0640>, strDefaultPathMode = <0750>, strRemote = db, strStanza = db
DEBUG: File->pathCreate(): bIgnoreExists = true, strMode = <0750>, strPath = [undef], strPathType = backup:cluster
DEBUG: File->exists(): strPath = [undef], strPathType = backup:cluster
@@ -54,7 +54,7 @@ DEBUG: File->remove=>: bRemoved = false
DEBUG: File->linkCreate(): bHard = <false>, bPathCreate = <true>, bRelative = true, strDestinationFile = latest, strDestinationPathType = backup:cluster, strSourceFile = [BACKUP-FULL-1], strSourcePathType = backup:cluster
DEBUG: File->pathCreate(): bIgnoreExists = <false>, strMode = <0750>, strPath = [TEST_PATH]/backrest/backup/db, strPathType = backup:absolute
INFO: backup stop
INFO: expire start: --no-compress --config=[TEST_PATH]/backrest/pg_backrest.conf --log-level-console=debug --log-level-file=trace --repo-path=[TEST_PATH]/backrest --stanza=db
INFO: expire start: --no-compress --config=[TEST_PATH]/backrest/pg_backrest.conf --db-timeout=2 --log-level-console=debug --log-level-file=trace --repo-path=[TEST_PATH]/backrest --stanza=db
DEBUG: File->new(): iThreadIdx = [undef], oProtocol = [object], strBackupPath = [TEST_PATH]/backrest, strDefaultFileMode = <0640>, strDefaultPathMode = <0750>, strRemote = db, strStanza = db
DEBUG: BackupInfo->new(): strBackupClusterPath = [TEST_PATH]/backrest/backup/db
INFO: archive retention type not set - archive logs will not be expired
@@ -190,6 +190,43 @@ db-version="9.3"
[db:history]
1={"db-catalog-version":201306121,"db-control-version":937,"db-system-id":6156904820763115222,"db-version":"9.3"}
full backup (protocol timeout)
> [BACKREST_BIN] --config=[TEST_PATH]/backrest/pg_backrest.conf --no-start-stop --db-timeout=1 --type=full --stanza=db backup --test --test-delay=1 --test-point=backup-start=y
------------------------------------------------------------------------------------------------------------------------------------
INFO: backup start: --cmd-remote=[BACKREST_BIN] --no-compress --config=[TEST_PATH]/backrest/pg_backrest.conf --config-remote=[TEST_PATH]/db/pg_backrest.conf --db-host=127.0.0.1 --db-path=[TEST_PATH]/db/common --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=1 --db-user=vagrant --log-level-console=debug --log-level-file=trace --no-start-stop --repo-path=[TEST_PATH]/backrest --repo-remote-path=[TEST_PATH]/backrest --stanza=db --start-fast --test --test-delay=1 --test-point=backup-start=y --type=full
DEBUG: Common:::Lock::lockAquire(): bFailOnNoLock = <true>, bRemote = <false>, iProcessIdx = [undef], strLockType = backup
DEBUG: Common:::Lock::lockStopTest(): strRepoPath = [TEST_PATH]/backrest
DEBUG: Common:::Lock::lockAquire=>: bResult = true
DEBUG: Protocol::RemoteMaster->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 1, strCommand = [BACKREST_BIN] --command=backup --config=[TEST_PATH]/db/pg_backrest.conf --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=1 --repo-remote-path=[TEST_PATH]/backrest --stanza=db remote, strHost = 127.0.0.1, strUser = [USER-1]
DEBUG: Protocol::CommonMaster->new(): iBufferMax = 4194304, iCompressLevel = 6, iCompressLevelNetwork = 3, iProtocolTimeout = 1, strCommand = ssh -o Compression=no -o PasswordAuthentication=no vagrant@127.0.0.1 '[BACKREST_BIN] --command=backup --config=[TEST_PATH]/db/pg_backrest.conf --db-port=[PORT-1] --db-socket-path=[TEST_PATH]/db --db-timeout=1 --repo-remote-path=[TEST_PATH]/backrest --stanza=db remote', strName = remote
DEBUG: File->new(): iThreadIdx = [undef], oProtocol = [object], strBackupPath = [TEST_PATH]/backrest, strDefaultFileMode = <0640>, strDefaultPathMode = <0750>, strRemote = db, strStanza = db
DEBUG: File->pathCreate(): bIgnoreExists = true, strMode = <0750>, strPath = [undef], strPathType = backup:cluster
DEBUG: File->exists(): strPath = [undef], strPathType = backup:cluster
DEBUG: File->exists=>: bExists = true
DEBUG: BackupInfo->new(): strBackupClusterPath = [TEST_PATH]/backrest/backup/db
DEBUG: Backup->typeFind(): strBackupClusterPath = [TEST_PATH]/backrest/backup/db, strType = full
DEBUG: Backup->typeFind=>: strLabel = [undef]
DEBUG: Db->info(): oFile = [object], strDbPath = [TEST_PATH]/db/common
DEBUG: Db->info=>: fDbVersion = 9.3, iCatalogVersion = 201306121, iControlVersion = 937, ullDbSysId = 6156904820763115222
DEBUG: File->exists(): strPath = [TEST_PATH]/db/common/postmaster.pid, strPathType = db:absolute
DEBUG: File->exists=>: bExists = false
DEBUG: Manifest->build(): bNoStartStop = true, oFile = [object], oLastManifest = [undef], oTablespaceMapRef = [undef], strDbClusterPath = [TEST_PATH]/db/common, strLevel = [undef]
DEBUG: File->manifest(): oManifestHashRef = [hash], strPath = [TEST_PATH]/db/common/pg_tblspc, strPathType = db:absolute
DEBUG: File->manifest(): oManifestHashRef = [hash], strPath = [TEST_PATH]/db/common, strPathType = db:absolute
DEBUG: File->wait(): strPathType = db:absolute
DEBUG: Backup->process: create temp backup path [TEST_PATH]/backrest/temp/db.tmp
DEBUG: File->pathCreate(): bIgnoreExists = <false>, strMode = <0750>, strPath = [undef], strPathType = backup:tmp
DEBUG: Backup->processManifest(): bCompress = false, bHardLink = false, oBackupManifest = [object], strType = full
DEBUG: File->pathCreate(): bIgnoreExists = <false>, strMode = <0750>, strPath = base/base, strPathType = backup:tmp
DEBUG: File->pathCreate(): bIgnoreExists = <false>, strMode = <0750>, strPath = base/global, strPathType = backup:tmp
DEBUG: File->pathCreate(): bIgnoreExists = <false>, strMode = <0750>, strPath = base/path-test, strPathType = backup:tmp
DEBUG: File->pathCreate(): bIgnoreExists = <false>, strMode = <0750>, strPath = base/pg_tblspc, strPathType = backup:tmp
DEBUG: File->copy(): bAppendChecksum = <false>, bDestinationCompress = false, bDestinationPathCreate = true, bIgnoreMissingSource = true, bSourceCompressed = false, lModificationTime = [MODIFICATION-TIME-1], strDestinationFile = base/global/pg_control, strDestinationPathType = backup:tmp, strGroup = [undef], strMode = <0640>, strSourceFile = [TEST_PATH]/db/common/global/pg_control, strSourcePathType = db:absolute, strUser = [undef]
ERROR: [141]: remote process terminated: ERROR [141]: unable to read line after 1 seconds
DEBUG: Exit::exitSafe(): iExitCode = 141, strSignal = [undef]
INFO: backup stop
DEBUG: Common:::Lock::lockRelease(): bFailOnNoLock = false
> [BACKREST_BIN] --config=[TEST_PATH]/db/pg_backrest.conf --force stop
------------------------------------------------------------------------------------------------------------------------------------
INFO: stop start: --config=[TEST_PATH]/db/pg_backrest.conf --force --log-level-console=debug --log-level-file=trace --repo-path=[TEST_PATH]/local