1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2024-12-14 10:13:05 +02:00

Added an optional delay after manifest build so that files are not copied in the same second that the manifest is built. Copying a file in the same second can result in (admittedly unlikely) race conditions that can produce an invalid backup. I was also able to reduce the sleep times when waiting for thread termination - so unit test times are improved by almost 100%.

This commit is contained in:
David Steele 2015-01-02 14:18:07 -05:00
parent 297b22cb2b
commit 2e080eedb8
3 changed files with 27 additions and 5 deletions

View File

@ -177,7 +177,7 @@ sub backup_thread_complete
{
# !!! This should be shorter - currently it is this long to be sure that backups do not happen more than once a second and cause
# path naming conflicts
sleep(1);
sleep(.1);
# If a timeout has been defined, make sure we have not been running longer than that
if (defined($iTimeout))
@ -823,7 +823,7 @@ sub backup_manifest_build
my %oManifestHash;
$oFile->manifest(PATH_DB_ABSOLUTE, $strDbClusterPath, \%oManifestHash);
$oFile->manifest(PATH_DB_ABSOLUTE, $strDbClusterPath, \%oManifestHash, true);
foreach my $strName (sort(keys $oManifestHash{name}))
{

View File

@ -8,6 +8,7 @@ use strict;
use warnings;
use Carp;
use POSIX;
use Net::OpenSSH;
use File::Basename;
use File::Copy qw(cp);
@ -18,6 +19,7 @@ use Fcntl ':mode';
use IO::Compress::Gzip qw(gzip $GzipError);
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use IO::String;
use Time::HiRes qw/gettimeofday usleep/;
use lib dirname($0) . '/../lib';
use BackRest::Exception;
@ -961,6 +963,10 @@ sub manifest
my $strPathType = shift;
my $strPath = shift;
my $oManifestHashRef = shift;
my $bPause = shift; # Wait until next second before returning?
# Set defaults
$bPause = defined($bPause) ? ($bPause ? true : false) : false;
# Set operation variables
my $strPathOp = $self->path_get($strPathType, $strPath);
@ -990,6 +996,22 @@ sub manifest
else
{
$self->manifest_recurse($strPathType, $strPathOp, undef, 0, $oManifestHashRef, $strDebug);
# If pause is requested then sleep into the next clock second. The manifest is being built to determine which files to
# copy, but sometimes the files can be modified after the manifest is built but before a second has elapsed. In this case
# there is a small window where an earlier version of the file might be copied and a later version of the file will have
# the same size/timestamp. By waiting, this race condition is eliminated. Waiting a partial second must be done on the db
# side to be correct (which is why pause is not in the backup code). The wait could also be a full second but that has an
# impact on how long it takes to run unit tests, and this is simple enough.
if ($bPause)
{
my $lTimeBegin = gettimeofday();
my $lSleepMs = ceil(((int($lTimeBegin) + 1) - $lTimeBegin) * 1000);
usleep($lSleepMs * 1000);
&log(DEBUG, "slept ${lSleepMs}ms after manifest: begin ${lTimeBegin}, end " . gettimeofday());
}
}
}

View File

@ -1504,7 +1504,7 @@ sub BackRestTestBackup_Test
# Construct filename to test
my $strFile = BackRestTestCommon_DbCommonPathGet() . "/base";
# Get the oid of the user db
# Get the oid of the postgres db
my $strSql = "select oid from pg_database where datname = 'postgres'";
my $hStatement = $hDb->prepare($strSql);
@ -1523,7 +1523,7 @@ sub BackRestTestBackup_Test
$hStatement->execute() or
confess &log(ERROR, "Unable to execute: ${strSql}");
my @oyRow = $hStatement->fetchrow_array();
@oyRow = $hStatement->fetchrow_array();
$strFile .= '/' . $oyRow[0];
&log(INFO, 'table filename = ' . $strFile);
@ -1532,7 +1532,7 @@ sub BackRestTestBackup_Test
BackRestTestBackup_PgExecute("select pg_start_backup('test');");
# File modified in the same second that the manifest is taken and file is copied
# File modified in the same second after the manifest is taken and file is copied
while ($iRun < $iRunMax)
{
# Increment the run, log, and decide whether this unit test should be run