You've already forked pgbackrest
mirror of
https://github.com/pgbackrest/pgbackrest.git
synced 2025-07-15 01:04:37 +02:00
Add retries to PostgreSQL sleep when starting a backup.
Inaccuracies in the sleep duration or clock skew can leave a single sleep short of the next second. Retry the sleep up to three times to make the process more reliable, and throw an error after that so an infinite loop is still avoided if something is seriously wrong.
This commit is contained in:
@ -89,6 +89,15 @@
|
|||||||
|
|
||||||
<p>Improve performance of large file lists in <cmd>backup</cmd>/<cmd>restore</cmd> commands.</p>
|
<p>Improve performance of large file lists in <cmd>backup</cmd>/<cmd>restore</cmd> commands.</p>
|
||||||
</release-item>
|
</release-item>
|
||||||
|
|
||||||
|
<release-item>
|
||||||
|
<release-item-contributor-list>
|
||||||
|
<release-item-ideator id="vitaliy.kukharik"/>
|
||||||
|
<release-item-reviewer id="cynthia.shang"/>
|
||||||
|
</release-item-contributor-list>
|
||||||
|
|
||||||
|
<p>Add retries to <postgres/> sleep when starting a backup.</p>
|
||||||
|
</release-item>
|
||||||
</release-improvement-list>
|
</release-improvement-list>
|
||||||
|
|
||||||
<release-development-list>
|
<release-development-list>
|
||||||
|
@ -324,11 +324,25 @@ backupTime(BackupData *backupData, bool waitRemainder)
|
|||||||
// Sleep the remainder of the second when requested (this is so copyStart is not subject to one second resolution issues)
|
// Sleep the remainder of the second when requested (this is so copyStart is not subject to one second resolution issues)
|
||||||
if (waitRemainder)
|
if (waitRemainder)
|
||||||
{
|
{
|
||||||
sleepMSec(MSEC_PER_SEC - (timeMSec % MSEC_PER_SEC));
|
unsigned int retry = 0;
|
||||||
|
|
||||||
|
// Just to be safe we'll loop until PostgreSQL reports that we have slept long enough
|
||||||
|
do
|
||||||
|
{
|
||||||
|
// Error if the clock has not advanced after several attempts
|
||||||
|
if (retry == 3)
|
||||||
|
THROW_FMT(KernelError, PG_NAME " clock has not advanced to the next second after %u tries", retry);
|
||||||
|
|
||||||
|
// Sleep remainder of current second
|
||||||
|
sleepMSec(((TimeMSec)(result + 1) * MSEC_PER_SEC) - timeMSec);
|
||||||
|
|
||||||
// Check time again to be sure we slept long enough
|
// Check time again to be sure we slept long enough
|
||||||
if (result >= (time_t)(dbTimeMSec(backupData->dbPrimary) / MSEC_PER_SEC))
|
timeMSec = dbTimeMSec(backupData->dbPrimary);
|
||||||
THROW(AssertError, "invalid sleep for online backup time with wait remainder");
|
|
||||||
|
// Increment retry to prevent an infinite loop
|
||||||
|
retry++;
|
||||||
|
}
|
||||||
|
while ((time_t)(timeMSec / MSEC_PER_SEC) <= result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1206,7 +1206,7 @@ testRun(void)
|
|||||||
const String *repoPath = strNewFmt("%s/repo", testPath());
|
const String *repoPath = strNewFmt("%s/repo", testPath());
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------------------------------------------
|
||||||
TEST_TITLE("error when second does not advance after sleep");
|
TEST_TITLE("sleep retries and stall error");
|
||||||
|
|
||||||
StringList *argList = strLstNew();
|
StringList *argList = strLstNew();
|
||||||
strLstAddZ(argList, "--" CFGOPT_STANZA "=test1");
|
strLstAddZ(argList, "--" CFGOPT_STANZA "=test1");
|
||||||
@ -1225,9 +1225,16 @@ testRun(void)
|
|||||||
// Connect to primary
|
// Connect to primary
|
||||||
HRNPQ_MACRO_OPEN_GE_92(1, "dbname='postgres' port=5432", PG_VERSION_93, strZ(pg1Path), false, NULL, NULL),
|
HRNPQ_MACRO_OPEN_GE_92(1, "dbname='postgres' port=5432", PG_VERSION_93, strZ(pg1Path), false, NULL, NULL),
|
||||||
|
|
||||||
// Don't advance time after wait
|
// Advance the time slowly to force retries
|
||||||
HRNPQ_MACRO_TIME_QUERY(1, 1575392588998),
|
HRNPQ_MACRO_TIME_QUERY(1, 1575392588998),
|
||||||
HRNPQ_MACRO_TIME_QUERY(1, 1575392588999),
|
HRNPQ_MACRO_TIME_QUERY(1, 1575392588999),
|
||||||
|
HRNPQ_MACRO_TIME_QUERY(1, 1575392589001),
|
||||||
|
|
||||||
|
// Stall time to force an error
|
||||||
|
HRNPQ_MACRO_TIME_QUERY(1, 1575392589998),
|
||||||
|
HRNPQ_MACRO_TIME_QUERY(1, 1575392589997),
|
||||||
|
HRNPQ_MACRO_TIME_QUERY(1, 1575392589998),
|
||||||
|
HRNPQ_MACRO_TIME_QUERY(1, 1575392589999),
|
||||||
|
|
||||||
HRNPQ_MACRO_DONE()
|
HRNPQ_MACRO_DONE()
|
||||||
});
|
});
|
||||||
@ -1235,7 +1242,9 @@ testRun(void)
|
|||||||
BackupData *backupData = backupInit(
|
BackupData *backupData = backupInit(
|
||||||
infoBackupNew(PG_VERSION_93, PG_VERSION_93, pgCatalogTestVersion(PG_VERSION_93), NULL));
|
infoBackupNew(PG_VERSION_93, PG_VERSION_93, pgCatalogTestVersion(PG_VERSION_93), NULL));
|
||||||
|
|
||||||
TEST_ERROR(backupTime(backupData, true), AssertError, "invalid sleep for online backup time with wait remainder");
|
TEST_RESULT_INT(backupTime(backupData, true), 1575392588, "multiple tries for sleep");
|
||||||
|
TEST_ERROR(backupTime(backupData, true), KernelError, "PostgreSQL clock has not advanced to the next second after 3 tries");
|
||||||
|
|
||||||
dbFree(backupData->dbPrimary);
|
dbFree(backupData->dbPrimary);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user