mirror of
https://github.com/pgbackrest/pgbackrest.git
synced 2025-04-17 11:46:39 +02:00
Loop while waiting for checkpoint LSN to reach replay LSN.
It is possible for the checkpoint LSN to lag slightly behind the replay LSN until pg_control has been updated. Add a loop to keep checking rather than failing when the checkpoint LSN has not been updated.
This commit is contained in:
parent
a6691c6f61
commit
8fac1ac50d
@ -72,6 +72,19 @@
|
||||
|
||||
<p>Change level of <cmd>backup</cmd>/<cmd>restore</cmd> copied file logging to detail.</p>
|
||||
</release-item>
|
||||
|
||||
<release-item>
|
||||
<github-issue id="1460"/>
|
||||
<github-pull-request id="1450"/>
|
||||
|
||||
<release-item-contributor-list>
|
||||
<release-item-ideator id="fatih.mencutekin"/>
|
||||
<release-item-contributor id="stefan.fercot"/>
|
||||
<release-item-reviewer id="david.steele"/>
|
||||
</release-item-contributor-list>
|
||||
|
||||
<p>Loop while waiting for checkpoint LSN to reach replay LSN.</p>
|
||||
</release-item>
|
||||
</release-improvement-list>
|
||||
|
||||
<release-development-list>
|
||||
@ -9999,6 +10012,11 @@
|
||||
<contributor-id type="github">evanbenoit</contributor-id>
|
||||
</contributor>
|
||||
|
||||
<contributor id="fatih.mencutekin">
|
||||
<contributor-name-display>Fatih Mencutekin</contributor-name-display>
|
||||
<contributor-id type="github">fatihmtekin</contributor-id>
|
||||
</contributor>
|
||||
|
||||
<contributor id="floris.van.nee">
|
||||
<contributor-name-display>Floris van Nee</contributor-name-display>
|
||||
<contributor-id type="github">fvannee</contributor-id>
|
||||
|
43
src/db/db.c
43
src/db/db.c
@ -567,30 +567,37 @@ dbReplayWait(Db *this, const String *targetLsn, TimeMSec timeout)
|
||||
// Perform a checkpoint
|
||||
dbExec(this, STRDEF("checkpoint"));
|
||||
|
||||
// On PostgreSQL >= 9.6 the checkpoint location can be verified
|
||||
//
|
||||
// ??? We have seen one instance where this check failed. Is there any chance that the replayed position could be ahead of
|
||||
// the checkpoint recorded in pg_control? It seems possible since it would be safer if the checkpoint in pg_control was
|
||||
// behind rather than ahead, so add a loop to keep checking until the checkpoint has been recorded or timeout.
|
||||
// On PostgreSQL >= 9.6 the checkpoint location can be verified so loop until lsn has been reached or timeout
|
||||
if (dbPgVersion(this) >= PG_VERSION_96)
|
||||
{
|
||||
// Build the query
|
||||
const String *query = strNewFmt(
|
||||
"select (checkpoint_%s > '%s')::bool as targetReached,\n"
|
||||
" checkpoint_%s::text as checkpointLsn\n"
|
||||
" from pg_catalog.pg_control_checkpoint()",
|
||||
lsnName, strZ(targetLsn), lsnName);
|
||||
wait = waitNew(timeout);
|
||||
targetReached = false;
|
||||
const String *checkpointLsn = NULL;
|
||||
|
||||
// Execute query
|
||||
VariantList *row = dbQueryRow(this, query);
|
||||
do
|
||||
{
|
||||
// Build the query
|
||||
const String *query = strNewFmt(
|
||||
"select (checkpoint_%s > '%s')::bool as targetReached,\n"
|
||||
" checkpoint_%s::text as checkpointLsn\n"
|
||||
" from pg_catalog.pg_control_checkpoint()",
|
||||
lsnName, strZ(targetLsn), lsnName);
|
||||
|
||||
// Verify target was reached
|
||||
if (!varBool(varLstGet(row, 0)))
|
||||
// Execute the query and get checkpointLsn
|
||||
VariantList *row = dbQueryRow(this, query);
|
||||
targetReached = varBool(varLstGet(row, 0));
|
||||
checkpointLsn = varStr(varLstGet(row, 1));
|
||||
|
||||
protocolKeepAlive();
|
||||
}
|
||||
while (!targetReached && waitMore(wait));
|
||||
|
||||
// Error if a timeout occurred before the target lsn was reached
|
||||
if (!targetReached)
|
||||
{
|
||||
THROW_FMT(
|
||||
ArchiveTimeoutError,
|
||||
"the checkpoint lsn %s is less than the target lsn %s even though the replay lsn is %s",
|
||||
strZ(varStr(varLstGet(row, 1))), strZ(targetLsn), strZ(replayLsn));
|
||||
ArchiveTimeoutError, "timeout before standby checkpoint lsn reached %s - only reached %s", strZ(targetLsn),
|
||||
strZ(checkpointLsn));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -452,14 +452,15 @@ Macros for defining groups of functions that implement various queries and comma
|
||||
#define HRNPQ_MACRO_REPLAY_TARGET_REACHED_GE_10(sessionParam, targetLsnParam, targetReachedParam, reachedLsnParam) \
|
||||
HRNPQ_MACRO_REPLAY_TARGET_REACHED(sessionParam, "wal", "lsn", targetLsnParam, targetReachedParam, reachedLsnParam)
|
||||
|
||||
#define HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED(sessionParam, lsnNameParam, targetLsnParam, targetReachedParam, checkpointLsnParam) \
|
||||
#define HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED( \
|
||||
sessionParam, lsnNameParam, targetLsnParam, targetReachedParam, checkpointLsnParam, sleepParam) \
|
||||
{.session = sessionParam, \
|
||||
.function = HRNPQ_SENDQUERY, \
|
||||
.param = strZ(strNewFmt( \
|
||||
"[\"select (checkpoint_" lsnNameParam " > '%s')::bool as targetReached,\\n" \
|
||||
" checkpoint_" lsnNameParam "::text as checkpointLsn\\n" \
|
||||
" from pg_catalog.pg_control_checkpoint()\"]", targetLsnParam)), \
|
||||
.resultInt = 1}, \
|
||||
.resultInt = 1, .sleep = sleepParam}, \
|
||||
{.session = sessionParam, .function = HRNPQ_CONSUMEINPUT}, \
|
||||
{.session = sessionParam, .function = HRNPQ_ISBUSY}, \
|
||||
{.session = sessionParam, .function = HRNPQ_GETRESULT}, \
|
||||
@ -473,11 +474,13 @@ Macros for defining groups of functions that implement various queries and comma
|
||||
{.session = sessionParam, .function = HRNPQ_CLEAR}, \
|
||||
{.session = sessionParam, .function = HRNPQ_GETRESULT, .resultNull = true}
|
||||
|
||||
#define HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED_96(sessionParam, targetLsnParam, targetReachedParam, checkpointLsnParam) \
|
||||
HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED(sessionParam, "location", targetLsnParam, targetReachedParam, checkpointLsnParam)
|
||||
#define HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED_96(sessionParam, targetLsnParam, targetReachedParam, checkpointLsnParam, sleepParam) \
|
||||
HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED( \
|
||||
sessionParam, "location", targetLsnParam, targetReachedParam, checkpointLsnParam, sleepParam)
|
||||
|
||||
#define HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED_GE_10(sessionParam, targetLsnParam, targetReachedParam, checkpointLsnParam) \
|
||||
HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED(sessionParam, "lsn", targetLsnParam, targetReachedParam, checkpointLsnParam)
|
||||
#define HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED_GE_10( \
|
||||
sessionParam, targetLsnParam, targetReachedParam, checkpointLsnParam, sleepParam) \
|
||||
HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED(sessionParam, "lsn", targetLsnParam, targetReachedParam, checkpointLsnParam, sleepParam)
|
||||
|
||||
#define HRNPQ_MACRO_REPLAY_WAIT_LE_95(sessionParam, targetLsnParam) \
|
||||
HRNPQ_MACRO_REPLAY_TARGET_REACHED_LE_96(sessionParam, targetLsnParam, true, "X/X"), \
|
||||
@ -486,12 +489,12 @@ Macros for defining groups of functions that implement various queries and comma
|
||||
#define HRNPQ_MACRO_REPLAY_WAIT_96(sessionParam, targetLsnParam) \
|
||||
HRNPQ_MACRO_REPLAY_TARGET_REACHED_LE_96(sessionParam, targetLsnParam, true, "X/X"), \
|
||||
HRNPQ_MACRO_CHECKPOINT(sessionParam), \
|
||||
HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED_96(sessionParam, targetLsnParam, true, "X/X")
|
||||
HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED_96(sessionParam, targetLsnParam, true, "X/X", 0)
|
||||
|
||||
#define HRNPQ_MACRO_REPLAY_WAIT_GE_10(sessionParam, targetLsnParam) \
|
||||
HRNPQ_MACRO_REPLAY_TARGET_REACHED_GE_10(sessionParam, targetLsnParam, true, "X/X"), \
|
||||
HRNPQ_MACRO_CHECKPOINT(sessionParam), \
|
||||
HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED_GE_10(sessionParam, targetLsnParam, true, "X/X")
|
||||
HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED_GE_10(sessionParam, targetLsnParam, true, "X/X", 0)
|
||||
|
||||
#define HRNPQ_MACRO_CLOSE(sessionParam) \
|
||||
{.session = sessionParam, .function = HRNPQ_FINISH}
|
||||
|
@ -470,10 +470,11 @@ testRun(void)
|
||||
HRNPQ_MACRO_REPLAY_TARGET_REACHED_PROGRESS_GE_10(2, "5/5", false, "5/3", "5/3", false, 250),
|
||||
HRNPQ_MACRO_REPLAY_TARGET_REACHED_PROGRESS_GE_10(2, "5/5", false, "5/3", "5/3", false, 0),
|
||||
|
||||
// Checkpoint target not reached
|
||||
// Checkpoint target timeout waiting for sync
|
||||
HRNPQ_MACRO_REPLAY_TARGET_REACHED_GE_10(2, "5/5", true, "5/5"),
|
||||
HRNPQ_MACRO_CHECKPOINT(2),
|
||||
HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED_GE_10(2, "5/5", false, "5/4"),
|
||||
HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED_GE_10(2, "5/5", false, "5/4", 250),
|
||||
HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED_GE_10(2, "5/5", false, "5/4", 0),
|
||||
|
||||
// Wait for standby to sync
|
||||
HRNPQ_MACRO_REPLAY_TARGET_REACHED_GE_10(2, "5/5", false, "5/3"),
|
||||
@ -481,7 +482,7 @@ testRun(void)
|
||||
HRNPQ_MACRO_REPLAY_TARGET_REACHED_PROGRESS_GE_10(2, "5/5", false, "5/4", "5/3", true, 0),
|
||||
HRNPQ_MACRO_REPLAY_TARGET_REACHED_PROGRESS_GE_10(2, "5/5", true, "5/5", "5/4", true, 0),
|
||||
HRNPQ_MACRO_CHECKPOINT(2),
|
||||
HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED_GE_10(2, "5/5", true, "X/X"),
|
||||
HRNPQ_MACRO_CHECKPOINT_TARGET_REACHED_GE_10(2, "5/5", true, "X/X", 0),
|
||||
|
||||
// Close standby
|
||||
HRNPQ_MACRO_CLOSE(2),
|
||||
@ -509,8 +510,8 @@ testRun(void)
|
||||
"timeout before standby replayed to 5/5 - only reached 5/3");
|
||||
|
||||
TEST_ERROR(
|
||||
dbReplayWait(db.standby, STRDEF("5/5"), 1000), ArchiveTimeoutError,
|
||||
"the checkpoint lsn 5/4 is less than the target lsn 5/5 even though the replay lsn is 5/5");
|
||||
dbReplayWait(db.standby, STRDEF("5/5"), 200), ArchiveTimeoutError,
|
||||
"timeout before standby checkpoint lsn reached 5/5 - only reached 5/4");
|
||||
|
||||
TEST_RESULT_VOID(dbReplayWait(db.standby, STRDEF("5/5"), 1000), "sync standby");
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user