mirror of
https://github.com/pgbackrest/pgbackrest.git
synced 2024-12-12 10:04:14 +02:00
Add archive-missing-retry option.
Retry a WAL segment that was previously reported as missing by the archive-get command when in asynchronous mode. Retrying prevents notifications left in the spool path by a prior restore from being used and possibly causing a recovery failure if consistency has not been reached. Disabling this option allows PostgreSQL to more reliably recognize when the end of the WAL in the archive has been reached, which permits it to switch over to streaming from the primary. With retries enabled, a steady stream of WAL being archived will cause PostgreSQL to continue getting WAL from the archive rather than switch to streaming. When disabling this option it is important to ensure that the spool path for the stanza is empty. The restore command does this automatically if the spool path is configured at restore time. Otherwise, it is up to the user to ensure the spool path is empty.
This commit is contained in:
parent
cacfdd94d7
commit
10038db9c9
@ -50,6 +50,17 @@
|
|||||||
<p>Add support for <proper>AWS</proper> <proper>S3</proper> server-side encryption using <proper>KMS</proper>.</p>
|
<p>Add support for <proper>AWS</proper> <proper>S3</proper> server-side encryption using <proper>KMS</proper>.</p>
|
||||||
</release-item>
|
</release-item>
|
||||||
|
|
||||||
|
<release-item>
|
||||||
|
<github-pull-request id="1642"/>
|
||||||
|
|
||||||
|
<release-item-contributor-list>
|
||||||
|
<release-item-contributor id="david.steele"/>
|
||||||
|
<release-item-reviewer id="stefan.fercot"/>
|
||||||
|
</release-item-contributor-list>
|
||||||
|
|
||||||
|
<p>Add <br-option>archive-missing-retry</br-option> option.</p>
|
||||||
|
</release-item>
|
||||||
|
|
||||||
<release-item>
|
<release-item>
|
||||||
<github-issue id="600"/>
|
<github-issue id="600"/>
|
||||||
<github-pull-request id="1651"/>
|
<github-pull-request id="1651"/>
|
||||||
|
@ -1040,6 +1040,15 @@ option:
|
|||||||
main: {}
|
main: {}
|
||||||
async: {}
|
async: {}
|
||||||
|
|
||||||
|
archive-missing-retry:
|
||||||
|
section: global
|
||||||
|
type: boolean
|
||||||
|
default: true
|
||||||
|
command:
|
||||||
|
archive-get: {}
|
||||||
|
command-role:
|
||||||
|
main: {}
|
||||||
|
|
||||||
archive-push-queue-max:
|
archive-push-queue-max:
|
||||||
section: global
|
section: global
|
||||||
type: size
|
type: size
|
||||||
|
@ -1291,6 +1291,21 @@
|
|||||||
<example>n</example>
|
<example>n</example>
|
||||||
</config-key>
|
</config-key>
|
||||||
|
|
||||||
|
<!-- ======================================================================================================= -->
|
||||||
|
<config-key id="archive-missing-retry" name="Retry Missing WAL Segment">
|
||||||
|
<summary>Retry missing WAL segment.</summary>
|
||||||
|
|
||||||
|
<text>
|
||||||
|
<p>Retry a WAL segment that was previously reported as missing by the <cmd>archive-get</cmd> command when in asynchronous mode. Retrying prevents notifications left in the spool path by a prior restore from being used and possibly causing a recovery failure if consistency has not been reached.</p>
|
||||||
|
|
||||||
|
<p>Disabling this option allows <postgres/> to more reliably recognize when the end of the WAL in the archive has been reached, which permits it to switch over to streaming from the primary. With retries enabled, a steady stream of WAL being archived will cause <postgres/> to continue getting WAL from the archive rather than switch to streaming.</p>
|
||||||
|
|
||||||
|
<p>When disabling this option it is important to ensure that the spool path for the stanza is empty. The <cmd>restore</cmd> command does this automatically if the spool path is configured at restore time. Otherwise, it is up to the user to ensure the spool path is empty.</p>
|
||||||
|
</text>
|
||||||
|
|
||||||
|
<example>n</example>
|
||||||
|
</config-key>
|
||||||
|
|
||||||
<!-- CONFIG - ARCHIVE SECTION - ARCHIVE-QUEUE-MAX KEY -->
|
<!-- CONFIG - ARCHIVE SECTION - ARCHIVE-QUEUE-MAX KEY -->
|
||||||
<config-key id="archive-push-queue-max" name="Maximum Archive Push Queue Size">
|
<config-key id="archive-push-queue-max" name="Maximum Archive Push Queue Size">
|
||||||
<summary>Maximum size of the <postgres/> archive queue.</summary>
|
<summary>Maximum size of the <postgres/> archive queue.</summary>
|
||||||
|
@ -647,11 +647,15 @@ cmdArchiveGet(void)
|
|||||||
// Check if the WAL segment is already in the queue
|
// Check if the WAL segment is already in the queue
|
||||||
found = storageExistsP(storageSpool(), strNewFmt(STORAGE_SPOOL_ARCHIVE_IN "/%s", strZ(walSegment)));
|
found = storageExistsP(storageSpool(), strNewFmt(STORAGE_SPOOL_ARCHIVE_IN "/%s", strZ(walSegment)));
|
||||||
|
|
||||||
|
// Determine whether a missing WAL segment will be retried. Retrying is safer, but not retrying lets PostgreSQL
|
||||||
|
// know that there are probably no more WAL segments in the archive which means it can switch to streaming.
|
||||||
|
const bool missingRetry = first && cfgOptionBool(cfgOptArchiveMissingRetry);
|
||||||
|
|
||||||
// Check for errors or missing files. For archive-get ok indicates that the process succeeded but there is no WAL
|
// Check for errors or missing files. For archive-get ok indicates that the process succeeded but there is no WAL
|
||||||
// file to download, or that there was a warning. Do not error on the first run so the async process can be spawned
|
// file to download, or that there was a warning. Do not error on the first run so the async process can be spawned
|
||||||
// to correct any errors from a previous run. Do not warn on the first run if the segment was not found so the async
|
// to correct any errors from a previous run. Do not warn on the first run if the segment was not found so the async
|
||||||
// process can be spawned to check for the file again.
|
// process can be spawned to check for the file again.
|
||||||
if (archiveAsyncStatus(archiveModeGet, walSegment, !first, found || !first))
|
if (archiveAsyncStatus(archiveModeGet, walSegment, !first, found || !missingRetry))
|
||||||
{
|
{
|
||||||
storageRemoveP(
|
storageRemoveP(
|
||||||
storageSpoolWrite(), strNewFmt(STORAGE_SPOOL_ARCHIVE_IN "/%s" STATUS_EXT_OK, strZ(walSegment)),
|
storageSpoolWrite(), strNewFmt(STORAGE_SPOOL_ARCHIVE_IN "/%s" STATUS_EXT_OK, strZ(walSegment)),
|
||||||
@ -662,7 +666,7 @@ cmdArchiveGet(void)
|
|||||||
// spawned by a prior archive-get execution, which means we should spawn the async process again to see if the
|
// spawned by a prior archive-get execution, which means we should spawn the async process again to see if the
|
||||||
// file exists now. This also prevents spool files from a previous recovery interfering with the current
|
// file exists now. This also prevents spool files from a previous recovery interfering with the current
|
||||||
// recovery.
|
// recovery.
|
||||||
if (!found && !first)
|
if (!found && !missingRetry)
|
||||||
{
|
{
|
||||||
foundOk = true;
|
foundOk = true;
|
||||||
break;
|
break;
|
||||||
|
@ -47,6 +47,7 @@ Option constants
|
|||||||
#define CFGOPT_ARCHIVE_COPY "archive-copy"
|
#define CFGOPT_ARCHIVE_COPY "archive-copy"
|
||||||
#define CFGOPT_ARCHIVE_GET_QUEUE_MAX "archive-get-queue-max"
|
#define CFGOPT_ARCHIVE_GET_QUEUE_MAX "archive-get-queue-max"
|
||||||
#define CFGOPT_ARCHIVE_HEADER_CHECK "archive-header-check"
|
#define CFGOPT_ARCHIVE_HEADER_CHECK "archive-header-check"
|
||||||
|
#define CFGOPT_ARCHIVE_MISSING_RETRY "archive-missing-retry"
|
||||||
#define CFGOPT_ARCHIVE_MODE "archive-mode"
|
#define CFGOPT_ARCHIVE_MODE "archive-mode"
|
||||||
#define CFGOPT_ARCHIVE_MODE_CHECK "archive-mode-check"
|
#define CFGOPT_ARCHIVE_MODE_CHECK "archive-mode-check"
|
||||||
#define CFGOPT_ARCHIVE_PUSH_QUEUE_MAX "archive-push-queue-max"
|
#define CFGOPT_ARCHIVE_PUSH_QUEUE_MAX "archive-push-queue-max"
|
||||||
@ -129,7 +130,7 @@ Option constants
|
|||||||
#define CFGOPT_TLS_SERVER_PORT "tls-server-port"
|
#define CFGOPT_TLS_SERVER_PORT "tls-server-port"
|
||||||
#define CFGOPT_TYPE "type"
|
#define CFGOPT_TYPE "type"
|
||||||
|
|
||||||
#define CFG_OPTION_TOTAL 153
|
#define CFG_OPTION_TOTAL 154
|
||||||
|
|
||||||
/***********************************************************************************************************************************
|
/***********************************************************************************************************************************
|
||||||
Option value constants
|
Option value constants
|
||||||
@ -360,6 +361,7 @@ typedef enum
|
|||||||
cfgOptArchiveCopy,
|
cfgOptArchiveCopy,
|
||||||
cfgOptArchiveGetQueueMax,
|
cfgOptArchiveGetQueueMax,
|
||||||
cfgOptArchiveHeaderCheck,
|
cfgOptArchiveHeaderCheck,
|
||||||
|
cfgOptArchiveMissingRetry,
|
||||||
cfgOptArchiveMode,
|
cfgOptArchiveMode,
|
||||||
cfgOptArchiveModeCheck,
|
cfgOptArchiveModeCheck,
|
||||||
cfgOptArchivePushQueueMax,
|
cfgOptArchivePushQueueMax,
|
||||||
|
@ -896,6 +896,33 @@ static const ParseRuleOption parseRuleOption[CFG_OPTION_TOTAL] =
|
|||||||
),
|
),
|
||||||
),
|
),
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------------------------------------------------------
|
||||||
|
PARSE_RULE_OPTION
|
||||||
|
(
|
||||||
|
PARSE_RULE_OPTION_NAME("archive-missing-retry"),
|
||||||
|
PARSE_RULE_OPTION_TYPE(cfgOptTypeBoolean),
|
||||||
|
PARSE_RULE_OPTION_NEGATE(true),
|
||||||
|
PARSE_RULE_OPTION_RESET(true),
|
||||||
|
PARSE_RULE_OPTION_REQUIRED(true),
|
||||||
|
PARSE_RULE_OPTION_SECTION(cfgSectionGlobal),
|
||||||
|
|
||||||
|
PARSE_RULE_OPTION_COMMAND_ROLE_MAIN_VALID_LIST
|
||||||
|
(
|
||||||
|
PARSE_RULE_OPTION_COMMAND(cfgCmdArchiveGet)
|
||||||
|
),
|
||||||
|
|
||||||
|
PARSE_RULE_OPTIONAL
|
||||||
|
(
|
||||||
|
PARSE_RULE_OPTIONAL_GROUP
|
||||||
|
(
|
||||||
|
PARSE_RULE_OPTIONAL_DEFAULT
|
||||||
|
(
|
||||||
|
PARSE_RULE_VAL_BOOL_TRUE,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
),
|
||||||
|
),
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------------------
|
||||||
PARSE_RULE_OPTION
|
PARSE_RULE_OPTION
|
||||||
(
|
(
|
||||||
@ -9197,6 +9224,7 @@ static const ConfigOption optionResolveOrder[] =
|
|||||||
cfgOptArchiveAsync,
|
cfgOptArchiveAsync,
|
||||||
cfgOptArchiveGetQueueMax,
|
cfgOptArchiveGetQueueMax,
|
||||||
cfgOptArchiveHeaderCheck,
|
cfgOptArchiveHeaderCheck,
|
||||||
|
cfgOptArchiveMissingRetry,
|
||||||
cfgOptArchiveMode,
|
cfgOptArchiveMode,
|
||||||
cfgOptArchivePushQueueMax,
|
cfgOptArchivePushQueueMax,
|
||||||
cfgOptArchiveTimeout,
|
cfgOptArchiveTimeout,
|
||||||
|
@ -1119,6 +1119,41 @@ testRun(void)
|
|||||||
// Check that the ok file is missing since it should have been removed on the first loop and removed again on a subsequent
|
// Check that the ok file is missing since it should have been removed on the first loop and removed again on a subsequent
|
||||||
// loop once the async process discovered that the file was missing and wrote the ok file again.
|
// loop once the async process discovered that the file was missing and wrote the ok file again.
|
||||||
TEST_STORAGE_LIST_EMPTY(storageSpool(), STORAGE_SPOOL_ARCHIVE_IN);
|
TEST_STORAGE_LIST_EMPTY(storageSpool(), STORAGE_SPOOL_ARCHIVE_IN);
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------------------------------------------
|
||||||
|
TEST_TITLE("do not retry missing segment");
|
||||||
|
|
||||||
|
argList = strLstNew();
|
||||||
|
hrnCfgArgRawZ(argList, cfgOptPgPath, TEST_PATH "/pg");
|
||||||
|
hrnCfgArgRawZ(argList, cfgOptRepoPath, TEST_PATH "/repo");
|
||||||
|
hrnCfgArgRawZ(argList, cfgOptStanza, "test1");
|
||||||
|
hrnCfgArgRawZ(argList, cfgOptArchiveTimeout, "10");
|
||||||
|
hrnCfgArgRawZ(argList, cfgOptSpoolPath, TEST_PATH "/spool");
|
||||||
|
hrnCfgArgRawBool(argList, cfgOptArchiveAsync, true);
|
||||||
|
hrnCfgArgRawBool(argList, cfgOptArchiveMissingRetry, false);
|
||||||
|
strLstAddZ(argList, "000000010000000100000001");
|
||||||
|
strLstAddZ(argList, "pg_wal/RECOVERYXLOG");
|
||||||
|
HRN_CFG_LOAD(cfgCmdArchiveGet, argList);
|
||||||
|
|
||||||
|
// Make sure that a WAL segment is found when the ok file is missing
|
||||||
|
HRN_STORAGE_PUT_EMPTY(
|
||||||
|
storageRepoWrite(), STORAGE_REPO_ARCHIVE "/10-2/000000010000000100000001-abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd");
|
||||||
|
|
||||||
|
TEST_RESULT_VOID(cmdArchiveGet(), "get async");
|
||||||
|
TEST_RESULT_LOG("P00 INFO: found 000000010000000100000001 in the archive asynchronously");
|
||||||
|
|
||||||
|
// Remove the ok file created by the async process
|
||||||
|
TEST_STORAGE_LIST(storageSpoolWrite(), STORAGE_SPOOL_ARCHIVE_IN, "000000010000000100000002.ok\n", .remove = true);
|
||||||
|
|
||||||
|
// Write an ok file
|
||||||
|
HRN_STORAGE_PUT_EMPTY(storageSpoolWrite(), STORAGE_SPOOL_ARCHIVE_IN "/000000010000000100000001.ok");
|
||||||
|
|
||||||
|
// Missing should be returned since archive-missing-retry=n
|
||||||
|
TEST_RESULT_VOID(cmdArchiveGet(), "get async");
|
||||||
|
TEST_RESULT_LOG("P00 INFO: unable to find 000000010000000100000001 in the archive asynchronously");
|
||||||
|
|
||||||
|
// Check that the ok file was removed
|
||||||
|
TEST_STORAGE_LIST_EMPTY(storageSpool(), STORAGE_SPOOL_ARCHIVE_IN);
|
||||||
}
|
}
|
||||||
|
|
||||||
FUNCTION_HARNESS_RETURN_VOID();
|
FUNCTION_HARNESS_RETURN_VOID();
|
||||||
|
Loading…
Reference in New Issue
Block a user