mirror of
https://github.com/pgbackrest/pgbackrest.git
synced 2025-03-03 14:52:21 +02:00
Add write fault-tolerance to archive-push command.
The archive-push command will continue to push even after it gets a write error on one or more repos. The idea is to archive to as many repos as possible, even if we still need to throw an error to PostgreSQL to prevent it from removing the WAL file.
This commit is contained in:
parent
a1280c41e5
commit
3b8f0ef7ae
@ -18,7 +18,7 @@ freebsd_12_task:
|
||||
install_script: pkg install -y git postgresql-libpqxx pkgconf libxml2 gmake perl5 p5-YAML rsync
|
||||
|
||||
script:
|
||||
- perl ${CIRRUS_WORKING_DIR}/test/test.pl --no-gen --make-cmd=gmake --vm=none --vm-max=2 --no-coverage --no-valgrind --module=command --test=backup --test=archive-push
|
||||
- perl ${CIRRUS_WORKING_DIR}/test/test.pl --no-gen --make-cmd=gmake --vm=none --vm-max=2 --no-coverage --no-valgrind --module=command --test=backup
|
||||
|
||||
debug_script:
|
||||
- ls -lah ${CIRRUS_WORKING_DIR}
|
||||
@ -39,7 +39,7 @@ macos_catalina_task:
|
||||
- cpanm --local-lib=/usr/local/opt/perl5 install YAML
|
||||
|
||||
script:
|
||||
- ${CIRRUS_WORKING_DIR}/test/test.pl --no-gen --vm=none --vm-max=2 --no-coverage --no-valgrind --module=command --test=backup --test=archive-push
|
||||
- ${CIRRUS_WORKING_DIR}/test/test.pl --no-gen --vm=none --vm-max=2 --no-coverage --no-valgrind --module=command --test=backup
|
||||
|
||||
debug_script:
|
||||
- ls -lah ${CIRRUS_WORKING_DIR}
|
||||
|
@ -57,6 +57,7 @@
|
||||
<commit subject="Remove restore default repo from integration tests."/>
|
||||
<commit subject="Make --repo optional for backup command."/>
|
||||
<commit subject="Refactor archive-push command warnings to work like archive-get."/>
|
||||
<commit subject="Add write fault-tolerance to archive-push command."/>
|
||||
|
||||
<release-item-contributor-list>
|
||||
<release-item-contributor id="cynthia.shang"/>
|
||||
|
@ -16,6 +16,68 @@ Archive Push File
|
||||
#include "postgres/interface.h"
|
||||
#include "storage/helper.h"
|
||||
|
||||
/***********************************************************************************************************************************
Catch write errors during processing

We want to continue when there are write errors during processing so add them to a list to be reported later and return false so the
caller knows to stop writing on the affected repo.
***********************************************************************************************************************************/
// Identifies which I/O phase is being attempted on a repo so errors from each phase can be trapped and reported uniformly
typedef enum
{
    archivePushFileIoTypeOpen,                                      // Open the destination file for write
    archivePushFileIoTypeWrite,                                     // Write a buffer of WAL data to the destination
    archivePushFileIoTypeClose,                                     // Close (and flush) the destination
} ArchivePushFileIoType;
|
||||
|
||||
static bool
|
||||
archivePushFileIo(ArchivePushFileIoType type, IoWrite *write, const Buffer *buffer, unsigned int repoIdx, StringList *errorList)
|
||||
{
|
||||
FUNCTION_TEST_BEGIN();
|
||||
FUNCTION_TEST_PARAM(ENUM, type);
|
||||
FUNCTION_TEST_PARAM(IO_WRITE, write);
|
||||
FUNCTION_TEST_PARAM(BUFFER, buffer);
|
||||
FUNCTION_TEST_PARAM(UINT, repoIdx);
|
||||
FUNCTION_TEST_PARAM(STRING_LIST, errorList);
|
||||
FUNCTION_TEST_END();
|
||||
|
||||
ASSERT(write != NULL);
|
||||
ASSERT(errorList != NULL);
|
||||
|
||||
bool result = true;
|
||||
|
||||
// Process write operation
|
||||
TRY_BEGIN()
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case archivePushFileIoTypeOpen:
|
||||
ioWriteOpen(write);
|
||||
break;
|
||||
|
||||
case archivePushFileIoTypeWrite:
|
||||
ASSERT(buffer != NULL);
|
||||
ioWrite(write, buffer);
|
||||
break;
|
||||
|
||||
case archivePushFileIoTypeClose:
|
||||
ioWriteClose(write);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Handle errors
|
||||
CATCH_ANY()
|
||||
{
|
||||
strLstAdd(
|
||||
errorList,
|
||||
strNewFmt(
|
||||
"repo%u: [%s] %s", cfgOptionGroupIdxToKey(cfgOptGrpRepo, repoIdx), errorTypeName(errorType()), errorMessage()));
|
||||
result = false;
|
||||
}
|
||||
TRY_END();
|
||||
|
||||
FUNCTION_TEST_RETURN(result);
|
||||
}
|
||||
|
||||
/**********************************************************************************************************************************/
|
||||
ArchivePushFileResult
|
||||
archivePushFile(
|
||||
@ -37,6 +99,7 @@ archivePushFile(
|
||||
ASSERT(repoData != NULL);
|
||||
|
||||
ArchivePushFileResult result = {.warnList = strLstNew()};
|
||||
StringList *errorList = strLstNew();
|
||||
|
||||
MEM_CONTEXT_TEMP_BEGIN()
|
||||
{
|
||||
@ -178,7 +241,10 @@ archivePushFile(
|
||||
for (unsigned int repoIdx = 0; repoIdx < repoTotal; repoIdx++)
|
||||
{
|
||||
if (destinationCopy[repoIdx])
|
||||
ioWriteOpen(storageWriteIo(destination[repoIdx]));
|
||||
{
|
||||
destinationCopy[repoIdx] = archivePushFileIo(
|
||||
archivePushFileIoTypeOpen, storageWriteIo(destination[repoIdx]), NULL, repoIdx, errorList);
|
||||
}
|
||||
}
|
||||
|
||||
// Copy data from source to destination
|
||||
@ -193,7 +259,10 @@ archivePushFile(
|
||||
for (unsigned int repoIdx = 0; repoIdx < repoTotal; repoIdx++)
|
||||
{
|
||||
if (destinationCopy[repoIdx])
|
||||
ioWrite(storageWriteIo(destination[repoIdx]), read);
|
||||
{
|
||||
destinationCopy[repoIdx] = archivePushFileIo(
|
||||
archivePushFileIoTypeWrite, storageWriteIo(destination[repoIdx]), read, repoIdx, errorList);
|
||||
}
|
||||
}
|
||||
|
||||
// Clear buffer
|
||||
@ -207,11 +276,19 @@ archivePushFile(
|
||||
for (unsigned int repoIdx = 0; repoIdx < repoTotal; repoIdx++)
|
||||
{
|
||||
if (destinationCopy[repoIdx])
|
||||
ioWriteClose(storageWriteIo(destination[repoIdx]));
|
||||
{
|
||||
destinationCopy[repoIdx] = archivePushFileIo(
|
||||
archivePushFileIoTypeClose, storageWriteIo(destination[repoIdx]), NULL, repoIdx, errorList);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
MEM_CONTEXT_TEMP_END();
|
||||
|
||||
// Throw any errors, even if some files were successful. It is important that PostgreSQL receives an error so it does not
|
||||
// remove the file.
|
||||
if (strLstSize(errorList) > 0)
|
||||
THROW_FMT(CommandError, CFGCMD_ARCHIVE_PUSH " command encountered error(s):\n%s", strZ(strLstJoin(errorList, "\n")));
|
||||
|
||||
FUNCTION_LOG_RETURN_STRUCT(result);
|
||||
}
|
||||
|
@ -521,11 +521,38 @@ testRun(void)
|
||||
true, "check repo3 for WAL file");
|
||||
|
||||
// -------------------------------------------------------------------------------------------------------------------------
|
||||
TEST_TITLE("remove WAL from one repo and push again");
|
||||
TEST_TITLE("write error on one repo but other repo succeeds");
|
||||
|
||||
storageRemoveP(
|
||||
storageTest, strNewFmt("repo2/archive/test/11-1/0000000100000001/000000010000000100000002-%s", walBuffer2Sha1),
|
||||
.errorOnMissing = true);
|
||||
storageRemoveP(
|
||||
storageTest, strNewFmt("repo3/archive/test/11-1/0000000100000001/000000010000000100000002-%s", walBuffer2Sha1),
|
||||
.errorOnMissing = true);
|
||||
|
||||
HRN_STORAGE_MODE(storageTest, "repo2/archive/test/11-1/0000000100000001", .mode = 0500);
|
||||
|
||||
TEST_ERROR(
|
||||
cmdArchivePush(), CommandError,
|
||||
strZ(
|
||||
strNewFmt(
|
||||
"archive-push command encountered error(s):\n"
|
||||
"repo2: [FileOpenError] unable to open file '" TEST_PATH "/repo2/archive/test/11-1/0000000100000001"
|
||||
"/000000010000000100000002-%s' for write: [13] Permission denied", walBuffer2Sha1)));
|
||||
|
||||
TEST_RESULT_BOOL(
|
||||
storageExistsP(
|
||||
storageTest, strNewFmt("repo2/archive/test/11-1/0000000100000001/000000010000000100000002-%s", walBuffer2Sha1)),
|
||||
false, "check repo2 for no WAL file");
|
||||
TEST_RESULT_BOOL(
|
||||
storageExistsP(
|
||||
storageTest, strNewFmt("repo3/archive/test/11-1/0000000100000001/000000010000000100000002-%s", walBuffer2Sha1)),
|
||||
true, "check repo3 for WAL file");
|
||||
|
||||
HRN_STORAGE_MODE(storageTest, "repo2/archive/test/11-1/0000000100000001");
|
||||
|
||||
// -------------------------------------------------------------------------------------------------------------------------
|
||||
TEST_TITLE("push WAL to one repo");
|
||||
|
||||
TEST_RESULT_VOID(cmdArchivePush(), "push the WAL segment");
|
||||
harnessLogResult(
|
||||
|
Loading…
x
Reference in New Issue
Block a user