1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2025-03-03 14:52:21 +02:00

Add write fault-tolerance to archive-push command.

The archive-push command will continue to push even after it gets a write error on one or more repos. The idea is to archive to as many repos as possible even though we still need to throw an error to PostgreSQL to prevent it from removing the WAL file.
This commit is contained in:
David Steele 2021-02-26 16:52:59 -05:00
parent a1280c41e5
commit 3b8f0ef7ae
4 changed files with 111 additions and 6 deletions

View File

@ -18,7 +18,7 @@ freebsd_12_task:
install_script: pkg install -y git postgresql-libpqxx pkgconf libxml2 gmake perl5 p5-YAML rsync
script:
- perl ${CIRRUS_WORKING_DIR}/test/test.pl --no-gen --make-cmd=gmake --vm=none --vm-max=2 --no-coverage --no-valgrind --module=command --test=backup --test=archive-push
- perl ${CIRRUS_WORKING_DIR}/test/test.pl --no-gen --make-cmd=gmake --vm=none --vm-max=2 --no-coverage --no-valgrind --module=command --test=backup
debug_script:
- ls -lah ${CIRRUS_WORKING_DIR}
@ -39,7 +39,7 @@ macos_catalina_task:
- cpanm --local-lib=/usr/local/opt/perl5 install YAML
script:
- ${CIRRUS_WORKING_DIR}/test/test.pl --no-gen --vm=none --vm-max=2 --no-coverage --no-valgrind --module=command --test=backup --test=archive-push
- ${CIRRUS_WORKING_DIR}/test/test.pl --no-gen --vm=none --vm-max=2 --no-coverage --no-valgrind --module=command --test=backup
debug_script:
- ls -lah ${CIRRUS_WORKING_DIR}

View File

@ -57,6 +57,7 @@
<commit subject="Remove restore default repo from integration tests."/>
<commit subject="Make --repo optional for backup command."/>
<commit subject="Refactor archive-push command warnings to work like archive-get."/>
<commit subject="Add write fault-tolerance to archive-push command."/>
<release-item-contributor-list>
<release-item-contributor id="cynthia.shang"/>

View File

@ -16,6 +16,68 @@ Archive Push File
#include "postgres/interface.h"
#include "storage/helper.h"
/***********************************************************************************************************************************
Catch write errors during processing
We want to continue when there are write errors during processing so add them to a list to be reported later and return false so the
caller knows to stop writing on the affected repo.
***********************************************************************************************************************************/
// Selects which write operation archivePushFileIo() performs on the destination
typedef enum
{
    archivePushFileIoTypeOpen = 0,                                  // Open the destination (ioWriteOpen)
    archivePushFileIoTypeWrite = 1,                                 // Write a buffer to the destination (ioWrite)
    archivePushFileIoTypeClose = 2,                                 // Close the destination (ioWriteClose)
} ArchivePushFileIoType;
static bool
archivePushFileIo(ArchivePushFileIoType type, IoWrite *write, const Buffer *buffer, unsigned int repoIdx, StringList *errorList)
{
    FUNCTION_TEST_BEGIN();
        FUNCTION_TEST_PARAM(ENUM, type);
        FUNCTION_TEST_PARAM(IO_WRITE, write);
        FUNCTION_TEST_PARAM(BUFFER, buffer);
        FUNCTION_TEST_PARAM(UINT, repoIdx);
        FUNCTION_TEST_PARAM(STRING_LIST, errorList);
    FUNCTION_TEST_END();

    ASSERT(write != NULL);
    ASSERT(errorList != NULL);

    // Stays true unless the requested operation throws
    bool success = true;

    TRY_BEGIN()
    {
        // Dispatch the requested operation. The buffer is only required (and only used) for a write.
        if (type == archivePushFileIoTypeOpen)
        {
            ioWriteOpen(write);
        }
        else if (type == archivePushFileIoTypeWrite)
        {
            ASSERT(buffer != NULL);
            ioWrite(write, buffer);
        }
        else if (type == archivePushFileIoTypeClose)
        {
            ioWriteClose(write);
        }
    }
    CATCH_ANY()
    {
        // Do not rethrow -- record the error, prefixed with the repo key, and report failure so the caller can stop writing
        // to this repo
        const unsigned int repoKey = cfgOptionGroupIdxToKey(cfgOptGrpRepo, repoIdx);

        strLstAdd(errorList, strNewFmt("repo%u: [%s] %s", repoKey, errorTypeName(errorType()), errorMessage()));

        success = false;
    }
    TRY_END();

    FUNCTION_TEST_RETURN(success);
}
/**********************************************************************************************************************************/
ArchivePushFileResult
archivePushFile(
@ -37,6 +99,7 @@ archivePushFile(
ASSERT(repoData != NULL);
ArchivePushFileResult result = {.warnList = strLstNew()};
StringList *errorList = strLstNew();
MEM_CONTEXT_TEMP_BEGIN()
{
@ -178,7 +241,10 @@ archivePushFile(
for (unsigned int repoIdx = 0; repoIdx < repoTotal; repoIdx++)
{
if (destinationCopy[repoIdx])
ioWriteOpen(storageWriteIo(destination[repoIdx]));
{
destinationCopy[repoIdx] = archivePushFileIo(
archivePushFileIoTypeOpen, storageWriteIo(destination[repoIdx]), NULL, repoIdx, errorList);
}
}
// Copy data from source to destination
@ -193,7 +259,10 @@ archivePushFile(
for (unsigned int repoIdx = 0; repoIdx < repoTotal; repoIdx++)
{
if (destinationCopy[repoIdx])
ioWrite(storageWriteIo(destination[repoIdx]), read);
{
destinationCopy[repoIdx] = archivePushFileIo(
archivePushFileIoTypeWrite, storageWriteIo(destination[repoIdx]), read, repoIdx, errorList);
}
}
// Clear buffer
@ -207,11 +276,19 @@ archivePushFile(
for (unsigned int repoIdx = 0; repoIdx < repoTotal; repoIdx++)
{
if (destinationCopy[repoIdx])
ioWriteClose(storageWriteIo(destination[repoIdx]));
{
destinationCopy[repoIdx] = archivePushFileIo(
archivePushFileIoTypeClose, storageWriteIo(destination[repoIdx]), NULL, repoIdx, errorList);
}
}
}
}
MEM_CONTEXT_TEMP_END();
// Throw any errors, even if some files were successful. It is important that PostgreSQL receives an error so it does not
// remove the file.
if (strLstSize(errorList) > 0)
THROW_FMT(CommandError, CFGCMD_ARCHIVE_PUSH " command encountered error(s):\n%s", strZ(strLstJoin(errorList, "\n")));
FUNCTION_LOG_RETURN_STRUCT(result);
}

View File

@ -521,11 +521,38 @@ testRun(void)
true, "check repo3 for WAL file");
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("remove WAL from one repo and push again");
TEST_TITLE("write error on one repo but other repo succeeds");
storageRemoveP(
storageTest, strNewFmt("repo2/archive/test/11-1/0000000100000001/000000010000000100000002-%s", walBuffer2Sha1),
.errorOnMissing = true);
storageRemoveP(
storageTest, strNewFmt("repo3/archive/test/11-1/0000000100000001/000000010000000100000002-%s", walBuffer2Sha1),
.errorOnMissing = true);
HRN_STORAGE_MODE(storageTest, "repo2/archive/test/11-1/0000000100000001", .mode = 0500);
TEST_ERROR(
cmdArchivePush(), CommandError,
strZ(
strNewFmt(
"archive-push command encountered error(s):\n"
"repo2: [FileOpenError] unable to open file '" TEST_PATH "/repo2/archive/test/11-1/0000000100000001"
"/000000010000000100000002-%s' for write: [13] Permission denied", walBuffer2Sha1)));
TEST_RESULT_BOOL(
storageExistsP(
storageTest, strNewFmt("repo2/archive/test/11-1/0000000100000001/000000010000000100000002-%s", walBuffer2Sha1)),
false, "check repo2 for no WAL file");
TEST_RESULT_BOOL(
storageExistsP(
storageTest, strNewFmt("repo3/archive/test/11-1/0000000100000001/000000010000000100000002-%s", walBuffer2Sha1)),
true, "check repo3 for WAL file");
HRN_STORAGE_MODE(storageTest, "repo2/archive/test/11-1/0000000100000001");
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("push WAL to one repo");
TEST_RESULT_VOID(cmdArchivePush(), "push the WAL segment");
harnessLogResult(