1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2024-12-12 10:04:14 +02:00

Retry errors in S3 batch file delete.

If the entire batch failed, it would be retried, but individual file errors were not. This could cause pgBackRest to terminate during expiration or when removing an unresumable backup.

Rather than retrying the entire batch, delete the errored files individually, taking advantage of the existing HTTP retry instead of adding a new retry loop. These errors seem rare enough that this should not be a performance issue.
This commit is contained in:
David Steele 2022-02-11 08:11:39 -06:00 committed by GitHub
parent b26097f8d8
commit 551e5bc6f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 32 additions and 15 deletions

View File

@ -20,6 +20,21 @@
<p><b>IMPORTANT NOTE</b>: Repository size reported by the <cmd>info</cmd> command is now entirely based on what <backrest/> has written to storage. Previously, in certain cases, <backrest/> could detect if additional compression was being applied by the storage but this is no longer supported.</p>
</text>
<release-bug-list>
<release-item>
<github-issue id="1650"/>
<github-pull-request id="1653"/>
<release-item-contributor-list>
<release-item-ideator id="alex.richman"/>
<release-item-contributor id="david.steele"/>
<release-item-reviewer id="reid.thompson"/>
</release-item-contributor-list>
<p>Retry errors in S3 batch file delete.</p>
</release-item>
</release-bug-list>
<release-feature-list>
<release-item>
<github-issue id="1430"/>
@ -10548,6 +10563,11 @@
<contributor-id type="github">aleszeleny</contributor-id>
</contributor>
<contributor id="alex.richman">
<contributor-name-display>Alex Richman</contributor-name-display>
<contributor-id type="github">alex-richman-onesignal</contributor-id>
</contributor>
<contributor id="anderson.a.mallmann">
<contributor-name-display>Anderson A. Mallmann</contributor-name-display>
<contributor-id type="github">aamallmann</contributor-id>

View File

@ -49,14 +49,12 @@ STRING_STATIC(S3_QUERY_VALUE_LIST_TYPE_2_STR, "2");
/***********************************************************************************************************************************
XML tags
***********************************************************************************************************************************/
STRING_STATIC(S3_XML_TAG_CODE_STR, "Code");
STRING_STATIC(S3_XML_TAG_COMMON_PREFIXES_STR, "CommonPrefixes");
STRING_STATIC(S3_XML_TAG_CONTENTS_STR, "Contents");
STRING_STATIC(S3_XML_TAG_DELETE_STR, "Delete");
STRING_STATIC(S3_XML_TAG_ERROR_STR, "Error");
STRING_STATIC(S3_XML_TAG_KEY_STR, "Key");
STRING_STATIC(S3_XML_TAG_LAST_MODIFIED_STR, "LastModified");
STRING_STATIC(S3_XML_TAG_MESSAGE_STR, "Message");
STRING_STATIC(S3_XML_TAG_NEXT_CONTINUATION_TOKEN_STR, "NextContinuationToken");
STRING_STATIC(S3_XML_TAG_OBJECT_STR, "Object");
STRING_STATIC(S3_XML_TAG_PREFIX_STR, "Prefix");
@ -882,15 +880,13 @@ storageS3PathRemoveInternal(StorageS3 *this, HttpRequest *request, XmlDocument *
{
XmlNodeList *errorList = xmlNodeChildList(xmlDocumentRoot(xmlDocumentNewBuf(response)), S3_XML_TAG_ERROR_STR);
if (xmlNodeLstSize(errorList) > 0)
// Attempt to remove errored files one at a time rather than retrying the batch
for (unsigned int errorIdx = 0; errorIdx < xmlNodeLstSize(errorList); errorIdx++)
{
XmlNode *error = xmlNodeLstGet(errorList, 0);
THROW_FMT(
FileRemoveError, STORAGE_ERROR_PATH_REMOVE_FILE ": [%s] %s",
strZ(xmlNodeContent(xmlNodeChild(error, S3_XML_TAG_KEY_STR, true))),
strZ(xmlNodeContent(xmlNodeChild(error, S3_XML_TAG_CODE_STR, true))),
strZ(xmlNodeContent(xmlNodeChild(error, S3_XML_TAG_MESSAGE_STR, true))));
storageS3RequestP(
this, HTTP_VERB_DELETE_STR,
strNewFmt(
"/%s", strZ(xmlNodeContent(xmlNodeChild(xmlNodeLstGet(errorList, errorIdx), S3_XML_TAG_KEY_STR, true)))));
}
}

View File

@ -1302,7 +1302,7 @@ testRun(void)
TEST_RESULT_VOID(storagePathRemoveP(s3, STRDEF("/path/to"), .recurse = true), "remove");
// -----------------------------------------------------------------------------------------------------------------
TEST_TITLE("remove error");
TEST_TITLE("path remove error and retry");
testRequestP(service, s3, HTTP_VERB_GET, "/bucket/?list-type=2&prefix=path%2F");
testResponseP(
@ -1331,12 +1331,13 @@ testRun(void)
.content =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
"<DeleteResult xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\">"
"<Error><Key>sample2.txt</Key><Code>AccessDenied</Code><Message>Access Denied</Message></Error>"
"<Error><Key>path/sample2.txt</Key><Code>AccessDenied</Code><Message>Access Denied</Message></Error>"
"</DeleteResult>");
TEST_ERROR(
storagePathRemoveP(s3, STRDEF("/path"), .recurse = true), FileRemoveError,
"unable to remove file 'sample2.txt': [AccessDenied] Access Denied");
testRequestP(service, s3, HTTP_VERB_DELETE, "/bucket/path/sample2.txt");
testResponseP(service, .code = 204);
TEST_RESULT_VOID(storagePathRemoveP(s3, STRDEF("/path"), .recurse = true), "remove path");
// -----------------------------------------------------------------------------------------------------------------
TEST_TITLE("remove file");