1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2025-07-01 00:25:06 +02:00

Retry S3 RequestTimeTooSkewed errors instead of immediately terminating.

The cause of this error seems to be that a failed request takes so long that a subsequent retry at the http level uses outdated headers.

We're not sure if pgBackRest it to blame here (in one case a kernel downgrade fixed it, in another case an incorrect network driver was the problem) so add retries to hopefully deal with the issue if it is not too persistent.  If SSL_write() has long delays before reporting an error then this will obviously affect backup performance.

Reported by sean0101n, Tim Garton, Jesper St John, Aleš Zelený.
This commit is contained in:
David Steele
2019-08-01 14:28:30 -04:00
parent 2eb3c9f95f
commit f9e1f3a798
3 changed files with 226 additions and 77 deletions

View File

@ -15,6 +15,17 @@
<release date="XXXX-XX-XX" version="2.16dev" title="UNDER DEVELOPMENT"> <release date="XXXX-XX-XX" version="2.16dev" title="UNDER DEVELOPMENT">
<release-core-list> <release-core-list>
<release-bug-list> <release-bug-list>
<release-item>
<release-item-contributor-list>
<release-item-ideator id="sean0101n"/>
<release-item-ideator id="tim.garton"/>
<release-item-ideator id="jesper.st.john"/>
<release-item-ideator id="ales.zeleny"/>
</release-item-contributor-list>
<p>Retry <proper>S3</proper> <id>RequestTimeTooSkewed</id> errors instead of immediately terminating.</p>
</release-item>
<release-item> <release-item>
<release-item-contributor-list> <release-item-contributor-list>
<release-item-ideator id="pavel.suderevsky"/> <release-item-ideator id="pavel.suderevsky"/>
@ -7132,6 +7143,11 @@
<contributor-id type="github">arogozin</contributor-id> <contributor-id type="github">arogozin</contributor-id>
</contributor> </contributor>
<contributor id="ales.zeleny">
<contributor-name-display>Ale&amp;scaron; Zelen&amp;yacute;</contributor-name-display>
<contributor-id type="github">aleszeleny</contributor-id>
</contributor>
<contributor id="andres.freund"> <contributor id="andres.freund">
<contributor-name-display>Andres Freund</contributor-name-display> <contributor-name-display>Andres Freund</contributor-name-display>
<contributor-id type="github">anarazel</contributor-id> <contributor-id type="github">anarazel</contributor-id>
@ -7514,6 +7530,11 @@
<contributor-id type="github">sfrazer</contributor-id> <contributor-id type="github">sfrazer</contributor-id>
</contributor> </contributor>
<contributor id="sean0101n">
<contributor-name-display>sean0101n</contributor-name-display>
<contributor-id type="github">sean0101n</contributor-id>
</contributor>
<contributor id="sebastien.lardiere"> <contributor id="sebastien.lardiere">
<contributor-name-display>Lardi&amp;egrave;re S&amp;eacute;bastien</contributor-name-display> <contributor-name-display>Lardi&amp;egrave;re S&amp;eacute;bastien</contributor-name-display>
<contributor-id type="github">slardiere</contributor-id> <contributor-id type="github">slardiere</contributor-id>
@ -7534,6 +7555,11 @@
<contributor-id type="github">sfrost</contributor-id> <contributor-id type="github">sfrost</contributor-id>
</contributor> </contributor>
<contributor id="tim.garton">
<contributor-name-display>Tim Garton</contributor-name-display>
<contributor-id type="github">ralfthewise</contributor-id>
</contributor>
<contributor id="todd.vernick"> <contributor id="todd.vernick">
<contributor-name-display>Todd Vernick</contributor-name-display> <contributor-name-display>Todd Vernick</contributor-name-display>
<contributor-id type="github">gintoddic</contributor-id> <contributor-id type="github">gintoddic</contributor-id>

View File

@ -45,6 +45,11 @@ STRING_STATIC(S3_QUERY_PREFIX_STR, "prefix");
STRING_STATIC(S3_QUERY_VALUE_LIST_TYPE_2_STR, "2"); STRING_STATIC(S3_QUERY_VALUE_LIST_TYPE_2_STR, "2");
/***********************************************************************************************************************************
S3 errors
***********************************************************************************************************************************/
STRING_STATIC(S3_ERROR_REQUEST_TIME_TOO_SKEWED_STR, "RequestTimeTooSkewed");
/*********************************************************************************************************************************** /***********************************************************************************************************************************
XML tags XML tags
***********************************************************************************************************************************/ ***********************************************************************************************************************************/
@ -249,98 +254,139 @@ storageS3Request(
ASSERT(uri != NULL); ASSERT(uri != NULL);
StorageS3RequestResult result = {0}; StorageS3RequestResult result = {0};
unsigned int retryRemaining = 2;
bool done;
MEM_CONTEXT_TEMP_BEGIN() do
{ {
// Create header list and add content length done = true;
HttpHeader *requestHeader = httpHeaderNew(this->headerRedactList);
// Set content length MEM_CONTEXT_TEMP_BEGIN()
httpHeaderAdd(
requestHeader, HTTP_HEADER_CONTENT_LENGTH_STR,
body == NULL || bufUsed(body) == 0 ? ZERO_STR : strNewFmt("%zu", bufUsed(body)));
// Calculate content-md5 header if there is content
if (body != NULL)
{ {
char md5Hash[HASH_TYPE_MD5_SIZE_HEX]; // Create header list and add content length
encodeToStr(encodeBase64, bufPtr(cryptoHashOne(HASH_TYPE_MD5_STR, body)), HASH_TYPE_M5_SIZE, md5Hash); HttpHeader *requestHeader = httpHeaderNew(this->headerRedactList);
httpHeaderAdd(requestHeader, HTTP_HEADER_CONTENT_MD5_STR, STR(md5Hash));
}
// Generate authorization header // Set content length
storageS3Auth( httpHeaderAdd(
this, verb, httpUriEncode(uri, true), query, storageS3DateTime(time(NULL)), requestHeader, requestHeader, HTTP_HEADER_CONTENT_LENGTH_STR,
body == NULL || bufUsed(body) == 0 ? body == NULL || bufUsed(body) == 0 ? ZERO_STR : strNewFmt("%zu", bufUsed(body)));
STRDEF("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") :
bufHex(cryptoHashOne(HASH_TYPE_SHA256_STR, body)));
// Get an http client // Calculate content-md5 header if there is content
HttpClient *httpClient = httpClientCacheGet(this->httpClientCache); if (body != NULL)
// Process request
Buffer *response = httpClientRequest(httpClient, verb, uri, query, requestHeader, body, returnContent);
// Error if the request was not successful
if (!httpClientResponseCodeOk(httpClient) &&
(!allowMissing || httpClientResponseCode(httpClient) != HTTP_RESPONSE_CODE_NOT_FOUND))
{
// General error message
String *error = strNewFmt(
"S3 request failed with %u: %s", httpClientResponseCode(httpClient),
strPtr(httpClientResponseMessage(httpClient)));
// Output uri/query
strCat(error, "\n*** URI/Query ***:");
strCatFmt(error, "\n%s", strPtr(httpUriEncode(uri, true)));
if (query != NULL)
strCatFmt(error, "?%s", strPtr(httpQueryRender(query)));
// Output request headers
const StringList *requestHeaderList = httpHeaderList(requestHeader);
strCat(error, "\n*** Request Headers ***:");
for (unsigned int requestHeaderIdx = 0; requestHeaderIdx < strLstSize(requestHeaderList); requestHeaderIdx++)
{ {
const String *key = strLstGet(requestHeaderList, requestHeaderIdx); char md5Hash[HASH_TYPE_MD5_SIZE_HEX];
encodeToStr(encodeBase64, bufPtr(cryptoHashOne(HASH_TYPE_MD5_STR, body)), HASH_TYPE_M5_SIZE, md5Hash);
strCatFmt( httpHeaderAdd(requestHeader, HTTP_HEADER_CONTENT_MD5_STR, STR(md5Hash));
error, "\n%s: %s", strPtr(key),
httpHeaderRedact(requestHeader, key) || strEq(key, S3_HEADER_DATE_STR) ?
"<redacted>" : strPtr(httpHeaderGet(requestHeader, key)));
} }
// Output response headers // Generate authorization header
const HttpHeader *responseHeader = httpClientReponseHeader(httpClient); storageS3Auth(
const StringList *responseHeaderList = httpHeaderList(responseHeader); this, verb, httpUriEncode(uri, true), query, storageS3DateTime(time(NULL)), requestHeader,
body == NULL || bufUsed(body) == 0 ?
STRDEF("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") :
bufHex(cryptoHashOne(HASH_TYPE_SHA256_STR, body)));
if (strLstSize(responseHeaderList) > 0) // Get an http client
HttpClient *httpClient = httpClientCacheGet(this->httpClientCache);
// Process request
Buffer *response = httpClientRequest(httpClient, verb, uri, query, requestHeader, body, returnContent);
// Error if the request was not successful
if (!httpClientResponseCodeOk(httpClient) &&
(!allowMissing || httpClientResponseCode(httpClient) != HTTP_RESPONSE_CODE_NOT_FOUND))
{ {
strCat(error, "\n*** Response Headers ***:"); // If there are retries remaining and a response parse it as XML to extract the S3 error code
if (response != NULL && retryRemaining > 0)
for (unsigned int responseHeaderIdx = 0; responseHeaderIdx < strLstSize(responseHeaderList); responseHeaderIdx++)
{ {
const String *key = strLstGet(responseHeaderList, responseHeaderIdx); // Attempt to parse the XML and extract the S3 error code
strCatFmt(error, "\n%s: %s", strPtr(key), strPtr(httpHeaderGet(responseHeader, key))); TRY_BEGIN()
{
XmlNode *error = xmlDocumentRoot(xmlDocumentNewBuf(response));
const String *errorCode = xmlNodeContent(xmlNodeChild(error, S3_XML_TAG_CODE_STR, true));
if (strEq(errorCode, S3_ERROR_REQUEST_TIME_TOO_SKEWED_STR))
{
LOG_DEBUG(
"retry %s: %s", strPtr(errorCode),
strPtr(xmlNodeContent(xmlNodeChild(error, S3_XML_TAG_MESSAGE_STR, true))));
retryRemaining--;
done = false;
}
}
// On failure just drop through and report the error as usual
CATCH_ANY()
{
}
TRY_END();
}
// If not done then retry instead of reporting the error
if (done)
{
// General error message
String *error = strNewFmt(
"S3 request failed with %u: %s", httpClientResponseCode(httpClient),
strPtr(httpClientResponseMessage(httpClient)));
// Output uri/query
strCat(error, "\n*** URI/Query ***:");
strCatFmt(error, "\n%s", strPtr(httpUriEncode(uri, true)));
if (query != NULL)
strCatFmt(error, "?%s", strPtr(httpQueryRender(query)));
// Output request headers
const StringList *requestHeaderList = httpHeaderList(requestHeader);
strCat(error, "\n*** Request Headers ***:");
for (unsigned int requestHeaderIdx = 0; requestHeaderIdx < strLstSize(requestHeaderList); requestHeaderIdx++)
{
const String *key = strLstGet(requestHeaderList, requestHeaderIdx);
strCatFmt(
error, "\n%s: %s", strPtr(key),
httpHeaderRedact(requestHeader, key) || strEq(key, S3_HEADER_DATE_STR) ?
"<redacted>" : strPtr(httpHeaderGet(requestHeader, key)));
}
// Output response headers
const HttpHeader *responseHeader = httpClientReponseHeader(httpClient);
const StringList *responseHeaderList = httpHeaderList(responseHeader);
if (strLstSize(responseHeaderList) > 0)
{
strCat(error, "\n*** Response Headers ***:");
for (unsigned int responseHeaderIdx = 0; responseHeaderIdx < strLstSize(responseHeaderList); responseHeaderIdx++)
{
const String *key = strLstGet(responseHeaderList, responseHeaderIdx);
strCatFmt(error, "\n%s: %s", strPtr(key), strPtr(httpHeaderGet(responseHeader, key)));
}
}
// If there was content then output it
if (response!= NULL)
strCatFmt(error, "\n*** Response Content ***:\n%s", strPtr(strNewBuf(response)));
THROW(ProtocolError, strPtr(error));
} }
} }
else
{
// On success move the buffer to the calling context
result.httpClient = httpClient;
result.responseHeader = httpHeaderMove(httpHeaderDup(httpClientReponseHeader(httpClient), NULL), MEM_CONTEXT_OLD());
result.response = bufMove(response, MEM_CONTEXT_OLD());
}
// If there was content then output it
if (response!= NULL)
strCatFmt(error, "\n*** Response Content ***:\n%s", strPtr(strNewBuf(response)));
THROW(ProtocolError, strPtr(error));
} }
MEM_CONTEXT_TEMP_END();
// On success move the buffer to the calling context
result.httpClient = httpClient;
result.responseHeader = httpHeaderMove(httpHeaderDup(httpClientReponseHeader(httpClient), NULL), MEM_CONTEXT_OLD());
result.response = bufMove(response, MEM_CONTEXT_OLD());
} }
MEM_CONTEXT_TEMP_END(); while (!done);
FUNCTION_LOG_RETURN(STORAGE_S3_REQUEST_RESULT, result); FUNCTION_LOG_RETURN(STORAGE_S3_REQUEST_RESULT, result);
} }

View File

@ -113,6 +113,21 @@ testS3Server(void)
// ------------------------------------------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------------------------------------------
// File is written all at once // File is written all at once
harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_PUT, "/file.txt", "ABCD")); harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_PUT, "/file.txt", "ABCD"));
harnessTlsServerReply(testS3ServerResponse(
403, "Forbidden", NULL,
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
"<Error>"
"<Code>RequestTimeTooSkewed</Code>"
"<Message>The difference between the request time and the current time is too large.</Message>"
"<RequestTime>20190726T221748Z</RequestTime>"
"<ServerTime>2019-07-26T22:33:27Z</ServerTime>"
"<MaxAllowedSkewMilliseconds>900000</MaxAllowedSkewMilliseconds>"
"<RequestId>601AA1A7F7E37AE9</RequestId>"
"<HostId>KYMys77PoloZrGCkiQRyOIl0biqdHsk4T2EdTkhzkH1l8x00D4lvv/py5uUuHwQXG9qz6NRuldQ=</HostId>"
"</Error>"));
harnessTlsServerAccept();
harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_PUT, "/file.txt", "ABCD"));
harnessTlsServerReply(testS3ServerResponse(200, "OK", NULL, NULL)); harnessTlsServerReply(testS3ServerResponse(200, "OK", NULL, NULL));
// Zero-length file // Zero-length file
@ -209,9 +224,42 @@ testS3Server(void)
// storageDriverList() // storageDriverList()
// ------------------------------------------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------------------------------------------
// Throw error // Throw errors
harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?delimiter=%2F&list-type=2", NULL)); harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?delimiter=%2F&list-type=2", NULL));
harnessTlsServerReply(testS3ServerResponse(344, "Another bad status", NULL, NULL)); harnessTlsServerReply(testS3ServerResponse( 344, "Another bad status", NULL, NULL));
harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?delimiter=%2F&list-type=2", NULL));
harnessTlsServerReply(testS3ServerResponse(
344, "Another bad status with xml", NULL,
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
"<Error>"
"<Code>SomeOtherCode</Code>"
"</Error>"));
harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?delimiter=%2F&list-type=2", NULL));
harnessTlsServerReply(testS3ServerResponse(
403, "Forbidden", NULL,
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
"<Error>"
"<Code>RequestTimeTooSkewed</Code>"
"<Message>The difference between the request time and the current time is too large.</Message>"
"</Error>"));
harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?delimiter=%2F&list-type=2", NULL));
harnessTlsServerReply(testS3ServerResponse(
403, "Forbidden", NULL,
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
"<Error>"
"<Code>RequestTimeTooSkewed</Code>"
"<Message>The difference between the request time and the current time is too large.</Message>"
"</Error>"));
harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?delimiter=%2F&list-type=2", NULL));
harnessTlsServerReply(testS3ServerResponse(
403, "Forbidden", NULL,
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
"<Error>"
"<Code>RequestTimeTooSkewed</Code>"
"<Message>The difference between the request time and the current time is too large.</Message>"
"</Error>"));
// list a file/path in root // list a file/path in root
harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?delimiter=%2F&list-type=2", NULL)); harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?delimiter=%2F&list-type=2", NULL));
@ -837,6 +885,35 @@ testRun(void)
"host: " S3_TEST_HOST "\n" "host: " S3_TEST_HOST "\n"
"x-amz-content-sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n" "x-amz-content-sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
"x-amz-date: <redacted>"); "x-amz-date: <redacted>");
TEST_ERROR(storageListNP(s3, strNew("/")), ProtocolError,
"S3 request failed with 344: Another bad status with xml\n"
"*** URI/Query ***:\n"
"/?delimiter=%2F&list-type=2\n"
"*** Request Headers ***:\n"
"authorization: <redacted>\n"
"content-length: 0\n"
"host: " S3_TEST_HOST "\n"
"x-amz-content-sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
"x-amz-date: <redacted>\n"
"*** Response Headers ***:\n"
"content-length: 79\n"
"*** Response Content ***:\n"
"<?xml version=\"1.0\" encoding=\"UTF-8\"?><Error><Code>SomeOtherCode</Code></Error>");
TEST_ERROR(storageListNP(s3, strNew("/")), ProtocolError,
"S3 request failed with 403: Forbidden\n"
"*** URI/Query ***:\n"
"/?delimiter=%2F&list-type=2\n"
"*** Request Headers ***:\n"
"authorization: <redacted>\n"
"content-length: 0\n"
"host: " S3_TEST_HOST "\n"
"x-amz-content-sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
"x-amz-date: <redacted>\n"
"*** Response Headers ***:\n"
"content-length: 179\n"
"*** Response Content ***:\n"
"<?xml version=\"1.0\" encoding=\"UTF-8\"?><Error><Code>RequestTimeTooSkewed</Code>"
"<Message>The difference between the request time and the current time is too large.</Message></Error>");
TEST_RESULT_STR(strPtr(strLstJoin(storageListNP(s3, strNew("/")), ",")), "path1,test1.txt", "list a file/path in root"); TEST_RESULT_STR(strPtr(strLstJoin(storageListNP(s3, strNew("/")), ",")), "path1,test1.txt", "list a file/path in root");
TEST_RESULT_STR( TEST_RESULT_STR(