From f9e1f3a79823ee1bd6656e4f7a8fb23e735b8ccf Mon Sep 17 00:00:00 2001 From: David Steele Date: Thu, 1 Aug 2019 14:28:30 -0400 Subject: [PATCH] Retry S3 RequestTimeTooSkewed errors instead of immediately terminating. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cause of this error seems to be that a failed request takes so long that a subsequent retry at the http level uses outdated headers. We're not sure if pgBackRest it to blame here (in one case a kernel downgrade fixed it, in another case an incorrect network driver was the problem) so add retries to hopefully deal with the issue if it is not too persistent. If SSL_write() has long delays before reporting an error then this will obviously affect backup performance. Reported by sean0101n, Tim Garton, Jesper St John, Aleš Zelený. --- doc/xml/release.xml | 26 ++++ src/storage/s3/storage.c | 196 +++++++++++++++++++------------ test/src/module/storage/s3Test.c | 81 ++++++++++++- 3 files changed, 226 insertions(+), 77 deletions(-) diff --git a/doc/xml/release.xml b/doc/xml/release.xml index a5b30f411..83860ad31 100644 --- a/doc/xml/release.xml +++ b/doc/xml/release.xml @@ -15,6 +15,17 @@ + + + + + + + + +

Retry S3 RequestTimeTooSkewed errors instead of immediately terminating.

+
+ @@ -7132,6 +7143,11 @@ arogozin + + Ale&scaron; Zelen&yacute; + aleszeleny + + Andres Freund anarazel @@ -7514,6 +7530,11 @@ sfrazer + + sean0101n + sean0101n + + Lardi&egrave;re S&eacute;bastien slardiere @@ -7534,6 +7555,11 @@ sfrost + + Tim Garton + ralfthewise + + Todd Vernick gintoddic diff --git a/src/storage/s3/storage.c b/src/storage/s3/storage.c index 9d54b6b1f..791b9a611 100644 --- a/src/storage/s3/storage.c +++ b/src/storage/s3/storage.c @@ -45,6 +45,11 @@ STRING_STATIC(S3_QUERY_PREFIX_STR, "prefix"); STRING_STATIC(S3_QUERY_VALUE_LIST_TYPE_2_STR, "2"); +/*********************************************************************************************************************************** +S3 errors +***********************************************************************************************************************************/ +STRING_STATIC(S3_ERROR_REQUEST_TIME_TOO_SKEWED_STR, "RequestTimeTooSkewed"); + /*********************************************************************************************************************************** XML tags ***********************************************************************************************************************************/ @@ -249,98 +254,139 @@ storageS3Request( ASSERT(uri != NULL); StorageS3RequestResult result = {0}; + unsigned int retryRemaining = 2; + bool done; - MEM_CONTEXT_TEMP_BEGIN() + do { - // Create header list and add content length - HttpHeader *requestHeader = httpHeaderNew(this->headerRedactList); + done = true; - // Set content length - httpHeaderAdd( - requestHeader, HTTP_HEADER_CONTENT_LENGTH_STR, - body == NULL || bufUsed(body) == 0 ? ZERO_STR : strNewFmt("%zu", bufUsed(body))); - - // Calculate content-md5 header if there is content - if (body != NULL) + MEM_CONTEXT_TEMP_BEGIN() { - char md5Hash[HASH_TYPE_MD5_SIZE_HEX]; - encodeToStr(encodeBase64, bufPtr(cryptoHashOne(HASH_TYPE_MD5_STR, body)), HASH_TYPE_M5_SIZE, md5Hash); - httpHeaderAdd(requestHeader, HTTP_HEADER_CONTENT_MD5_STR, STR(md5Hash)); - } + // Create header list and add content length + HttpHeader *requestHeader = httpHeaderNew(this->headerRedactList); - // Generate authorization header - storageS3Auth( - this, verb, httpUriEncode(uri, true), query, storageS3DateTime(time(NULL)), requestHeader, - body == NULL || bufUsed(body) == 0 ? - STRDEF("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") : - bufHex(cryptoHashOne(HASH_TYPE_SHA256_STR, body))); + // Set content length + httpHeaderAdd( + requestHeader, HTTP_HEADER_CONTENT_LENGTH_STR, + body == NULL || bufUsed(body) == 0 ? ZERO_STR : strNewFmt("%zu", bufUsed(body))); - // Get an http client - HttpClient *httpClient = httpClientCacheGet(this->httpClientCache); - - // Process request - Buffer *response = httpClientRequest(httpClient, verb, uri, query, requestHeader, body, returnContent); - - // Error if the request was not successful - if (!httpClientResponseCodeOk(httpClient) && - (!allowMissing || httpClientResponseCode(httpClient) != HTTP_RESPONSE_CODE_NOT_FOUND)) - { - // General error message - String *error = strNewFmt( - "S3 request failed with %u: %s", httpClientResponseCode(httpClient), - strPtr(httpClientResponseMessage(httpClient))); - - // Output uri/query - strCat(error, "\n*** URI/Query ***:"); - - strCatFmt(error, "\n%s", strPtr(httpUriEncode(uri, true))); - - if (query != NULL) - strCatFmt(error, "?%s", strPtr(httpQueryRender(query))); - - // Output request headers - const StringList *requestHeaderList = httpHeaderList(requestHeader); - - strCat(error, "\n*** Request Headers ***:"); - - for (unsigned int requestHeaderIdx = 0; requestHeaderIdx < strLstSize(requestHeaderList); requestHeaderIdx++) + // Calculate content-md5 header if there is content + if (body != NULL) { - const String *key = strLstGet(requestHeaderList, requestHeaderIdx); - - strCatFmt( - error, "\n%s: %s", strPtr(key), - httpHeaderRedact(requestHeader, key) || strEq(key, S3_HEADER_DATE_STR) ? - "" : strPtr(httpHeaderGet(requestHeader, key))); + char md5Hash[HASH_TYPE_MD5_SIZE_HEX]; + encodeToStr(encodeBase64, bufPtr(cryptoHashOne(HASH_TYPE_MD5_STR, body)), HASH_TYPE_M5_SIZE, md5Hash); + httpHeaderAdd(requestHeader, HTTP_HEADER_CONTENT_MD5_STR, STR(md5Hash)); } - // Output response headers - const HttpHeader *responseHeader = httpClientReponseHeader(httpClient); - const StringList *responseHeaderList = httpHeaderList(responseHeader); + // Generate authorization header + storageS3Auth( + this, verb, httpUriEncode(uri, true), query, storageS3DateTime(time(NULL)), requestHeader, + body == NULL || bufUsed(body) == 0 ? + STRDEF("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") : + bufHex(cryptoHashOne(HASH_TYPE_SHA256_STR, body))); - if (strLstSize(responseHeaderList) > 0) + // Get an http client + HttpClient *httpClient = httpClientCacheGet(this->httpClientCache); + + // Process request + Buffer *response = httpClientRequest(httpClient, verb, uri, query, requestHeader, body, returnContent); + + // Error if the request was not successful + if (!httpClientResponseCodeOk(httpClient) && + (!allowMissing || httpClientResponseCode(httpClient) != HTTP_RESPONSE_CODE_NOT_FOUND)) { - strCat(error, "\n*** Response Headers ***:"); - - for (unsigned int responseHeaderIdx = 0; responseHeaderIdx < strLstSize(responseHeaderList); responseHeaderIdx++) + // If there are retries remaining and a response parse it as XML to extract the S3 error code + if (response != NULL && retryRemaining > 0) { - const String *key = strLstGet(responseHeaderList, responseHeaderIdx); - strCatFmt(error, "\n%s: %s", strPtr(key), strPtr(httpHeaderGet(responseHeader, key))); + // Attempt to parse the XML and extract the S3 error code + TRY_BEGIN() + { + XmlNode *error = xmlDocumentRoot(xmlDocumentNewBuf(response)); + const String *errorCode = xmlNodeContent(xmlNodeChild(error, S3_XML_TAG_CODE_STR, true)); + + if (strEq(errorCode, S3_ERROR_REQUEST_TIME_TOO_SKEWED_STR)) + { + LOG_DEBUG( + "retry %s: %s", strPtr(errorCode), + strPtr(xmlNodeContent(xmlNodeChild(error, S3_XML_TAG_MESSAGE_STR, true)))); + + retryRemaining--; + done = false; + } + } + // On failure just drop through and report the error as usual + CATCH_ANY() + { + } + TRY_END(); + } + + // If not done then retry instead of reporting the error + if (done) + { + // General error message + String *error = strNewFmt( + "S3 request failed with %u: %s", httpClientResponseCode(httpClient), + strPtr(httpClientResponseMessage(httpClient))); + + // Output uri/query + strCat(error, "\n*** URI/Query ***:"); + + strCatFmt(error, "\n%s", strPtr(httpUriEncode(uri, true))); + + if (query != NULL) + strCatFmt(error, "?%s", strPtr(httpQueryRender(query))); + + // Output request headers + const StringList *requestHeaderList = httpHeaderList(requestHeader); + + strCat(error, "\n*** Request Headers ***:"); + + for (unsigned int requestHeaderIdx = 0; requestHeaderIdx < strLstSize(requestHeaderList); requestHeaderIdx++) + { + const String *key = strLstGet(requestHeaderList, requestHeaderIdx); + + strCatFmt( + error, "\n%s: %s", strPtr(key), + httpHeaderRedact(requestHeader, key) || strEq(key, S3_HEADER_DATE_STR) ? + "" : strPtr(httpHeaderGet(requestHeader, key))); + } + + // Output response headers + const HttpHeader *responseHeader = httpClientReponseHeader(httpClient); + const StringList *responseHeaderList = httpHeaderList(responseHeader); + + if (strLstSize(responseHeaderList) > 0) + { + strCat(error, "\n*** Response Headers ***:"); + + for (unsigned int responseHeaderIdx = 0; responseHeaderIdx < strLstSize(responseHeaderList); responseHeaderIdx++) + { + const String *key = strLstGet(responseHeaderList, responseHeaderIdx); + strCatFmt(error, "\n%s: %s", strPtr(key), strPtr(httpHeaderGet(responseHeader, key))); + } + } + + // If there was content then output it + if (response!= NULL) + strCatFmt(error, "\n*** Response Content ***:\n%s", strPtr(strNewBuf(response))); + + THROW(ProtocolError, strPtr(error)); } } + else + { + // On success move the buffer to the calling context + result.httpClient = httpClient; + result.responseHeader = httpHeaderMove(httpHeaderDup(httpClientReponseHeader(httpClient), NULL), MEM_CONTEXT_OLD()); + result.response = bufMove(response, MEM_CONTEXT_OLD()); + } - // If there was content then output it - if (response!= NULL) - strCatFmt(error, "\n*** Response Content ***:\n%s", strPtr(strNewBuf(response))); - - THROW(ProtocolError, strPtr(error)); } - - // On success move the buffer to the calling context - result.httpClient = httpClient; - result.responseHeader = httpHeaderMove(httpHeaderDup(httpClientReponseHeader(httpClient), NULL), MEM_CONTEXT_OLD()); - result.response = bufMove(response, MEM_CONTEXT_OLD()); + MEM_CONTEXT_TEMP_END(); } - MEM_CONTEXT_TEMP_END(); + while (!done); FUNCTION_LOG_RETURN(STORAGE_S3_REQUEST_RESULT, result); } diff --git a/test/src/module/storage/s3Test.c b/test/src/module/storage/s3Test.c index 57388b08d..2a3bf44ce 100644 --- a/test/src/module/storage/s3Test.c +++ b/test/src/module/storage/s3Test.c @@ -113,6 +113,21 @@ testS3Server(void) // ------------------------------------------------------------------------------------------------------------------------- // File is written all at once harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_PUT, "/file.txt", "ABCD")); + harnessTlsServerReply(testS3ServerResponse( + 403, "Forbidden", NULL, + "" + "" + "RequestTimeTooSkewed" + "The difference between the request time and the current time is too large." + "20190726T221748Z" + "2019-07-26T22:33:27Z" + "900000" + "601AA1A7F7E37AE9" + "KYMys77PoloZrGCkiQRyOIl0biqdHsk4T2EdTkhzkH1l8x00D4lvv/py5uUuHwQXG9qz6NRuldQ=" + "")); + + harnessTlsServerAccept(); + harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_PUT, "/file.txt", "ABCD")); harnessTlsServerReply(testS3ServerResponse(200, "OK", NULL, NULL)); // Zero-length file @@ -209,9 +224,42 @@ testS3Server(void) // storageDriverList() // ------------------------------------------------------------------------------------------------------------------------- - // Throw error + // Throw errors harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?delimiter=%2F&list-type=2", NULL)); - harnessTlsServerReply(testS3ServerResponse(344, "Another bad status", NULL, NULL)); + harnessTlsServerReply(testS3ServerResponse( 344, "Another bad status", NULL, NULL)); + + harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?delimiter=%2F&list-type=2", NULL)); + harnessTlsServerReply(testS3ServerResponse( + 344, "Another bad status with xml", NULL, + "" + "" + "SomeOtherCode" + "")); + + harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?delimiter=%2F&list-type=2", NULL)); + harnessTlsServerReply(testS3ServerResponse( + 403, "Forbidden", NULL, + "" + "" + "RequestTimeTooSkewed" + "The difference between the request time and the current time is too large." + "")); + harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?delimiter=%2F&list-type=2", NULL)); + harnessTlsServerReply(testS3ServerResponse( + 403, "Forbidden", NULL, + "" + "" + "RequestTimeTooSkewed" + "The difference between the request time and the current time is too large." + "")); + harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?delimiter=%2F&list-type=2", NULL)); + harnessTlsServerReply(testS3ServerResponse( + 403, "Forbidden", NULL, + "" + "" + "RequestTimeTooSkewed" + "The difference between the request time and the current time is too large." + "")); // list a file/path in root harnessTlsServerExpect(testS3ServerRequest(HTTP_VERB_GET, "/?delimiter=%2F&list-type=2", NULL)); @@ -837,6 +885,35 @@ testRun(void) "host: " S3_TEST_HOST "\n" "x-amz-content-sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n" "x-amz-date: "); + TEST_ERROR(storageListNP(s3, strNew("/")), ProtocolError, + "S3 request failed with 344: Another bad status with xml\n" + "*** URI/Query ***:\n" + "/?delimiter=%2F&list-type=2\n" + "*** Request Headers ***:\n" + "authorization: \n" + "content-length: 0\n" + "host: " S3_TEST_HOST "\n" + "x-amz-content-sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n" + "x-amz-date: \n" + "*** Response Headers ***:\n" + "content-length: 79\n" + "*** Response Content ***:\n" + "SomeOtherCode"); + TEST_ERROR(storageListNP(s3, strNew("/")), ProtocolError, + "S3 request failed with 403: Forbidden\n" + "*** URI/Query ***:\n" + "/?delimiter=%2F&list-type=2\n" + "*** Request Headers ***:\n" + "authorization: \n" + "content-length: 0\n" + "host: " S3_TEST_HOST "\n" + "x-amz-content-sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n" + "x-amz-date: \n" + "*** Response Headers ***:\n" + "content-length: 179\n" + "*** Response Content ***:\n" + "RequestTimeTooSkewed" + "The difference between the request time and the current time is too large."); TEST_RESULT_STR(strPtr(strLstJoin(storageListNP(s3, strNew("/")), ",")), "path1,test1.txt", "list a file/path in root"); TEST_RESULT_STR(