1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2025-01-30 05:39:12 +02:00

Improve retry error messages.

Centralize the code to allow it to be used in more places and update the protocol/server module to use the new code.

Since the time measurements make testing difficult, also add time and errorRetry harnesses to allow specific data to be used for testing. In the case of errorRetry, the production behavior is turned off by default during testing and only enabled for the errorRetry test module.
This commit is contained in:
David Steele 2023-04-06 11:03:46 +04:00
parent 801e396dac
commit df419b34b5
17 changed files with 467 additions and 32 deletions

View File

@ -113,6 +113,7 @@ SRCS = \
common/crypto/common.c \ common/crypto/common.c \
common/crypto/hash.c \ common/crypto/hash.c \
common/crypto/xxhash.c \ common/crypto/xxhash.c \
common/error/retry.c \
common/exec.c \ common/exec.c \
common/fork.c \ common/fork.c \
common/ini.c \ common/ini.c \

82
src/common/error/retry.c Normal file
View File

@ -0,0 +1,82 @@
/***********************************************************************************************************************************
Error Retry Message
***********************************************************************************************************************************/
#include "build.auto.h"
#include "common/debug.h"
#include "common/error/retry.h"
#include "common/time.h"
#include "common/type/string.h"
/***********************************************************************************************************************************
Object type
***********************************************************************************************************************************/
struct ErrorRetry
{
ErrorRetryPub pub; // Publicly accessible variables
String *messageLast; // Last error message
TimeMSec timeBegin; // Time error retries began
};
/**********************************************************************************************************************************/
FN_EXTERN ErrorRetry *
errRetryNew(void)
{
FUNCTION_TEST_VOID();
OBJ_NEW_BEGIN(ErrorRetry, .childQty = MEM_CONTEXT_QTY_MAX)
{
*this = (ErrorRetry)
{
.timeBegin = timeMSec(),
};
}
OBJ_NEW_END();
FUNCTION_TEST_RETURN(ERROR_RETRY, this);
}
/**********************************************************************************************************************************/
FN_EXTERN void
errRetryAdd(ErrorRetry *const this)
{
FUNCTION_TEST_BEGIN();
FUNCTION_TEST_PARAM(ERROR_RETRY, this);
FUNCTION_TEST_END();
// If the first error
if (this->messageLast == NULL)
{
MEM_CONTEXT_OBJ_BEGIN(this)
{
this->pub.type = errorType();
this->pub.message = strCatZ(strNew(), errorMessage());
this->messageLast = strCatZ(strNew(), errorMessage());
}
MEM_CONTEXT_OBJ_END();
}
// Else on each retry
else
{
strCatFmt(
this->pub.message, "\n[%s] on retry after %" PRIu64 "ms: ", errorTypeName(errorType()),
timeMSec() - this->timeBegin);
// If the message is the same as the last message
if (strEqZ(this->messageLast, errorMessage()))
{
strCatZ(this->pub.message, "[same message]");
}
// Else append new message
else
{
strCatZ(this->pub.message, errorMessage());
// Update last message
strTrunc(this->messageLast);
strCatZ(this->messageLast, errorMessage());
}
}
FUNCTION_TEST_RETURN_VOID();
}

70
src/common/error/retry.h Normal file
View File

@ -0,0 +1,70 @@
/***********************************************************************************************************************************
Error Retry Message
Accumulate errors during retries to provide a single, coherent error message to the user that includes information about the
original error and all the failed retries.
***********************************************************************************************************************************/
#ifndef COMMON_ERROR_RETRY_H
#define COMMON_ERROR_RETRY_H
/***********************************************************************************************************************************
Object type
***********************************************************************************************************************************/
typedef struct ErrorRetry ErrorRetry;
#include "common/error/error.h"
#include "common/type/object.h"
#include "common/type/string.h"
/***********************************************************************************************************************************
Constructor
***********************************************************************************************************************************/
FN_EXTERN ErrorRetry *errRetryNew(void);
/***********************************************************************************************************************************
Functions
***********************************************************************************************************************************/
// Add retry error
FN_EXTERN void errRetryAdd(ErrorRetry *this);
/***********************************************************************************************************************************
Getters/Setters
***********************************************************************************************************************************/
typedef struct ErrorRetryPub
{
const ErrorType *type; // Error type
String *message; // Error message
} ErrorRetryPub;
// Get error type
FN_INLINE_ALWAYS const ErrorType *
errRetryType(const ErrorRetry *const this)
{
return THIS_PUB(ErrorRetry)->type;
}
// Get error message
FN_INLINE_ALWAYS const String *
errRetryMessage(const ErrorRetry *const this)
{
return THIS_PUB(ErrorRetry)->message;
}
/***********************************************************************************************************************************
Destructor
***********************************************************************************************************************************/
FN_INLINE_ALWAYS void
errRetryFree(ErrorRetry *const this)
{
objFree(this);
}
/***********************************************************************************************************************************
Macros for function logging
***********************************************************************************************************************************/
#define FUNCTION_LOG_ERROR_RETRY_TYPE \
ErrorRetry *
#define FUNCTION_LOG_ERROR_RETRY_FORMAT(value, buffer, bufferSize) \
objNameToLog(value, "ErrorRetry", buffer, bufferSize)
#endif

View File

@ -4,6 +4,7 @@ Time Management
#ifndef COMMON_TIME_H #ifndef COMMON_TIME_H
#define COMMON_TIME_H #define COMMON_TIME_H
#include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include <time.h> #include <time.h>

View File

@ -179,6 +179,7 @@ src_pgbackrest = [
'common/crypto/common.c', 'common/crypto/common.c',
'common/crypto/hash.c', 'common/crypto/hash.c',
'common/crypto/xxhash.c', 'common/crypto/xxhash.c',
'common/error/retry.c',
'common/exec.c', 'common/exec.c',
'common/fork.c', 'common/fork.c',
'common/ini.c', 'common/ini.c',

View File

@ -6,6 +6,7 @@ Protocol Server
#include <string.h> #include <string.h>
#include "common/debug.h" #include "common/debug.h"
#include "common/error/retry.h"
#include "common/log.h" #include "common/log.h"
#include "common/time.h" #include "common/time.h"
#include "common/type/json.h" #include "common/type/json.h"
@ -182,9 +183,7 @@ protocolServerProcess(
MEM_CONTEXT_TEMP_BEGIN() MEM_CONTEXT_TEMP_BEGIN()
{ {
// Variables to store first error message and retry messages // Variables to store first error message and retry messages
const ErrorType *errType = NULL; ErrorRetry *const errRetry = errRetryNew();
String *errMessage = NULL;
const String *errMessageFirst = NULL;
const String *errStackTrace = NULL; const String *errStackTrace = NULL;
// Initialize retries in case of command failure // Initialize retries in case of command failure
@ -203,27 +202,13 @@ protocolServerProcess(
} }
CATCH_ANY() CATCH_ANY()
{ {
// On first error record the error details. Only the first error will contain a stack trace since // Add the error retry info
// the first error is most likely to contain valuable information. errRetryAdd(errRetry);
if (errType == NULL)
{
errType = errorType();
errMessage = strCatZ(strNew(), errorMessage());
errMessageFirst = strNewZ(errorMessage());
errStackTrace = strNewZ(errorStackTrace());
}
// Else on a retry error only record the error type and message. Retry errors are less likely to
// contain valuable information but may be helpful for debugging.
else
{
strCatFmt(
errMessage, "\n[%s] on retry after %" PRIu64 "ms", errorTypeName(errorType()),
retrySleepMs);
// Only append the message if it differs from the first message // On first error record the stack trace. Only the first error will contain a stack trace since
if (!strEqZ(errMessageFirst, errorMessage())) // the first error is most likely to contain valuable information.
strCatFmt(errMessage, ": %s", errorMessage()); if (errStackTrace == NULL)
} errStackTrace = strNewZ(errorStackTrace());
// Are there retries remaining? // Are there retries remaining?
if (retryRemaining > 0) if (retryRemaining > 0)
@ -249,7 +234,10 @@ protocolServerProcess(
} }
// Else report error to the client // Else report error to the client
else else
protocolServerError(this, errorTypeCode(errType), errMessage, errStackTrace); {
protocolServerError(
this, errorTypeCode(errRetryType(errRetry)), errRetryMessage(errRetry), errStackTrace);
}
} }
TRY_END(); TRY_END();
} }

View File

@ -121,6 +121,12 @@ unit:
# ---------------------------------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------------------------------
- name: time - name: time
total: 3 total: 3
harness:
name: time
shim:
common/time:
function:
- timeMSec
coverage: coverage:
- common/time - common/time
@ -165,6 +171,19 @@ unit:
- common/type/variant - common/type/variant
- common/type/variantList - common/type/variantList
# ----------------------------------------------------------------------------------------------------------------------------
- name: error-retry
total: 1
harness:
name: errorRetry
shim:
common/error/retry:
function:
- errRetryAdd
coverage:
- common/error/retry
# ---------------------------------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------------------------------
- name: type-list - name: type-list
total: 4 total: 4

View File

@ -0,0 +1,67 @@
/***********************************************************************************************************************************
Error Retry Message Harness
***********************************************************************************************************************************/
#include "build.auto.h"
#include "common/harnessDebug.h"
#include "common/harnessErrorRetry.h"
/***********************************************************************************************************************************
Include shimmed C modules
***********************************************************************************************************************************/
{[SHIM_MODULE]}
static struct
{
bool detailEnable; // Error retry details enabled (not the default)
} hrnErrorRetryLocal;
/**********************************************************************************************************************************/
void
errRetryAdd(ErrorRetry *const this)
{
FUNCTION_HARNESS_BEGIN();
FUNCTION_HARNESS_PARAM(ERROR_RETRY, this);
FUNCTION_HARNESS_END();
if (!hrnErrorRetryLocal.detailEnable)
{
if (this->messageLast == NULL)
{
MEM_CONTEXT_OBJ_BEGIN(this)
{
this->pub.type = errorType();
this->pub.message = strCatZ(strNew(), errorMessage());
this->messageLast = strNewZ(errorMessage());
}
MEM_CONTEXT_OBJ_END();
}
else if (strEq(this->pub.message, this->messageLast))
strCatFmt(this->pub.message, "\n[RETRY DETAIL OMITTED]");
}
else
errRetryAdd_SHIMMED(this);
FUNCTION_HARNESS_RETURN_VOID();
}
/**********************************************************************************************************************************/
void
hrnErrorRetryDetailEnable(void)
{
FUNCTION_HARNESS_VOID();
hrnErrorRetryLocal.detailEnable = true;
FUNCTION_HARNESS_RETURN_VOID();
}
void
hrnErrorRetryDetailDisable(void)
{
FUNCTION_HARNESS_VOID();
hrnErrorRetryLocal.detailEnable = false;
FUNCTION_HARNESS_RETURN_VOID();
}

View File

@ -0,0 +1,14 @@
/***********************************************************************************************************************************
Error Retry Message Harness
***********************************************************************************************************************************/
#ifndef TEST_COMMON_HARNESS_ERROR_RETRY_H
#define TEST_COMMON_HARNESS_ERROR_RETRY_H
/***********************************************************************************************************************************
Functions
***********************************************************************************************************************************/
// Enable/disable error retry details
void hrnErrorRetryDetailEnable(void);
void hrnErrorRetryDetailDisable(void);
#endif

View File

@ -0,0 +1,66 @@
/***********************************************************************************************************************************
Time Harness
***********************************************************************************************************************************/
#include "build.auto.h"
#include "common/harnessDebug.h"
#include "common/harnessTime.h"
/***********************************************************************************************************************************
Include shimmed C modules
***********************************************************************************************************************************/
{[SHIM_MODULE]}
static struct
{
const TimeMSec *timeList; // Times to return from timeMSec()
size_t timeListSize; // Time list size
size_t timeListCurrent; // Time list current position
} hrnTimeLocal;
/**********************************************************************************************************************************/
void
hrnTimeMSecSet(const TimeMSec *const timeList, const size_t timeListSize)
{
FUNCTION_HARNESS_BEGIN();
FUNCTION_HARNESS_PARAM_P(VOID, timeList);
FUNCTION_HARNESS_PARAM(SIZE, timeListSize);
FUNCTION_HARNESS_END();
ASSERT(hrnTimeLocal.timeList == NULL);
ASSERT(timeListSize > 0);
hrnTimeLocal.timeList = timeList;
hrnTimeLocal.timeListSize = timeListSize;
hrnTimeLocal.timeListCurrent = 0;
FUNCTION_HARNESS_RETURN_VOID();
}
/**********************************************************************************************************************************/
TimeMSec
timeMSec(void)
{
FUNCTION_HARNESS_VOID();
TimeMSec result;
// Return normal result
if (hrnTimeLocal.timeList == NULL)
{
result = timeMSec_SHIMMED();
}
// Else return a value from the list
else
{
result = hrnTimeLocal.timeList[hrnTimeLocal.timeListCurrent];
// Reset list when the last value has been returned
hrnTimeLocal.timeListCurrent++;
if (hrnTimeLocal.timeListCurrent == hrnTimeLocal.timeListSize)
hrnTimeLocal.timeList = NULL;
}
FUNCTION_HARNESS_RETURN(TIME_MSEC, result);
}

View File

@ -0,0 +1,15 @@
/***********************************************************************************************************************************
Time Harness
***********************************************************************************************************************************/
#ifndef TEST_COMMON_HARNESS_TIME_H
#define TEST_COMMON_HARNESS_TIME_H
#include "common/time.h"
/***********************************************************************************************************************************
Functions
***********************************************************************************************************************************/
// Set times to be returned from timeMSec()
void hrnTimeMSecSet(const TimeMSec *timeList, size_t timeListSize);
#endif

View File

@ -520,7 +520,7 @@ testRun(void)
" [FormatError] unexpected eof in compressed data\n" " [FormatError] unexpected eof in compressed data\n"
" repo2: 10-1/0000000100000002/000000010000000200000000-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.gz" " repo2: 10-1/0000000100000002/000000010000000200000000-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.gz"
" [FormatError] unexpected eof in compressed data\n" " [FormatError] unexpected eof in compressed data\n"
" [FileReadError] on retry after 0ms"); " [RETRY DETAIL OMITTED]");
TEST_STORAGE_GET( TEST_STORAGE_GET(
storageSpoolWrite(), STORAGE_SPOOL_ARCHIVE_IN "/000000010000000200000000.error", storageSpoolWrite(), STORAGE_SPOOL_ARCHIVE_IN "/000000010000000200000000.error",
@ -530,7 +530,7 @@ testRun(void)
" [FormatError] unexpected eof in compressed data\n" " [FormatError] unexpected eof in compressed data\n"
"repo2: 10-1/0000000100000002/000000010000000200000000-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.gz" "repo2: 10-1/0000000100000002/000000010000000200000000-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.gz"
" [FormatError] unexpected eof in compressed data\n" " [FormatError] unexpected eof in compressed data\n"
"[FileReadError] on retry after 0ms\n" "[RETRY DETAIL OMITTED]\n"
"repo3: [ArchiveMismatchError] unable to retrieve the archive id for database version '10' and system-id" "repo3: [ArchiveMismatchError] unable to retrieve the archive id for database version '10' and system-id"
" '" HRN_PG_SYSTEMID_10_Z "'", " '" HRN_PG_SYSTEMID_10_Z "'",
.remove = true); .remove = true);

View File

@ -873,7 +873,7 @@ testRun(void)
"P01 DETAIL: pushed WAL file '000000010000000100000001' to the archive\n" "P01 DETAIL: pushed WAL file '000000010000000100000001' to the archive\n"
"P01 WARN: could not push WAL file '000000010000000100000002' to the archive (will be retried): " "P01 WARN: could not push WAL file '000000010000000100000002' to the archive (will be retried): "
"[55] raised from local-1 shim protocol: " STORAGE_ERROR_READ_MISSING "\n" "[55] raised from local-1 shim protocol: " STORAGE_ERROR_READ_MISSING "\n"
" [FileMissingError] on retry after 0ms", " [RETRY DETAIL OMITTED]",
TEST_PATH "/pg/pg_xlog/000000010000000100000002"); TEST_PATH "/pg/pg_xlog/000000010000000100000002");
TEST_STORAGE_EXISTS( TEST_STORAGE_EXISTS(

View File

@ -3074,7 +3074,7 @@ testRun(void)
cmdRestore(), FileMissingError, cmdRestore(), FileMissingError,
"raised from local-1 shim protocol: unable to open missing file" "raised from local-1 shim protocol: unable to open missing file"
" '" TEST_PATH "/repo/backup/test1/20161219-212741F_20161219-212918I/pg_data/global/pg_control' for read\n" " '" TEST_PATH "/repo/backup/test1/20161219-212741F_20161219-212918I/pg_data/global/pg_control' for read\n"
"[FileMissingError] on retry after 0ms"); "[RETRY DETAIL OMITTED]");
// Free local processes that were not freed because of the error // Free local processes that were not freed because of the error
protocolFree(); protocolFree();

View File

@ -1272,7 +1272,7 @@ testRun(void)
"local-1 shim protocol: unable to open file '" TEST_PATH "/repo/archive/db" "local-1 shim protocol: unable to open file '" TEST_PATH "/repo/archive/db"
"/11-2/0000000200000008/000000020000000800000003-656817043007aa2100c44c712bcb456db705dab9' for read:" "/11-2/0000000200000008/000000020000000800000003-656817043007aa2100c44c712bcb456db705dab9' for read:"
" [13] Permission denied\n" " [13] Permission denied\n"
" [FileOpenError] on retry after 0ms\n" " [RETRY DETAIL OMITTED]\n"
"P00 DETAIL: unable to open missing file '" TEST_PATH "/repo/backup/db/20181119-152800F/backup.manifest' for read\n" "P00 DETAIL: unable to open missing file '" TEST_PATH "/repo/backup/db/20181119-152800F/backup.manifest' for read\n"
"P00 DETAIL: unable to open missing file '" TEST_PATH "/repo/backup/db/20181119-152800F/backup.manifest.copy'" "P00 DETAIL: unable to open missing file '" TEST_PATH "/repo/backup/db/20181119-152800F/backup.manifest.copy'"
" for read\n" " for read\n"
@ -1287,7 +1287,7 @@ testRun(void)
"P01 INFO: invalid result UNPROCESSEDBACKUP/pg_data/testother: [41] raised from local-1 shim protocol:" "P01 INFO: invalid result UNPROCESSEDBACKUP/pg_data/testother: [41] raised from local-1 shim protocol:"
" unable to open file '" TEST_PATH "/repo/backup/db/UNPROCESSEDBACKUP/pg_data/testother' for read: [13]" " unable to open file '" TEST_PATH "/repo/backup/db/UNPROCESSEDBACKUP/pg_data/testother' for read: [13]"
" Permission denied\n" " Permission denied\n"
" [FileOpenError] on retry after 0ms\n" " [RETRY DETAIL OMITTED]\n"
"P00 DETAIL: archiveId: 11-2, wal start: 000000020000000700000FFD, wal stop: 000000020000000800000000\n" "P00 DETAIL: archiveId: 11-2, wal start: 000000020000000700000FFD, wal stop: 000000020000000800000000\n"
"P00 DETAIL: archiveId: 11-2, wal start: 000000020000000800000002, wal stop: 000000020000000800000003\n" "P00 DETAIL: archiveId: 11-2, wal start: 000000020000000800000002, wal stop: 000000020000000800000003\n"
"P00 DETAIL: archiveId: 11-2, wal start: 000000030000000000000000, wal stop: 000000030000000000000001\n" "P00 DETAIL: archiveId: 11-2, wal start: 000000030000000000000000, wal stop: 000000030000000000000001\n"

View File

@ -0,0 +1,112 @@
/***********************************************************************************************************************************
Error Retry Message Test
***********************************************************************************************************************************/
#include "common/harnessTime.h"
/***********************************************************************************************************************************
Test Run
***********************************************************************************************************************************/
static void
testRun(void)
{
FUNCTION_HARNESS_VOID();
// *****************************************************************************************************************************
if (testBegin("ErrorRetry"))
{
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("retry (detail disabled)");
{
ErrorRetry *const retry = errRetryNew();
TRY_BEGIN()
{
THROW(FormatError, "message1");
}
CATCH_ANY()
{
TEST_RESULT_VOID(errRetryAdd(retry), "add retry");
}
TRY_END();
TRY_BEGIN()
{
THROW(KernelError, "message1");
}
CATCH_ANY()
{
TEST_RESULT_VOID(errRetryAdd(retry), "add retry");
}
TRY_END();
TEST_RESULT_BOOL(errRetryType(retry) == &FormatError, true, "error type");
TEST_RESULT_STR_Z(
errRetryMessage(retry),
"message1\n"
"[RETRY DETAIL OMITTED]",
"error message");
}
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("retry (detail enabled)");
{
hrnErrorRetryDetailEnable();
TimeMSec timeList[] = {0, 50, 75, 150};
hrnTimeMSecSet(timeList, LENGTH_OF(timeList));
ErrorRetry *const retry = errRetryNew();
TRY_BEGIN()
{
THROW(FormatError, "message1");
}
CATCH_ANY()
{
TEST_RESULT_VOID(errRetryAdd(retry), "add retry");
}
TRY_END();
TRY_BEGIN()
{
THROW(FormatError, "message1");
}
CATCH_ANY()
{
TEST_RESULT_VOID(errRetryAdd(retry), "add retry");
}
TRY_END();
TRY_BEGIN()
{
THROW(KernelError, "message2");
}
CATCH_ANY()
{
TEST_RESULT_VOID(errRetryAdd(retry), "add retry");
}
TRY_END();
TRY_BEGIN()
{
THROW(ServiceError, "message2");
}
CATCH_ANY()
{
TEST_RESULT_VOID(errRetryAdd(retry), "add retry");
}
TRY_END();
TEST_RESULT_BOOL(errRetryType(retry) == &FormatError, true, "error type");
TEST_RESULT_STR_Z(
errRetryMessage(retry),
"message1\n"
"[FormatError] on retry after 50ms: [same message]\n"
"[KernelError] on retry after 75ms: message2\n"
"[ServiceError] on retry after 150ms: [same message]",
"error message");
}
}
FUNCTION_HARNESS_RETURN_VOID();
}

View File

@ -702,8 +702,7 @@ testRun(void)
TEST_ERROR( TEST_ERROR(
protocolClientExecute(client, protocolCommandNew(TEST_PROTOCOL_COMMAND_ERROR), false), FormatError, protocolClientExecute(client, protocolCommandNew(TEST_PROTOCOL_COMMAND_ERROR), false), FormatError,
"raised from test client: ERR_MESSAGE\n" "raised from test client: ERR_MESSAGE\n"
"[FormatError] on retry after 0ms\n" "[RETRY DETAIL OMITTED]");
"[FormatError] on retry after 500ms: ERR_MESSAGE_RETRY");
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
TEST_TITLE("free client"); TEST_TITLE("free client");