1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2025-11-06 08:49:29 +02:00

Optimize WAL segment check after successful backup.

The prior code did one list command against the storage for each WAL segment. This led to a lot of lists and was especially inefficient when the WAL (or the majority of it) was already present.

Optimize to keep the contents of a WAL directory and use them on a subsequent search. Leave the optimizations for a single WAL segment since other places still use that mode.
This commit is contained in:
David Steele
2023-09-10 12:45:58 -04:00
committed by GitHub
parent edbd520c81
commit 9d3a605900
12 changed files with 376 additions and 92 deletions

View File

@@ -52,6 +52,7 @@ SRCS_COMMON = \
SRCS = \
command/annotate/annotate.c \
command/archive/common.c \
command/archive/find.c \
command/archive/get/file.c \
command/archive/get/get.c \
command/archive/get/protocol.c \

View File

@@ -20,7 +20,6 @@ Archive Common
#include "postgres/interface.h"
#include "postgres/version.h"
#include "storage/helper.h"
#include "storage/helper.h"
/***********************************************************************************************************************************
WAL segment constants
@@ -405,77 +404,6 @@ walIsSegment(const String *walSegment)
FUNCTION_LOG_RETURN(BOOL, regExpMatch(regExpSegment, walSegment));
}
/***********************************************************************************************************************************
Find a WAL segment in the repository archive

The archive directory for the segment is listed repeatedly until a match is found or the wait is exhausted. A single match is
returned, more than one match is an error, and no match is an error only when a non-zero timeout was requested.
***********************************************************************************************************************************/
FN_EXTERN String *
walSegmentFind(const Storage *storage, const String *archiveId, const String *walSegment, TimeMSec timeout)
{
    FUNCTION_LOG_BEGIN(logLevelDebug);
        FUNCTION_LOG_PARAM(STORAGE, storage);
        FUNCTION_LOG_PARAM(STRING, archiveId);
        FUNCTION_LOG_PARAM(STRING, walSegment);
        FUNCTION_LOG_PARAM(TIME_MSEC, timeout);
    FUNCTION_LOG_END();

    ASSERT(storage != NULL);
    ASSERT(archiveId != NULL);
    ASSERT(walSegment != NULL);
    ASSERT(walIsSegment(walSegment));

    String *result = NULL;

    MEM_CONTEXT_TEMP_BEGIN()
    {
        Wait *wait = waitNew(timeout);

        do
        {
            // The first 16 characters of the segment name select the archive sub-directory to list
            const String *const archivePath = strNewFmt(
                STORAGE_REPO_ARCHIVE "/%s/%s", strZ(archiveId), strZ(strSubN(walSegment, 0, 16)));

            // Match the segment name (with partial extension when present), a 40 character hash, and an optional compression
            // extension
            const String *const matchExp = strNewFmt(
                "^%s%s-[0-f]{40}" COMPRESS_TYPE_REGEXP "{0,1}$", strZ(strSubN(walSegment, 0, 24)),
                walIsPartial(walSegment) ? WAL_SEGMENT_PARTIAL_EXT : "");

            StringList *matchList = storageListP(storage, archivePath, .expression = matchExp, .nullOnMissing = true);

            // A missing path is treated the same as an empty list
            const unsigned int matchTotal = matchList == NULL ? 0 : strLstSize(matchList);

            // More than one file for the same segment is an error
            if (matchTotal > 1)
            {
                THROW_FMT(
                    ArchiveDuplicateError,
                    "duplicates found in archive for WAL segment %s: %s\n"
                    "HINT: are multiple primaries archiving to this stanza?",
                    strZ(walSegment), strZ(strLstJoin(strLstSort(matchList, sortOrderAsc), ", ")));
            }

            // Exactly one file was found so copy the name into the prior context where it will outlive the temp context
            if (matchTotal == 1)
            {
                MEM_CONTEXT_PRIOR_BEGIN()
                {
                    result = strDup(strLstGet(matchList, 0));
                }
                MEM_CONTEXT_PRIOR_END();
            }
        }
        while (result == NULL && waitMore(wait));
    }
    MEM_CONTEXT_TEMP_END();

    // Not finding the segment is only an error when the caller asked to wait for it
    if (result == NULL && timeout != 0)
    {
        THROW_FMT(
            ArchiveTimeoutError,
            "WAL segment %s was not archived before the %" PRIu64 "ms timeout\n"
            "HINT: check the archive_command to ensure that all options are correct (especially --stanza).\n"
            "HINT: check the PostgreSQL server log for errors.\n"
            "HINT: run the 'start' command if the stanza was previously stopped.",
            strZ(walSegment), timeout);
    }

    FUNCTION_LOG_RETURN(STRING, result);
}
/**********************************************************************************************************************************/
FN_EXTERN String *
walSegmentNext(const String *walSegment, size_t walSegmentSize, unsigned int pgVersion)

View File

@@ -94,11 +94,6 @@ FN_EXTERN bool walIsSegment(const String *walSegment);
// Generates the location of the wal directory using a relative wal path and the supplied pg path
FN_EXTERN String *walPath(const String *walFile, const String *pgPath, const String *command);
// Find a WAL segment in the repository. The file name can have several things appended such as a hash, compression extension, and
// partial extension so it is possible to have multiple files that match the segment, though more than one match is not a good
// thing.
FN_EXTERN String *walSegmentFind(const Storage *storage, const String *archiveId, const String *walSegment, TimeMSec timeout);
// Get the next WAL segment given a WAL segment and WAL segment size
FN_EXTERN String *walSegmentNext(const String *walSegment, size_t walSegmentSize, unsigned int pgVersion);

230
src/command/archive/find.c Normal file
View File

@@ -0,0 +1,230 @@
/***********************************************************************************************************************************
Archive Segment Find
***********************************************************************************************************************************/
#include "build.auto.h"
#include "command/archive/common.h"
#include "command/archive/find.h"
#include "common/debug.h"
#include "common/log.h"
#include "common/memContext.h"
#include "common/regExp.h"
#include "common/wait.h"
#include "storage/helper.h"
/***********************************************************************************************************************************
Object type
***********************************************************************************************************************************/
// State kept between calls to walSegmentFind() so that a directory list can be reused for subsequent segments
struct WalSegmentFind
{
const Storage *storage; // Storage to find WAL in (the pointer is stored, the storage is not copied)
const String *archiveId; // Archive id to find segments in
bool single; // Optimize for a single segment? When true the list is filtered to the requested segment and not reused
TimeMSec timeout; // Timeout for each segment
String *prefix; // Current list prefix, i.e. the first 16 characters of the WAL segment the list was loaded for
StringList *list; // List of found segments sorted ascending, NULL when the list must be (re)loaded
};
/***********************************************************************************************************************************
Macros for function logging
***********************************************************************************************************************************/
// Type and log formatter for WAL_SEGMENT_FIND, the type name used with FUNCTION_LOG_PARAM()/FUNCTION_LOG_RETURN() in this file.
// NOTE(review): presumably objNameToLog() renders the object by name only rather than by content -- confirm.
#define FUNCTION_LOG_WAL_SEGMENT_FIND_TYPE \
WalSegmentFind *
#define FUNCTION_LOG_WAL_SEGMENT_FIND_FORMAT(value, buffer, bufferSize) \
objNameToLog(value, "WalSegmentFind", buffer, bufferSize)
/***********************************************************************************************************************************
Create an object used to find WAL segments in an archive

storage   - storage to search (the pointer is stored, the storage is not copied)
archiveId - archive id to search (copied into the new object)
single    - optimize for finding a single segment?
timeout   - timeout applied to each segment find
***********************************************************************************************************************************/
FN_EXTERN WalSegmentFind *
walSegmentFindNew(const Storage *const storage, const String *const archiveId, const bool single, const TimeMSec timeout)
{
    FUNCTION_LOG_BEGIN(logLevelDebug);
        FUNCTION_LOG_PARAM(STORAGE, storage);
        FUNCTION_LOG_PARAM(STRING, archiveId);
        FUNCTION_LOG_PARAM(BOOL, single);
        FUNCTION_LOG_PARAM(TIME_MSEC, timeout);
    FUNCTION_LOG_END();

    ASSERT(storage != NULL);
    ASSERT(archiveId != NULL);

    OBJ_NEW_BEGIN(WalSegmentFind, .childQty = MEM_CONTEXT_QTY_MAX)
    {
        // There is no prefix or list until the first find is performed
        *this = (WalSegmentFind)
        {
            .timeout = timeout,
            .single = single,
            .archiveId = strDup(archiveId),
            .storage = storage,
            .prefix = NULL,
            .list = NULL,
        };
    }
    OBJ_NEW_END();

    FUNCTION_LOG_RETURN(WAL_SEGMENT_FIND, this);
}
/***********************************************************************************************************************************
Find a WAL segment in the repository

Returns the name of the file found in the archive for the WAL segment. The search is retried until a match is found or the wait
created from the object timeout is exhausted. When no match is found ArchiveTimeoutError is thrown, unless the timeout is 0 in
which case NULL is returned. More than one match for the segment throws ArchiveDuplicateError.

In single mode the directory list is filtered to the requested segment and discarded after every attempt. Otherwise the sorted
list of the entire WAL directory is kept in the object and entries are removed from the front as they are checked, which avoids
listing the directory again when segments are requested in ascending order.
***********************************************************************************************************************************/
FN_EXTERN String *
walSegmentFind(WalSegmentFind *const this, const String *const walSegment)
{
    FUNCTION_LOG_BEGIN(logLevelDebug);
        FUNCTION_LOG_PARAM(WAL_SEGMENT_FIND, this);
        FUNCTION_LOG_PARAM(STRING, walSegment);
    FUNCTION_LOG_END();

    ASSERT(this != NULL);
    ASSERT(walSegment != NULL);
    ASSERT(walIsSegment(walSegment));

    String *result = NULL;

    MEM_CONTEXT_TEMP_BEGIN()
    {
        Wait *const wait = waitNew(this->timeout);

        // The first 16 characters of the segment name identify the archive sub-directory that contains the segment
        const String *const prefix = strSubN(walSegment, 0, 16);
        const String *const path = strNewFmt(STORAGE_REPO_ARCHIVE "/%s/%s", strZ(this->archiveId), strZ(prefix));

        // Match the segment name (with partial extension when present), a 40 character hash, and an optional compression
        // extension
        const String *const expression = strNewFmt(
            "^%s%s-[0-f]{40}" COMPRESS_TYPE_REGEXP "{0,1}$", strZ(strSubN(walSegment, 0, 24)),
            walIsPartial(walSegment) ? WAL_SEGMENT_PARTIAL_EXT : "");

        // Only needed when not finding a single WAL so it is built on first use
        RegExp *regExp = NULL;

        do
        {
            // Get a list of all WAL segments that match the directory (and prefix when finding a single WAL)
            if (this->list == NULL || !strEq(prefix, this->prefix))
            {
                // The prefix and list must outlive this call so they are created in the object context
                MEM_CONTEXT_OBJ_BEGIN(this)
                {
                    // Free and store prefix
                    if (!strEq(prefix, this->prefix))
                    {
                        strFree(this->prefix);
                        this->prefix = strDup(prefix);
                    }

                    // Free list
                    strLstFree(this->list);

                    // Get list
                    this->list = strLstSort(
                        storageListP(this->storage, path, .expression = this->single ? expression : NULL), sortOrderAsc);
                }
                MEM_CONTEXT_OBJ_END();
            }

            // If there are results
            if (!strLstEmpty(this->list))
            {
                // By default the match size is the list size since filtering happened above. When not finding a single WAL then
                // non-matching entries before the matching WAL will need to be removed and then the matching WAL counted.
                unsigned int match = strLstSize(this->list);

                if (!this->single)
                {
                    // Build regexp if not yet built
                    if (regExp == NULL)
                        regExp = regExpNew(expression);

                    // Remove list items that do not match. This prevents us from having to check them again on the next find.
                    while (!strLstEmpty(this->list) && !regExpMatch(regExp, strLstGet(this->list, 0)))
                        strLstRemoveIdx(this->list, 0);

                    // Find matches at the beginning of the remaining list
                    match = 0;

                    while (match < strLstSize(this->list) && regExpMatch(regExp, strLstGet(this->list, match)))
                        match++;
                }

                // Error if there is more than one match
                if (match > 1)
                {
                    // Build list of duplicate WAL
                    StringList *const matchList = strLstNew();

                    for (unsigned int matchIdx = 0; matchIdx < match; matchIdx++)
                        strLstAdd(matchList, strLstGet(this->list, matchIdx));

                    // Clear list for next find
                    strLstFree(this->list);
                    this->list = NULL;

                    THROW_FMT(
                        ArchiveDuplicateError,
                        "duplicates found in archive for WAL segment %s: %s\n"
                        "HINT: are multiple primaries archiving to this stanza?",
                        strZ(walSegment), strZ(strLstJoin(matchList, ", ")));
                }

                // On match copy file name of WAL segment found into the prior context
                if (match == 1)
                {
                    MEM_CONTEXT_PRIOR_BEGIN()
                    {
                        result = strDup(strLstGet(this->list, 0));
                    }
                    MEM_CONTEXT_PRIOR_END();
                }

                // Remove matching entries so list will be reloaded when empty
                if (!this->single)
                {
                    ASSERT(regExp != NULL);

                    while (!strLstEmpty(this->list) && regExpMatch(regExp, strLstGet(this->list, 0)))
                        strLstRemoveIdx(this->list, 0);
                }
            }

            // Clear list for next find. A single find never reuses the list and an empty list has nothing left to search.
            if (this->single || strLstEmpty(this->list))
            {
                strLstFree(this->list);
                this->list = NULL;
            }
        }
        while (result == NULL && waitMore(wait));
    }
    MEM_CONTEXT_TEMP_END();

    // Error if segment not found before timeout
    if (result == NULL && this->timeout != 0)
    {
        THROW_FMT(
            ArchiveTimeoutError,
            "WAL segment %s was not archived before the %" PRIu64 "ms timeout\n"
            "HINT: check the archive_command to ensure that all options are correct (especially --stanza).\n"
            "HINT: check the PostgreSQL server log for errors.\n"
            "HINT: run the 'start' command if the stanza was previously stopped.",
            strZ(walSegment), this->timeout);
    }

    FUNCTION_LOG_RETURN(STRING, result);
}
/***********************************************************************************************************************************
Find a single WAL segment

Convenience wrapper that creates a temporary WalSegmentFind object in single mode and performs one find with it.
***********************************************************************************************************************************/
FN_EXTERN String *
walSegmentFindOne(
    const Storage *const storage, const String *const archiveId, const String *const walSegment, const TimeMSec timeout)
{
    FUNCTION_LOG_BEGIN(logLevelDebug);
        FUNCTION_LOG_PARAM(STORAGE, storage);
        FUNCTION_LOG_PARAM(STRING, archiveId);
        FUNCTION_LOG_PARAM(STRING, walSegment);
        FUNCTION_LOG_PARAM(TIME_MSEC, timeout);
    FUNCTION_LOG_END();

    String *walFile = NULL;

    MEM_CONTEXT_TEMP_BEGIN()
    {
        // The finder is only needed for this call so it is created in the temp context
        WalSegmentFind *const finder = walSegmentFindNew(storage, archiveId, true, timeout);

        // Perform the find in the prior context so the file name returned is not freed with the temp context
        MEM_CONTEXT_PRIOR_BEGIN()
        {
            walFile = walSegmentFind(finder, walSegment);
        }
        MEM_CONTEXT_PRIOR_END();
    }
    MEM_CONTEXT_TEMP_END();

    FUNCTION_LOG_RETURN(STRING, walFile);
}

View File

@@ -0,0 +1,36 @@
/***********************************************************************************************************************************
Archive Segment Find
Find a WAL segment (or segments) in a repository. The code paths for finding single or multiple WAL segments are both optimized.
***********************************************************************************************************************************/
#ifndef COMMAND_ARCHIVE_FIND_H
#define COMMAND_ARCHIVE_FIND_H
/***********************************************************************************************************************************
Object type
***********************************************************************************************************************************/
typedef struct WalSegmentFind WalSegmentFind;
#include "common/type/string.h"
#include "storage/storage.h"
/***********************************************************************************************************************************
Constructors
***********************************************************************************************************************************/
// Create an object to find WAL segments in the archive identified by archiveId. Set single to optimize for finding a single
// segment, otherwise the directory list is kept and reused for subsequent finds. The timeout applies to each segment find.
FN_EXTERN WalSegmentFind *walSegmentFindNew(const Storage *storage, const String *archiveId, bool single, TimeMSec timeout);
/***********************************************************************************************************************************
Functions
***********************************************************************************************************************************/
// Find a WAL segment in the repository. The file name can have several things appended such as a hash, compression extension, and
// partial extension so it is possible to have multiple files that match the segment, though more than one match is not a good
// thing.
//
// Returns the name of the file found. Throws ArchiveDuplicateError when more than one file matches the segment. When the segment
// is not found ArchiveTimeoutError is thrown, unless the timeout is 0 in which case NULL is returned.
FN_EXTERN String *walSegmentFind(WalSegmentFind *this, const String *walSegment);
/***********************************************************************************************************************************
Helper functions
***********************************************************************************************************************************/
// Find a single WAL segment (see walSegmentFind() for details). A temporary WalSegmentFind object is created for the find.
FN_EXTERN String *walSegmentFindOne(const Storage *storage, const String *archiveId, const String *walSegment, TimeMSec timeout);
#endif

View File

@@ -4,6 +4,7 @@ Archive Push File
#include "build.auto.h"
#include "command/archive/common.h"
#include "command/archive/find.h"
#include "command/archive/push/file.h"
#include "command/control/common.h"
#include "common/crypto/cipherBlock.h"
@@ -175,7 +176,7 @@ archivePushFile(
TRY_BEGIN()
{
walSegmentFile = walSegmentFind(storageRepoIdx(repoData->repoIdx), repoData->archiveId, archiveFile, 0);
walSegmentFile = walSegmentFindOne(storageRepoIdx(repoData->repoIdx), repoData->archiveId, archiveFile, 0);
}
CATCH_ANY()
{

View File

@@ -8,7 +8,7 @@ Backup Command
#include <time.h>
#include <unistd.h>
#include "command/archive/common.h"
#include "command/archive/find.h"
#include "command/backup/backup.h"
#include "command/backup/common.h"
#include "command/backup/file.h"
@@ -1118,7 +1118,7 @@ backupStart(BackupData *const backupData)
strEq(result.walSegmentName, dbBackupStartResult.walSegmentCheck) ? "" : "prior ",
strZ(dbBackupStartResult.walSegmentCheck));
walSegmentFind(
walSegmentFindOne(
storageRepo(), backupData->archiveId, dbBackupStartResult.walSegmentCheck,
cfgOptionUInt64(cfgOptArchiveTimeout));
}
@@ -2338,16 +2338,16 @@ backupArchiveCheckCopy(const BackupData *const backupData, Manifest *const manif
// Loop through all the segments in the lsn range
const StringList *const walSegmentList = pgLsnRangeToWalSegmentList(
backupData->timeline, lsnStart, lsnStop, backupData->walSegmentSize);
WalSegmentFind *const find = walSegmentFindNew(
storageRepo(), backupData->archiveId, strLstSize(walSegmentList) == 1, cfgOptionUInt64(cfgOptArchiveTimeout));
for (unsigned int walSegmentIdx = 0; walSegmentIdx < strLstSize(walSegmentList); walSegmentIdx++)
{
MEM_CONTEXT_TEMP_BEGIN()
{
// Find the actual wal segment file (including checksum compression extension) in the archive
const String *const walSegment = strLstGet(walSegmentList, walSegmentIdx);
// Find the actual wal segment file in the archive
const String *const archiveFile = walSegmentFind(
storageRepo(), backupData->archiveId, walSegment, cfgOptionUInt64(cfgOptArchiveTimeout));
const String *const archiveFile = walSegmentFind(find, walSegment);
if (cfgOptionBool(cfgOptArchiveCopy))
{

View File

@@ -3,7 +3,7 @@ Check Command
***********************************************************************************************************************************/
#include "build.auto.h"
#include "command/archive/common.h"
#include "command/archive/find.h"
#include "command/check/check.h"
#include "command/check/common.h"
#include "common/debug.h"
@@ -151,7 +151,7 @@ checkPrimary(const DbGetResult dbGroup)
{
LOG_INFO_FMT(CFGCMD_CHECK " %s archive for WAL (primary)", cfgOptionGroupName(cfgOptGrpRepo, repoIdx));
const String *const walSegmentFile = walSegmentFind(
const String *const walSegmentFile = walSegmentFindOne(
storageRepoIdx(repoIdx), repoArchiveId[repoIdx], walSegment, cfgOptionUInt64(cfgOptArchiveTimeout));
LOG_INFO_FMT(

View File

@@ -116,6 +116,7 @@ subdir('postgres')
src_pgbackrest = [
'command/annotate/annotate.c',
'command/archive/common.c',
'command/archive/find.c',
'command/archive/get/file.c',
'command/archive/get/get.c',
'command/archive/get/protocol.c',