1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2025-07-05 00:28:52 +02:00

Retry reads of pg_control until checksum is valid.

On certain file systems (e.g. ext4) pg_control may appear torn if there is a concurrent write while reading the file. To prevent an invalid read, retry until the checksum matches the control data.

Special handling is required for the pg-version-force feature since the offset of the checksum is not known. In this case, scan from the default position to the end of the data looking for a checksum match. This is a bit imprecise, but better than nothing, and the chance of a random collision in the control data seems very remote considering the ratio of data size (< 512 bytes) to checksum size (4 bytes).

This was discovered and a possible solution proposed for PostgreSQL in [1]. The proposed solution may work for backup, but pgBackRest needs to be able to read pg_control reliably outside of backup. So no matter what fix is adopted for PostgreSQL, pgBackRest needs retries. Further adjustment may be required as the PostgreSQL fix evolves.

[1] https://www.postgresql.org/message-id/20221123014224.xisi44byq3cf5psi%40awork3.anarazel.de
This commit is contained in:
David Steele
2023-09-10 09:47:49 -04:00
committed by GitHub
parent c1805134b3
commit f42d927d2d
21 changed files with 473 additions and 124 deletions

View File

@ -9,7 +9,9 @@ PostgreSQL Interface
#include "common/log.h"
#include "common/memContext.h"
#include "common/regExp.h"
#include "common/time.h"
#include "postgres/interface.h"
#include "postgres/interface/crc32.h"
#include "postgres/interface/static.vendor.h"
#include "postgres/version.h"
#include "storage/helper.h"
@ -44,8 +46,10 @@ STRING_STATIC(PG_NAME_LOCATION_STR, "location");
/***********************************************************************************************************************************
The control file is 8192 bytes but only the first 512 bytes are used to prevent torn pages even on really old storage with 512-byte
sectors. This is true across all versions of PostgreSQL.
sectors. This is true across all versions of PostgreSQL. Unfortunately, this is not sufficient to prevent torn pages during
concurrent reads and writes so retries are required.
***********************************************************************************************************************************/
#define PG_CONTROL_SIZE 8192
#define PG_CONTROL_DATA_SIZE 512
/***********************************************************************************************************************************
@ -64,6 +68,9 @@ typedef struct PgInterface
// Convert pg_control to a common data structure
PgControl (*control)(const unsigned char *);
// Get control crc offset
size_t (*controlCrcOffset)(void);
// Get the control version for this version of PostgreSQL
uint32_t (*controlVersion)(void);
@ -222,6 +229,47 @@ pgControlFromBuffer(const Buffer *controlFile, const String *const pgVersionForc
PgControl result = interface->control(bufPtrConst(controlFile));
result.version = interface->version;
// Check CRC
size_t crcOffset = interface->controlCrcOffset();
do
{
// Calculate CRC and retrieve expected CRC
const uint32_t crcCalculated =
result.version > PG_VERSION_94 ?
crc32cOne(bufPtrConst(controlFile), crcOffset) : crc32One(bufPtrConst(controlFile), crcOffset);
const uint32_t crcExpected = *((uint32_t *)(bufPtrConst(controlFile) + crcOffset));
// If CRC does not match
if (crcCalculated != crcExpected)
{
// If version is forced then the CRC might be later in the file (assuming the fork added extra fields to pg_control).
// Increment the offset by CRC data size and continue to try again.
if (pgVersionForce != NULL)
{
crcOffset += sizeof(uint32_t);
if (crcOffset <= bufUsed(controlFile) - sizeof(uint32_t))
continue;
}
// If no retry then error
THROW_FMT(
ChecksumError,
"calculated " PG_FILE_PGCONTROL " checksum does not match expected value\n"
"HINT: calculated 0x%x but expected value is 0x%x\n"
"%s"
"HINT: is " PG_FILE_PGCONTROL " corrupt?\n"
"HINT: does " PG_FILE_PGCONTROL " have a different layout than expected?",
crcCalculated, crcExpected,
pgVersionForce == NULL ? "" : "HINT: checksum values may be misleading due to forced version scan\n");
}
// Do not retry if the CRC is valid
break;
}
while (true);
// Check the segment size
pgWalSegmentSizeCheck(result.version, result.walSegmentSize);
@ -232,8 +280,20 @@ pgControlFromBuffer(const Buffer *controlFile, const String *const pgVersionForc
FUNCTION_LOG_RETURN(PG_CONTROL, result);
}
FN_EXTERN PgControl
pgControlFromFile(const Storage *storage, const String *const pgVersionForce)
// Helper to compare control data to last read. When there is no prior read (last is NULL) the buffers are reported as not equal.
static bool
pgControlBufferEq(const Buffer *const last, const Buffer *const current)
{
    FUNCTION_TEST_BEGIN();
        FUNCTION_TEST_PARAM(BUFFER, last);
        FUNCTION_TEST_PARAM(BUFFER, current);
    FUNCTION_TEST_END();

    // Nothing to compare against on the first read
    if (last == NULL)
        FUNCTION_TEST_RETURN(BOOL, false);

    // Otherwise the result is whatever bufEq() reports for the two buffers
    FUNCTION_TEST_RETURN(BOOL, bufEq(last, current));
}
FN_EXTERN Buffer *
pgControlBufferFromFile(const Storage *const storage, const String *const pgVersionForce)
{
FUNCTION_LOG_BEGIN(logLevelDebug);
FUNCTION_LOG_PARAM(STORAGE, storage);
@ -242,18 +302,75 @@ pgControlFromFile(const Storage *storage, const String *const pgVersionForce)
ASSERT(storage != NULL);
PgControl result = {0};
Buffer *result = NULL;
MEM_CONTEXT_TEMP_BEGIN()
{
// Read control file
Buffer *controlFile = storageGetP(
storageNewReadP(storage, STRDEF(PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL)), .exactSize = PG_CONTROL_DATA_SIZE);
// On filesystems that do not implement atomicity of concurrent reads and writes, we might get garbage if the server is
// writing the pg_control file at the same time as we try to read it. Keep trying until success or we read the same data
// twice in a row. Do not use a timeout here because there are plenty of other errors that can happen and we don't want to
// wait for them.
Buffer *controlFileLast = NULL;
bool done = false;
result = pgControlFromBuffer(controlFile, pgVersionForce);
do
{
// Read control file
Buffer *const controlFile = storageGetP(
storageNewReadP(storage, STRDEF(PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL)), .exactSize = PG_CONTROL_DATA_SIZE);
TRY_BEGIN()
{
// Check that control data is valid
pgControlFromBuffer(controlFile, pgVersionForce);
// Create a buffer of the correct size to hold pg_control and zero out the remaining portion
result = bufNew(PG_CONTROL_SIZE);
bufCat(result, controlFile);
memset(bufPtr(result) + bufUsed(controlFile), 0, bufSize(result) - bufUsed(controlFile));
bufUsedSet(result, bufSize(result));
bufMove(result, memContextPrior());
done = true;
}
CATCH_ANY()
{
// If we get the same bad data twice in a row then error
if (pgControlBufferEq(controlFileLast, controlFile))
RETHROW();
// Copy data to last
bufFree(controlFileLast);
controlFileLast = controlFile;
// Sleep to let data stabilize
sleepMSec(50);
}
TRY_END();
}
while (!done);
}
MEM_CONTEXT_TEMP_END();
FUNCTION_LOG_RETURN(BUFFER, result);
}
// Load pg_control from storage and convert it to the common PgControl structure
FN_EXTERN PgControl
pgControlFromFile(const Storage *const storage, const String *const pgVersionForce)
{
    FUNCTION_LOG_BEGIN(logLevelDebug);
        FUNCTION_LOG_PARAM(STORAGE, storage);
        FUNCTION_LOG_PARAM(STRING, pgVersionForce);
    FUNCTION_LOG_END();

    ASSERT(storage != NULL);

    // Fetch the raw control data
    Buffer *const controlFile = pgControlBufferFromFile(storage, pgVersionForce);

    // Convert the raw data to the common structure
    PgControl result = pgControlFromBuffer(controlFile, pgVersionForce);

    // The raw data is not needed once it has been converted
    bufFree(controlFile);

    FUNCTION_LOG_RETURN(PG_CONTROL, result);
}

View File

@ -130,6 +130,7 @@ FN_EXTERN bool pgDbIsSystem(const String *name);
FN_EXTERN bool pgDbIsSystemId(unsigned int id);
// Get info from pg_control
FN_EXTERN Buffer *pgControlBufferFromFile(const Storage *storage, const String *pgVersionForce);
FN_EXTERN PgControl pgControlFromFile(const Storage *storage, const String *pgVersionForce);
// Get the control version for a PostgreSQL version

View File

@ -0,0 +1,102 @@
/***********************************************************************************************************************************
CRC-32 Calculation
***********************************************************************************************************************************/
#include "build.auto.h"
#include "postgres/interface/crc32.h"
/**********************************************************************************************************************************/
// Lookup table used by crc32One(), indexed by a single byte value
static const uint32_t crc32_lookup[256] =
{
    0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
    0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
    0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
    0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
    0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
    0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
    0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
    0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
    0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
    0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
    0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
    0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
    0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
    0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
    0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
    0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
    0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
    0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
    0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
    0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
    0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
    0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
    0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
    0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
    0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
    0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
    0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
    0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
    0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
    0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
    0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
    0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D,
};

// Calculate a checksum over size bytes starting at data in a single call. The running value starts at 0xffffffff, is updated one
// byte at a time from crc32_lookup, and is XORed with 0xffffffff before being returned. A size of zero returns 0.
//
// NOTE(review): the update indexes the table with the top byte of the running value and shifts left, whereas crc32cOne() uses the
// low byte and shifts right. Presumably this is intentional so the result matches what older PostgreSQL versions store in
// pg_control -- confirm against PostgreSQL's legacy CRC macros before "correcting" it.
FN_EXTERN uint32_t
crc32One(const unsigned char *data, size_t size)
{
    uint32_t crc = 0xffffffff;

    for (size_t dataIdx = 0; dataIdx < size; dataIdx++)
    {
        // Table slot is selected by the most significant byte of the running value combined with the next input byte
        const uint32_t slot = ((crc >> 24) ^ data[dataIdx]) & 0xFF;

        crc = (crc << 8) ^ crc32_lookup[slot];
    }

    return crc ^ 0xffffffff;
}
/**********************************************************************************************************************************/
// Lookup table used by crc32cOne(), indexed by a single byte value
static const uint32_t crc32c_lookup[256] =
{
    0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
    0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
    0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
    0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
    0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
    0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
    0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
    0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
    0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
    0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
    0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
    0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
    0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
    0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
    0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
    0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
    0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
    0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
    0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
    0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
    0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
    0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
    0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
    0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
    0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
    0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
    0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
    0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
    0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
    0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
    0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
    0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351,
};

// Calculate a CRC-32C checksum over size bytes starting at data in a single call. The running value starts at 0xffffffff, is
// updated one byte at a time from crc32c_lookup (indexed by the low byte, shifting right), and is XORed with 0xffffffff before
// being returned. A size of zero returns 0.
FN_EXTERN uint32_t
crc32cOne(const unsigned char *data, size_t size)
{
    uint32_t crc = 0xffffffff;

    for (size_t dataIdx = 0; dataIdx < size; dataIdx++)
    {
        // Table slot is selected by the least significant byte of the running value combined with the next input byte
        const uint32_t slot = (crc ^ data[dataIdx]) & 0xFF;

        crc = (crc >> 8) ^ crc32c_lookup[slot];
    }

    return crc ^ 0xffffffff;
}

View File

@ -0,0 +1,21 @@
/***********************************************************************************************************************************
CRC-32 Calculation
CRC-32 and CRC-32C calculations required to validate the integrity of pg_control.
***********************************************************************************************************************************/
#ifndef POSTGRES_INTERFACE_CRC32_H
#define POSTGRES_INTERFACE_CRC32_H
#include <inttypes.h>
#include <stddef.h>
/***********************************************************************************************************************************
Functions
***********************************************************************************************************************************/
// Generate CRC-32 checksum (required by <= 9.4). The checksum is calculated over size bytes starting at data in a single call, i.e.
// there is no way to continue a prior calculation. A size of zero returns 0.
FN_EXTERN uint32_t crc32One(const unsigned char *data, size_t size);
// Generate CRC-32C checksum (required by >= 9.5). The checksum is calculated over size bytes starting at data in a single call,
// i.e. there is no way to continue a prior calculation. A size of zero returns 0.
FN_EXTERN uint32_t crc32cOne(const unsigned char *data, size_t size);
#endif

View File

@ -76,6 +76,22 @@ Read the version specific pg_control into a general data structure
#endif
/***********************************************************************************************************************************
Get control crc offset
***********************************************************************************************************************************/
// Nothing is defined when PG_VERSION is greater than PG_VERSION_MAX (this branch is intentionally empty)
#if PG_VERSION > PG_VERSION_MAX
// For PG_VERSION_93 and above, PG_INTERFACE_CONTROL_CRC_OFFSET(version) expands to a static function named
// pgInterfaceControlCrcOffset<version>() that returns the byte offset of the crc member within ControlFileData. There is no #else,
// so nothing is defined for versions below PG_VERSION_93.
#elif PG_VERSION >= PG_VERSION_93
#define PG_INTERFACE_CONTROL_CRC_OFFSET(version) \
static size_t \
pgInterfaceControlCrcOffset##version(void) \
{ \
return offsetof(ControlFileData, crc); \
}
#endif
/***********************************************************************************************************************************
Get the control version
***********************************************************************************************************************************/