1
0
mirror of https://github.com/postgrespro/pg_probackup.git synced 2024-12-13 11:53:59 +02:00

PGPRO-2065: allow checkdb to be used without BACKUP_DIR

This commit is contained in:
Grigory Smolkin 2019-03-31 11:41:46 +03:00
parent 8964f2aff3
commit 702d13e4a5
3 changed files with 170 additions and 113 deletions

View File

@ -940,7 +940,7 @@ do_block_validation(void)
/* arrays with meta info for multi threaded backup */
pthread_t *threads;
backup_files_arg *threads_args;
bool backup_isok = true;
bool check_isok = true;
pgBackupGetPath(&current, database_path, lengthof(database_path),
DATABASE_DIR);
@ -1017,14 +1017,14 @@ do_block_validation(void)
{
pthread_join(threads[i], NULL);
if (threads_args[i].ret > 0)
backup_isok = false;
check_isok = false;
}
/* TODO write better info message */
if (backup_isok)
elog(INFO, "Data files are checked");
if (check_isok)
elog(INFO, "Data files are valid");
else
elog(ERROR, "Data files checking failed");
elog(ERROR, "Data files are corrupted");
if (backup_files_list)
{
@ -1042,7 +1042,7 @@ do_amcheck(void)
/* arrays with meta info for multi threaded backup */
pthread_t *threads;
backup_files_arg *threads_args;
bool backup_isok = true;
bool check_isok = true;
PGresult *res_db;
int n_databases = 0;
bool first_db_with_amcheck = true;
@ -1118,13 +1118,13 @@ do_amcheck(void)
{
pthread_join(threads[j], NULL);
if (threads_args[j].ret == 1)
backup_isok = false;
check_isok = false;
}
pgut_disconnect(db_conn);
}
/* TODO write better info message */
if (backup_isok)
if (check_isok)
elog(INFO, "Indexes are checked");
else
elog(ERROR, "Indexs checking failed");
@ -1188,6 +1188,17 @@ pgdata_basic_setup(void)
is_checksum_enabled = pg_checksum_enable();
/*
* Ensure that backup directory was initialized for the same PostgreSQL
* instance we opened connection to. And that target backup database PGDATA
* belogns to the same instance.
*/
/* TODO fix it for remote backup */
if (!is_remote_backup)
check_system_identifiers();
if (is_checksum_enabled)
elog(LOG, "This PostgreSQL instance was initialized with data block checksums. "
"Data block corruption will be detected");
@ -1225,10 +1236,6 @@ do_backup(time_t start_time)
current.compress_alg = instance_config.compress_alg;
current.compress_level = instance_config.compress_level;
/* TODO fix it for remote backup*/
if (!is_remote_backup)
current.checksum_version = get_data_checksum_version(true);
current.stream = stream_wal;
is_ptrack_support = pg_ptrack_support();
@ -1261,15 +1268,6 @@ do_backup(time_t start_time)
instance_config.master_user);
}
/*
* Ensure that backup directory was initialized for the same PostgreSQL
* instance we opened connection to. And that target backup database PGDATA
* belogns to the same instance.
*/
/* TODO fix it for remote backup */
if (!is_remote_backup)
check_system_identifiers();
/* Start backup. Update backup status. */
current.status = BACKUP_STATUS_RUNNING;
current.start_time = start_time;
@ -1415,10 +1413,24 @@ check_system_identifiers(void)
system_id_pgdata = get_system_identifier(instance_config.pgdata);
system_id_conn = get_remote_system_identifier(backup_conn);
/* for checkdb check only system_id_pgdata and system_id_conn */
if (current.backup_mode == BACKUP_MODE_INVALID)
{
if (system_id_conn != system_id_pgdata)
{
elog(ERROR, "Data directory initialized with system id " UINT64_FORMAT ", "
"but connected instance system id is " UINT64_FORMAT,
system_id_pgdata, system_id_conn);
}
return;
}
if (system_id_conn != instance_config.system_identifier)
elog(ERROR, "Backup data directory was initialized for system id " UINT64_FORMAT ", "
"but connected instance system id is " UINT64_FORMAT,
instance_config.system_identifier, system_id_conn);
if (system_id_pgdata != instance_config.system_identifier)
elog(ERROR, "Backup data directory was initialized for system id " UINT64_FORMAT ", "
"but target backup directory system id is " UINT64_FORMAT,
@ -2562,7 +2574,7 @@ check_files(void *arg)
elog(VERBOSE, "Checking file: \"%s\" ", file->path);
/* check for interrupt */
if (interrupted)
if (interrupted || thread_interrupted)
elog(ERROR, "interrupted during checkdb");
if (progress)
@ -2579,7 +2591,6 @@ check_files(void *arg)
* If file is not found, this is not en error.
* It could have been deleted by concurrent postgres transaction.
*/
file->write_size = BYTES_INVALID;
elog(LOG, "File \"%s\" is not found", file->path);
continue;
}
@ -2597,7 +2608,7 @@ check_files(void *arg)
if (S_ISREG(buf.st_mode))
{
/* check only uncompressed datafiles */
/* check only uncompressed by cfs datafiles */
if (file->is_datafile && !file->is_cfs)
{
char to_path[MAXPGPATH];
@ -2639,7 +2650,7 @@ check_indexes(void *arg)
continue;
/* check for interrupt */
if (interrupted)
if (interrupted || thread_interrupted)
elog(ERROR, "interrupted during checkdb --amcheck");
elog(VERBOSE, "Checking index number %d of %d : \"%s\" ", i,n_indexes, ind->name);

View File

@ -181,6 +181,7 @@ typedef struct BackupPageHeader
/* Special value for compressed_size field */
#define PageIsTruncated -2
#define SkipCurrentPage -3
#define PageIsCorrupted -4 /* used by checkdb */
/* Verify page's header */
static bool
@ -278,7 +279,7 @@ read_page_from_file(pgFile *file, BlockNumber blknum,
if (pg_checksum_page(page, file->segno * RELSEG_SIZE + blknum)
!= ((PageHeader) page)->pd_checksum)
{
elog(WARNING, "File: %s blknum %u have wrong checksum, try again",
elog(LOG, "File: %s blknum %u have wrong checksum, try again",
file->path, blknum);
return -1;
}
@ -304,6 +305,7 @@ read_page_from_file(pgFile *file, BlockNumber blknum,
* Returns 0 if page was successfully retrieved
* SkipCurrentPage(-3) if we need to skip this page
* PageIsTruncated(-2) if the page was truncated
* PageIsCorrupted(-4) if the page check mismatch
*/
static int32
prepare_page(backup_files_arg *arguments,
@ -311,7 +313,8 @@ prepare_page(backup_files_arg *arguments,
BlockNumber blknum, BlockNumber nblocks,
FILE *in, int *n_skipped,
BackupMode backup_mode,
Page page)
Page page,
bool strict)
{
XLogRecPtr page_lsn = 0;
int try_again = 100;
@ -354,7 +357,7 @@ prepare_page(backup_files_arg *arguments,
*/
//elog(WARNING, "Checksum_Version: %i", current.checksum_version ? 1 : 0);
if (result == -1 && is_ptrack_support)
if (result == -1 && is_ptrack_support && strict)
{
elog(WARNING, "File %s, block %u, try to fetch via SQL",
file->path, blknum);
@ -365,8 +368,24 @@ prepare_page(backup_files_arg *arguments,
* If page is not valid after 100 attempts to read it
* throw an error.
*/
if(!page_is_valid && !is_ptrack_support)
elog(ERROR, "Data file checksum mismatch. Canceling backup");
if (!page_is_valid)
{
elog(WARNING, "CORRUPTION in file %s, block %u",
file->path, blknum);
if (!is_ptrack_support && strict)
elog(ERROR, "Data file corruption. Canceling backup");
}
/* Checkdb not going futher */
if (!strict)
{
if (page_is_valid)
return 0;
else
return PageIsCorrupted;
}
}
if (backup_mode == BACKUP_MODE_DIFF_PTRACK || (!page_is_valid && is_ptrack_support))
@ -507,67 +526,6 @@ compress_and_backup_page(pgFile *file, BlockNumber blknum,
file->write_size += write_buffer_size;
}
bool
check_data_file(backup_files_arg* arguments,
pgFile *file)
{
FILE *in;
BlockNumber blknum = 0;
BlockNumber nblocks = 0;
int n_blocks_skipped = 0;
int page_state;
char curr_page[BLCKSZ];
/* reset size summary */
file->read_size = 0;
file->write_size = 0;
/* open backup mode file for read */
in = fopen(file->path, PG_BINARY_R);
if (in == NULL)
{
FIN_FILE_CRC32(true, file->crc);
/*
* If file is not found, this is not en error.
* It could have been deleted by concurrent postgres transaction.
*/
if (errno == ENOENT)
{
elog(LOG, "File \"%s\" is not found", file->path);
return false;
}
elog(ERROR, "cannot open file \"%s\": %s",
file->path, strerror(errno));
}
if (file->size % BLCKSZ != 0)
{
fclose(in);
elog(ERROR, "File: %s, invalid file size %zu", file->path, file->size);
}
/*
* Compute expected number of blocks in the file.
* NOTE This is a normal situation, if the file size has changed
* since the moment we computed it.
*/
nblocks = file->size/BLCKSZ;
for (blknum = 0; blknum < nblocks; blknum++)
{
page_state = prepare_page(arguments, file, 0, //0 = InvalidXLogRecPtr
blknum, nblocks, in, &n_blocks_skipped,
BACKUP_MODE_FULL, curr_page);
if (page_state == PageIsTruncated)
break;
}
return true;
fclose(in);
}
/*
* Backup data file in the from_root directory to the to_root directory with
* same relative path. If prev_backup_start_lsn is not NULL, only pages with
@ -672,7 +630,7 @@ backup_data_file(backup_files_arg* arguments,
{
page_state = prepare_page(arguments, file, prev_backup_start_lsn,
blknum, nblocks, in, &n_blocks_skipped,
backup_mode, curr_page);
backup_mode, curr_page, true);
compress_and_backup_page(file, blknum, in, out, &(file->crc),
page_state, curr_page, calg, clevel);
n_blocks_read++;
@ -695,7 +653,7 @@ backup_data_file(backup_files_arg* arguments,
{
page_state = prepare_page(arguments, file, prev_backup_start_lsn,
blknum, nblocks, in, &n_blocks_skipped,
backup_mode, curr_page);
backup_mode, curr_page, true);
compress_and_backup_page(file, blknum, in, out, &(file->crc),
page_state, curr_page, calg, clevel);
n_blocks_read++;
@ -1580,10 +1538,7 @@ validate_one_page(Page page, pgFile *file,
/* Verify checksum */
if (checksum_version)
{
/*
* If checksum is wrong, sleep a bit and then try again
* several times. If it didn't help, throw error
*/
/* Checksums are enabled, so check it. */
if (pg_checksum_page(page, file->segno * RELSEG_SIZE + blknum)
== ((PageHeader) page)->pd_checksum)
{
@ -1597,19 +1552,25 @@ validate_one_page(Page page, pgFile *file,
}
else
{
/* Get lsn from page header. Ensure that page is from our time */
lsn = PageXLogRecPtrGet(phdr->pd_lsn);
if (stop_lsn > 0)
{
/* Get lsn from page header. Ensure that page is from our time.
* This is dangerous move, because we cannot be sure that
* lsn from page header is not a garbage.
*/
lsn = PageXLogRecPtrGet(phdr->pd_lsn);
if (lsn > stop_lsn)
elog(WARNING, "File: %s, block %u, checksum is not enabled. "
"Page is from future: pageLSN %X/%X stopLSN %X/%X",
file->path, blknum, (uint32) (lsn >> 32), (uint32) lsn,
(uint32) (stop_lsn >> 32), (uint32) stop_lsn);
else
return PAGE_IS_FOUND_AND_VALID;
if (lsn > stop_lsn)
elog(WARNING, "File: %s, block %u, checksum is not enabled. "
"Page is from future: pageLSN %X/%X stopLSN %X/%X",
file->path, blknum, (uint32) (lsn >> 32), (uint32) lsn,
(uint32) (stop_lsn >> 32), (uint32) stop_lsn);
else
return PAGE_IS_FOUND_AND_VALID;
}
}
if (checksum_is_ok)
if (checksum_is_ok && stop_lsn > 0)
{
/* Get lsn from page header. Ensure that page is from our time */
lsn = PageXLogRecPtrGet(phdr->pd_lsn);
@ -1627,6 +1588,87 @@ validate_one_page(Page page, pgFile *file,
return PAGE_IS_FOUND_AND_NOT_VALID;
}
bool
check_data_file(backup_files_arg* arguments,
pgFile *file)
{
FILE *in;
BlockNumber blknum = 0;
BlockNumber nblocks = 0;
int n_blocks_skipped = 0;
int page_state;
char curr_page[BLCKSZ];
bool is_valid = true;
/* reset size summary */
file->read_size = 0;
file->write_size = 0;
/* open backup mode file for read */
in = fopen(file->path, PG_BINARY_R);
if (in == NULL)
{
/*
* If file is not found, this is not en error.
* It could have been deleted by concurrent postgres transaction.
*/
if (errno == ENOENT)
{
elog(LOG, "File \"%s\" is not found", file->path);
return true;
}
elog(ERROR, "cannot open file \"%s\": %s",
file->path, strerror(errno));
}
if (file->size % BLCKSZ != 0)
{
fclose(in);
elog(ERROR, "File: %s, invalid file size %zu", file->path, file->size);
}
/*
* Compute expected number of blocks in the file.
* NOTE This is a normal situation, if the file size has changed
* since the moment we computed it.
*/
nblocks = file->size/BLCKSZ;
for (blknum = 0; blknum < nblocks; blknum++)
{
page_state = prepare_page(arguments, file, InvalidXLogRecPtr, //0 = InvalidXLogRecPtr
blknum, nblocks, in, &n_blocks_skipped,
BACKUP_MODE_FULL, curr_page, false);
if (page_state == PageIsTruncated)
break;
if (page_state == PageIsCorrupted)
{
/* Page is corrupted */
//elog(WARNING, "File %s, block %u is CORRUPTED.",
// file->path, blknum);
is_valid = false;
continue;
}
/* Page is found and this point, but it may not be 'sane' */
if (validate_one_page(curr_page, file, blknum,
InvalidXLogRecPtr,
0) == PAGE_IS_FOUND_AND_NOT_VALID)
{
/* Page is corrupted */
is_valid = false;
}
}
fclose(in);
return is_valid;
}
/* Valiate pages of datafile in backup one by one */
bool
check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version,

View File

@ -327,14 +327,14 @@ main(int argc, char *argv[])
help_command(command_name);
/* backup_path is required for all pg_probackup commands except help and checkdb */
if (backup_path == NULL && backup_subcmd != CHECKDB_CMD)
if (backup_path == NULL)
{
/*
* If command line argument is not set, try to read BACKUP_PATH
* from environment variable
*/
backup_path = getenv("BACKUP_PATH");
if (backup_path == NULL)
if (backup_path == NULL && backup_subcmd != CHECKDB_CMD)
elog(ERROR, "required parameter not specified: BACKUP_PATH (-B, --backup-path)");
}
@ -353,10 +353,10 @@ main(int argc, char *argv[])
}
/* Option --instance is required for all commands except init and show */
if (backup_subcmd != INIT_CMD && backup_subcmd != SHOW_CMD &&
backup_subcmd != VALIDATE_CMD && backup_subcmd != CHECKDB_CMD)
if (instance_name == NULL)
{
if (instance_name == NULL)
if (backup_subcmd != INIT_CMD && backup_subcmd != SHOW_CMD &&
backup_subcmd != VALIDATE_CMD && backup_subcmd != CHECKDB_CMD)
elog(ERROR, "required parameter not specified: --instance");
}
@ -399,7 +399,11 @@ main(int argc, char *argv[])
{
join_path_components(path, backup_instance_path,
BACKUP_CATALOG_CONF_FILE);
config_read_opt(path, instance_options, ERROR, true, false);
if (backup_subcmd == CHECKDB_CMD)
config_read_opt(path, instance_options, ERROR, true, true);
else
config_read_opt(path, instance_options, ERROR, true, false);
}
}