From fed13c298ec658e8eb05fd905813c7c5352f26cc Mon Sep 17 00:00:00 2001 From: Zhuravlev Uriy aka stalkerg Date: Mon, 29 Feb 2016 20:23:48 +0300 Subject: [PATCH] Add multithreaded backup. --- Makefile | 6 +- backup.c | 169 ++++++++++++++++++++++++++------------------ expected/backup.out | 6 ++ pg_arman.c | 2 + pg_arman.h | 2 + sql/backup.sh | 11 +++ sql/restore.sh | 24 +++---- 7 files changed, 138 insertions(+), 82 deletions(-) diff --git a/Makefile b/Makefile index 267b3edf..5609ffa2 100644 --- a/Makefile +++ b/Makefile @@ -33,9 +33,9 @@ DOCS += doc/pg_arman.html doc/README.html endif # XMLTO endif # ASCIIDOC -PG_CPPFLAGS = -I$(libpq_srcdir) -override CPPFLAGS := -DFRONTEND $(CPPFLAGS) -PG_LIBS = $(libpq_pgport) +PG_CPPFLAGS = -I$(libpq_srcdir) ${PTHREAD_CFLAGS} +override CPPFLAGS := -DFRONTEND $(CPPFLAGS) +PG_LIBS = $(libpq_pgport) ${PTHREAD_LIBS} ${PTHREAD_CFLAGS} REGRESS = init option show delete backup restore diff --git a/backup.c b/backup.c index cb3ef97c..84274455 100644 --- a/backup.c +++ b/backup.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "libpq/pqsignal.h" #include "pgut/pgut-port.h" @@ -33,12 +34,22 @@ static bool in_backup = false; /* TODO: more robust logic */ /* list of files contained in backup */ parray *backup_files_list; +typedef struct +{ + const char *from_root; + const char *to_root; + parray *files; + parray *prev_files; + const XLogRecPtr *lsn; + unsigned int start_file_idx; + unsigned int end_file_idx; +} backup_files_args; + /* * Backup routines */ static void backup_cleanup(bool fatal, void *userdata); -static void backup_files(const char *from_root, const char *to_root, - parray *files, parray *prev_files, const XLogRecPtr *lsn, const char *prefix); +static void backup_files(void *arg); static parray *do_backup_database(parray *backup_list, pgBackupOption bkupopt); static void confirm_block_size(const char *name, int blcksz); static void pg_start_backup(const char *label, bool smooth, pgBackup *backup); @@ -72,6 +83,8 
@@ do_backup_database(parray *backup_list, pgBackupOption bkupopt) char prev_file_txt[MAXPGPATH]; /* path of the previous backup * list file */ bool has_backup_label = true; /* flag if backup_label is there */ + pthread_t backup_threads[num_threads]; + backup_files_args *backup_threads_args[num_threads]; /* repack the options */ bool smooth_checkpoint = bkupopt.smooth_checkpoint; @@ -221,7 +234,77 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt) make_pagemap_from_ptrack(backup_files_list); } - backup_files(pgdata, path, backup_files_list, prev_files, lsn, NULL); + /* sort pathname ascending */ + parray_qsort(backup_files_list, pgFileComparePath); + + /* make dirs before backup */ + for (i = 0; i < parray_num(backup_files_list); i++) + { + int ret; + struct stat buf; + pgFile *file = (pgFile *) parray_get(backup_files_list, i); + + ret = stat(file->path, &buf); + if (ret == -1) + { + if (errno == ENOENT) + { + /* record as skipped file in file_xxx.txt */ + file->write_size = BYTES_INVALID; + elog(LOG, "skip"); + continue; + } + else + { + elog(ERROR, + "can't stat backup mode. 
\"%s\": %s", + file->path, strerror(errno)); + } + } + /* if the entry was a directory, create it in the backup */ + if (S_ISDIR(buf.st_mode)) + { + char dirpath[MAXPGPATH]; + if (verbose) + elog(LOG, "Make dir %s", file->path + strlen(pgdata) + 1); + join_path_components(dirpath, path, JoinPathEnd(file->path, pgdata)); + if (!check) + dir_create_dir(dirpath, DIR_PERMISSION); + } + } + + if (num_threads < 1) + num_threads = 1; + + for (i = 0; i < num_threads; i++) + { + backup_files_args *arg = pg_malloc(sizeof(backup_files_args)); + arg->from_root = pgdata; + arg->to_root = path; + arg->files = backup_files_list; + arg->prev_files = prev_files; + arg->lsn = lsn; + arg->start_file_idx = i * (parray_num(backup_files_list)/num_threads); + if (i == num_threads - 1) + arg->end_file_idx = parray_num(backup_files_list); + else + arg->end_file_idx = (i + 1) * (parray_num(backup_files_list)/num_threads); + + if (verbose) + elog(WARNING, "Start thread for start_file_idx:%i end_file_idx:%i num:%li", + arg->start_file_idx, + arg->end_file_idx, + parray_num(backup_files_list)); + backup_threads_args[i] = arg; + pthread_create(&backup_threads[i], NULL, (void *(*)(void *)) backup_files, arg); + } + + /* Wait theads */ + for (i = 0; i < num_threads; i++) + { + pthread_join(backup_threads[i], NULL); + pg_free(backup_threads_args[i]); + } /* Clear ptrack files after backup */ if (current.backup_mode == BACKUP_MODE_DIFF_PTRACK) @@ -266,8 +349,8 @@ do_backup(pgBackupOption bkupopt) int ret; /* repack the necessary options */ - int keep_data_generations = bkupopt.keep_data_generations; - int keep_data_days = bkupopt.keep_data_days; + int keep_data_generations = bkupopt.keep_data_generations; + int keep_data_days = bkupopt.keep_data_days; /* PGDATA and BACKUP_MODE are always required */ if (pgdata == NULL) @@ -656,28 +739,22 @@ backup_cleanup(bool fatal, void *userdata) * Take differential backup at page level. 
*/ static void -backup_files(const char *from_root, - const char *to_root, - parray *files, - parray *prev_files, - const XLogRecPtr *lsn, - const char *prefix) +backup_files(void *arg) { int i; struct timeval tv; - /* sort pathname ascending */ - parray_qsort(files, pgFileComparePath); + backup_files_args *arguments = (backup_files_args *) arg; gettimeofday(&tv, NULL); /* backup a file or create a directory */ - for (i = 0; i < parray_num(files); i++) + for (i = arguments->start_file_idx; i < arguments->end_file_idx; i++) { int ret; struct stat buf; - pgFile *file = (pgFile *) parray_get(files, i); + pgFile *file = (pgFile *) parray_get(arguments->files, i); /* If current time is rewinded, abort this backup. */ if (tv.tv_sec < file->mtime) @@ -690,18 +767,8 @@ backup_files(const char *from_root, /* print progress in verbose mode */ if (verbose) - { - if (prefix) - { - char path[MAXPGPATH]; - join_path_components(path, prefix, file->path + strlen(from_root) + 1); - elog(LOG, "(%d/%lu) %s", i + 1, - (unsigned long) parray_num(files), path); - } - else - elog(LOG, "(%d/%lu) %s", i + 1, (unsigned long) parray_num(files), - file->path + strlen(from_root) + 1); - } + elog(LOG, "(%d/%lu) %s", i + 1, (unsigned long) parray_num(arguments->files), + file->path + strlen(arguments->from_root) + 1); /* stat file to get file type, size and modify timestamp */ ret = stat(file->path, &buf); @@ -722,52 +789,20 @@ backup_files(const char *from_root, } } - /* if the entry was a directory, create it in the backup */ + /* skip dir because make before */ if (S_ISDIR(buf.st_mode)) { - char dirpath[MAXPGPATH]; - - join_path_components(dirpath, to_root, JoinPathEnd(file->path, from_root)); - if (!check) - dir_create_dir(dirpath, DIR_PERMISSION); - elog(LOG, "directory"); + continue; } else if (S_ISREG(buf.st_mode)) { /* skip files which have not been modified since last backup */ - if (prev_files) + if (arguments->prev_files) { pgFile *prev_file = NULL; - - /* - * If prefix is not NULL, 
the table space is backup from the snapshot. - * Therefore, adjust file name to correspond to the file list. - */ - if (prefix) - { - int j; - - for (j = 0; j < parray_num(prev_files); j++) - { - pgFile *p = (pgFile *) parray_get(prev_files, j); - char *prev_path; - char curr_path[MAXPGPATH]; - - prev_path = p->path + strlen(from_root) + 1; - join_path_components(curr_path, prefix, file->path + strlen(from_root) + 1); - if (strcmp(curr_path, prev_path) == 0) - { - prev_file = p; - break; - } - } - } - else - { - pgFile **p = (pgFile **) parray_bsearch(prev_files, file, pgFileComparePath); - if (p) - prev_file = *p; - } + pgFile **p = (pgFile **) parray_bsearch(arguments->prev_files, file, pgFileComparePath); + if (p) + prev_file = *p; if (prev_file && prev_file->mtime == file->mtime) { @@ -797,8 +832,8 @@ backup_files(const char *from_root, /* copy the file into backup */ if (!(file->is_datafile - ? backup_data_file(from_root, to_root, file, lsn) - : copy_file(from_root, to_root, file))) + ? 
backup_data_file(arguments->from_root, arguments->to_root, file, arguments->lsn) + : copy_file(arguments->from_root, arguments->to_root, file))) { /* record as skipped file in file_xxx.txt */ file->write_size = BYTES_INVALID; diff --git a/expected/backup.out b/expected/backup.out index 7bec0c7b..9770dd16 100644 --- a/expected/backup.out +++ b/expected/backup.out @@ -38,3 +38,9 @@ page-level backup without validated full backup 0 2 6 +###### BACKUP COMMAND TEST-0007 ###### +###### ptrack multi thread backup mode ###### +0 +0 +2 +6 diff --git a/pg_arman.c b/pg_arman.c index 89a93de0..0d9f0c98 100644 --- a/pg_arman.c +++ b/pg_arman.c @@ -33,6 +33,7 @@ pgBackup current; static bool smooth_checkpoint; static int keep_data_generations = KEEP_INFINITE; static int keep_data_days = KEEP_INFINITE; +int num_threads = 1; static bool backup_validate = false; /* restore configuration */ @@ -55,6 +56,7 @@ static pgut_option options[] = { 's', 'B', "backup-path", &backup_path, SOURCE_ENV }, /* common options */ { 'b', 'c', "check", &check }, + { 'i', 'j', "check", &num_threads }, /* backup options */ { 'f', 'b', "backup-mode", opt_backup_mode, SOURCE_ENV }, { 'b', 'C', "smooth-checkpoint", &smooth_checkpoint, SOURCE_ENV }, diff --git a/pg_arman.h b/pg_arman.h index 2a2fac1c..2133482d 100644 --- a/pg_arman.h +++ b/pg_arman.h @@ -202,6 +202,8 @@ extern const char *pgdata_exclude[]; /* backup file list from non-snapshot */ extern parray *backup_files_list; +extern int num_threads; + /* in backup.c */ extern int do_backup(pgBackupOption bkupopt); extern BackupMode parse_backup_mode(const char *value); diff --git a/sql/backup.sh b/sql/backup.sh index 2efc70e5..e9fa6ef2 100644 --- a/sql/backup.sh +++ b/sql/backup.sh @@ -85,6 +85,17 @@ pg_arman show -B ${BACKUP_PATH} > ${TEST_BASE}/TEST-0006.log 2>&1 grep -c OK ${TEST_BASE}/TEST-0006.log grep OK ${TEST_BASE}/TEST-0006.log | sed -e 's@[^-]@@g' | wc -c | sed 's/^ *//' +echo '###### BACKUP COMMAND TEST-0007 ######' +echo '###### ptrack 
multi thread backup mode ######' +init_catalog +pg_arman backup -B ${BACKUP_PATH} -b full -j 4 -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0007-run.log 2>&1;echo $? +pg_arman validate -B ${BACKUP_PATH} --verbose >> ${TEST_BASE}/TEST-0007-run.log 2>&1 +pg_arman backup -B ${BACKUP_PATH} -b ptrack -j 4 -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0007-run.log 2>&1;echo $? +pg_arman validate -B ${BACKUP_PATH} >> ${TEST_BASE}/TEST-0007-run.log 2>&1 +pg_arman show -B ${BACKUP_PATH} > ${TEST_BASE}/TEST-0007.log 2>&1 +grep -c OK ${TEST_BASE}/TEST-0007.log +grep OK ${TEST_BASE}/TEST-0007.log | sed -e 's@[^-]@@g' | wc -c | sed 's/^ *//' + # cleanup ## clean up the temporal test data pg_ctl stop -m immediate -D ${PGDATA_PATH} > /dev/null 2>&1 diff --git a/sql/restore.sh b/sql/restore.sh index d08ae893..5f775adb 100644 --- a/sql/restore.sh +++ b/sql/restore.sh @@ -30,10 +30,10 @@ init_backup pgbench_objs 0001 pgbench -p ${TEST_PGPORT} -d pgbench > /dev/null 2>&1 psql --no-psqlrc -p ${TEST_PGPORT} -d pgbench -c "SELECT * FROM pgbench_branches;" > ${TEST_BASE}/TEST-0001-before.out -pg_arman backup -B ${BACKUP_PATH} -b full -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0001-run.out 2>&1;echo $? +pg_arman backup -B ${BACKUP_PATH} -b full -j 4 -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0001-run.out 2>&1;echo $? pg_arman validate -B ${BACKUP_PATH} --verbose >> ${TEST_BASE}/TEST-0001-run.out 2>&1 pg_ctl stop -m immediate > /dev/null 2>&1 -pg_arman restore -B ${BACKUP_PATH} --verbose >> ${TEST_BASE}/TEST-0001-run.out 2>&1;echo $? +pg_arman restore -B ${BACKUP_PATH} -j 4 --verbose >> ${TEST_BASE}/TEST-0001-run.out 2>&1;echo $? 
pg_ctl start -w -t 600 > /dev/null 2>&1 psql --no-psqlrc -p ${TEST_PGPORT} -d pgbench -c "SELECT * FROM pgbench_branches;" > ${TEST_BASE}/TEST-0001-after.out diff ${TEST_BASE}/TEST-0001-before.out ${TEST_BASE}/TEST-0001-after.out @@ -43,7 +43,7 @@ echo '###### RESTORE COMMAND TEST-0002 ######' echo '###### recovery to latest from full + page backups ######' init_backup pgbench_objs 0002 -pg_arman backup -B ${BACKUP_PATH} -b full -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0002-run.out 2>&1;echo $? +pg_arman backup -B ${BACKUP_PATH} -b full -j 4 -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0002-run.out 2>&1;echo $? pg_arman validate -B ${BACKUP_PATH} --verbose >> ${TEST_BASE}/TEST-0002-run.out 2>&1 pgbench -p ${TEST_PGPORT} -d pgbench > /dev/null 2>&1 pg_arman backup -B ${BACKUP_PATH} -b page -p ${TEST_PGPORT} -d postgres --verbose >> ${TEST_BASE}/TEST-0002-run.out 2>&1;echo $? @@ -61,7 +61,7 @@ echo '###### recovery to target timeline ######' init_backup pgbench_objs 0003 psql --no-psqlrc -p ${TEST_PGPORT} -d pgbench -c "SELECT * FROM pgbench_branches;" > ${TEST_BASE}/TEST-0003-before.out -pg_arman backup -B ${BACKUP_PATH} -b full -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0003-run.out 2>&1;echo $? +pg_arman backup -B ${BACKUP_PATH} -b full -j 4 -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0003-run.out 2>&1;echo $? pg_arman validate -B ${BACKUP_PATH} --verbose >> ${TEST_BASE}/TEST-0003-run.out 2>&1 TARGET_TLI=`pg_controldata | grep " TimeLineID:" | awk '{print $4}'` pg_ctl stop -m immediate > /dev/null 2>&1 @@ -90,7 +90,7 @@ init_backup pgbench_objs 0004 pgbench -p ${TEST_PGPORT} -d pgbench > /dev/null 2>&1 psql --no-psqlrc -p ${TEST_PGPORT} -d pgbench -c "SELECT * FROM pgbench_branches;" > ${TEST_BASE}/TEST-0004-before.out -pg_arman backup -B ${BACKUP_PATH} -b full -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0004-run.out 2>&1;echo $? 
+pg_arman backup -B ${BACKUP_PATH} -b full -j 4 -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0004-run.out 2>&1;echo $? pg_arman validate -B ${BACKUP_PATH} --verbose >> ${TEST_BASE}/TEST-0004-run.out 2>&1 TARGET_TIME=`date +"%Y-%m-%d %H:%M:%S"` pgbench -p ${TEST_PGPORT} -d pgbench > /dev/null 2>&1 @@ -106,7 +106,7 @@ echo '###### recovery to target XID ######' init_backup pgbench_objs 0005 psql --no-psqlrc -p ${TEST_PGPORT} -d pgbench -c "CREATE TABLE tbl0005 (a text);" > /dev/null 2>&1 -pg_arman backup -B ${BACKUP_PATH} -b full -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0005-run.out 2>&1;echo $? +pg_arman backup -B ${BACKUP_PATH} -b full -j 4 -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0005-run.out 2>&1;echo $? pg_arman validate -B ${BACKUP_PATH} --verbose >> ${TEST_BASE}/TEST-0005-run.out 2>&1 pgbench -p ${TEST_PGPORT} pgbench > /dev/null 2>&1 psql --no-psqlrc -p ${TEST_PGPORT} -d pgbench -c "SELECT * FROM pgbench_branches;" > ${TEST_BASE}/TEST-0005-before.out @@ -136,10 +136,10 @@ echo '###### RESTORE COMMAND TEST-0006 ######' echo '###### recovery to latest from full + ptrack backups ######' init_backup pgbench_objs 0006 -pg_arman backup -B ${BACKUP_PATH} -b full -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0006-run.out 2>&1;echo $? +pg_arman backup -B ${BACKUP_PATH} -b full -j 4 -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0006-run.out 2>&1;echo $? pg_arman validate -B ${BACKUP_PATH} --verbose >> ${TEST_BASE}/TEST-0006-run.out 2>&1 pgbench -p ${TEST_PGPORT} -d pgbench > /dev/null 2>&1 -pg_arman backup -B ${BACKUP_PATH} -b ptrack -p ${TEST_PGPORT} -d postgres --verbose >> ${TEST_BASE}/TEST-0006-run.out 2>&1;echo $? +pg_arman backup -B ${BACKUP_PATH} -b ptrack -j 4 -p ${TEST_PGPORT} -d postgres --verbose >> ${TEST_BASE}/TEST-0006-run.out 2>&1;echo $? 
pg_arman validate -B ${BACKUP_PATH} --verbose >> ${TEST_BASE}/TEST-0006-run.out 2>&1 psql --no-psqlrc -p ${TEST_PGPORT} -d pgbench -c "SELECT * FROM pgbench_branches;" > ${TEST_BASE}/TEST-0006-before.out pg_ctl stop -m immediate > /dev/null 2>&1 @@ -153,13 +153,13 @@ echo '###### RESTORE COMMAND TEST-0007 ######' echo '###### recovery to latest from full + ptrack + ptrack backups ######' init_backup pgbench_objs 0007 -pg_arman backup -B ${BACKUP_PATH} -b full -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0007-run.out 2>&1;echo $? +pg_arman backup -B ${BACKUP_PATH} -b full -j 4 -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0007-run.out 2>&1;echo $? pg_arman validate -B ${BACKUP_PATH} --verbose >> ${TEST_BASE}/TEST-0007-run.out 2>&1 pgbench -p ${TEST_PGPORT} -d pgbench > /dev/null 2>&1 -pg_arman backup -B ${BACKUP_PATH} -b ptrack -p ${TEST_PGPORT} -d postgres --verbose >> ${TEST_BASE}/TEST-0007-run.out 2>&1;echo $? +pg_arman backup -B ${BACKUP_PATH} -b ptrack -j 4 -p ${TEST_PGPORT} -d postgres --verbose >> ${TEST_BASE}/TEST-0007-run.out 2>&1;echo $? pg_arman validate -B ${BACKUP_PATH} --verbose >> ${TEST_BASE}/TEST-0007-run.out 2>&1 pgbench -p ${TEST_PGPORT} -d pgbench > /dev/null 2>&1 -pg_arman backup -B ${BACKUP_PATH} -b ptrack -p ${TEST_PGPORT} -d postgres --verbose >> ${TEST_BASE}/TEST-0007-run.out 2>&1;echo $? +pg_arman backup -B ${BACKUP_PATH} -b ptrack -j 4 -p ${TEST_PGPORT} -d postgres --verbose >> ${TEST_BASE}/TEST-0007-run.out 2>&1;echo $? 
pg_arman validate -B ${BACKUP_PATH} --verbose >> ${TEST_BASE}/TEST-0007-run.out 2>&1 pg_arman show -B ${BACKUP_PATH} > ${TEST_BASE}/TEST-0007-show.out 2>&1 psql --no-psqlrc -p ${TEST_PGPORT} -d pgbench -c "SELECT * FROM pgbench_branches;" > ${TEST_BASE}/TEST-0007-before.out @@ -175,7 +175,7 @@ echo '###### recovery with target inclusive false ######' init_backup pgbench_objs 0008 psql --no-psqlrc -p ${TEST_PGPORT} -d pgbench -c "CREATE TABLE tbl0008 (a text);" > /dev/null 2>&1 -pg_arman backup -B ${BACKUP_PATH} -b full -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0008-run.out 2>&1;echo $? +pg_arman backup -B ${BACKUP_PATH} -b full -j 4 -p ${TEST_PGPORT} -d postgres --verbose > ${TEST_BASE}/TEST-0008-run.out 2>&1;echo $? pg_arman validate -B ${BACKUP_PATH} --verbose >> ${TEST_BASE}/TEST-0003-run.out 2>&1 pgbench -p ${TEST_PGPORT} pgbench > /dev/null 2>&1 psql --no-psqlrc -p ${TEST_PGPORT} -d pgbench -c "SELECT * FROM pgbench_branches;" > ${TEST_BASE}/TEST-0008-before.out