
Merge branch 'master' into issue_146

Grigory Smolkin 2020-04-18 16:40:38 +03:00
commit 1e2491eb80
44 changed files with 6191 additions and 1701 deletions

.gitignore

@ -47,3 +47,14 @@
# Doc files
/doc/*html
# Docker files
/docker-compose.yml
/Dockerfile
/Dockerfile.in
/run_tests.sh
/make_dockerfile.sh
/backup_restore.sh
# Misc
.python-version

.travis.yml

@ -1,7 +1,47 @@
sudo: required
os: linux
dist: bionic
language: c
services:
- docker
- docker
before_install:
- cp travis/* .
install:
- ./make_dockerfile.sh
- docker-compose build
script:
- docker run -v $(pwd):/tests --rm centos:7 /tests/travis/backup_restore.sh
- docker-compose run tests
# - docker-compose run $(bash <(curl -s https://codecov.io/env)) tests
# - docker run -v $(pwd):/tests --rm centos:7 /tests/travis/backup_restore.sh
notifications:
email:
on_success: change
on_failure: always
# Default MODE is basic, i.e. all tests with PG_PROBACKUP_TEST_BASIC=ON
env:
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=archive
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=backup
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=compression
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=delta
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=locking
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=merge
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=page
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=replica
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=retention
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=restore
- PG_VERSION=11 PG_BRANCH=REL_11_STABLE
- PG_VERSION=10 PG_BRANCH=REL_10_STABLE
- PG_VERSION=9.6 PG_BRANCH=REL9_6_STABLE
- PG_VERSION=9.5 PG_BRANCH=REL9_5_STABLE
jobs:
allow_failures:
- if: env(MODE) IN (archive, backup, delta, locking, merge, replica, retention, restore)

Makefile

@ -15,9 +15,9 @@ OBJS += src/pg_crc.o src/datapagemap.o src/receivelog.o src/streamutil.o \
EXTRA_CLEAN = src/pg_crc.c src/datapagemap.c src/datapagemap.h \
src/receivelog.c src/receivelog.h src/streamutil.c src/streamutil.h \
src/xlogreader.c
src/xlogreader.c src/instr_time.h
INCLUDES = src/datapagemap.h src/streamutil.h src/receivelog.h
INCLUDES = src/datapagemap.h src/streamutil.h src/receivelog.h src/instr_time.h
ifdef USE_PGXS
PG_CONFIG = pg_config
@ -60,6 +60,8 @@ all: checksrcdir $(INCLUDES);
$(PROGRAM): $(OBJS)
src/instr_time.h: $(top_srcdir)/src/include/portability/instr_time.h
rm -f $@ && $(LN_S) $(srchome)/src/include/portability/instr_time.h $@
src/datapagemap.c: $(top_srcdir)/src/bin/pg_rewind/datapagemap.c
rm -f $@ && $(LN_S) $(srchome)/src/bin/pg_rewind/datapagemap.c $@
src/datapagemap.h: $(top_srcdir)/src/bin/pg_rewind/datapagemap.h

README.md

@ -1,3 +1,5 @@
[![Build Status](https://travis-ci.com/postgrespro/pg_probackup.svg?branch=master)](https://travis-ci.com/postgrespro/pg_probackup)
# pg_probackup
`pg_probackup` is a utility to manage backup and recovery of PostgreSQL database clusters. It is designed to perform periodic backups of the PostgreSQL instance that enable you to restore the server in case of a failure.
@ -7,6 +9,7 @@ The utility is compatible with:
As compared to other backup solutions, `pg_probackup` offers the following benefits that can help you implement different backup strategies and deal with large amounts of data:
* Incremental backup: page-level incremental backup allows you to save disk space, speed up backup and restore. With three different incremental modes, you can plan the backup strategy in accordance with your data flow.
* Merge: using this feature allows you to implement "incrementally updated backups" strategy, eliminating the need to do periodic full backups.
* Validation: automatic data consistency checks and on-demand backup validation without actual data recovery.
* Verification: on-demand verification of PostgreSQL instance with the `checkdb` command.
* Retention: managing WAL archive and backups in accordance with retention policy. You can configure retention policy based on recovery time or the number of backups to keep, as well as specify `time to live` (TTL) for a particular backup. Expired backups can be merged or deleted.
@ -37,8 +40,9 @@ Regardless of the chosen backup type, all backups taken with `pg_probackup` supp
`PTRACK` backup support is provided via the following options:
* vanilla PostgreSQL compiled with ptrack patch. Currently there are patches for [PostgreSQL 9.6](https://gist.githubusercontent.com/gsmol/5b615c971dfd461c76ef41a118ff4d97/raw/e471251983f14e980041f43bea7709b8246f4178/ptrack_9.6.6_v1.5.patch) and [PostgreSQL 10](https://gist.githubusercontent.com/gsmol/be8ee2a132b88463821021fd910d960e/raw/de24f9499f4f314a4a3e5fae5ed4edb945964df8/ptrack_10.1_v1.5.patch)
* Postgres Pro Standard 9.6, 10, 11
* Postgres Pro Enterprise 9.6, 10, 11
* vanilla PostgreSQL 12 with [ptrack extension](https://github.com/postgrespro/ptrack)
* Postgres Pro Standard 9.6, 10, 11, 12
* Postgres Pro Enterprise 9.6, 10, 11, 12
## Limitations

doc/src/sgml/pgprobackup.sgml

@ -131,7 +131,6 @@ doc/src/sgml/pgprobackup.sgml
<arg choice="plain"><option>archive-push</option></arg>
<arg choice="plain"><option>-B</option> <replaceable>backup_dir</replaceable></arg>
<arg choice="plain"><option>--instance</option> <replaceable>instance_name</replaceable></arg>
<arg choice="plain"><option>--wal-file-path</option> <replaceable>wal_file_path</replaceable></arg>
<arg choice="plain"><option>--wal-file-name</option> <replaceable>wal_file_name</replaceable></arg>
<arg rep="repeat"><replaceable>option</replaceable></arg>
</cmdsynopsis>
@ -427,14 +426,6 @@ doc/src/sgml/pgprobackup.sgml
or <application>libc</application>/<application>libicu</application> versions.
</para>
</listitem>
<listitem>
<para>
All backups in the incremental chain must belong to the same
timeline. For example, if you have taken incremental backups on a
standby server that gets promoted, you have to take another FULL
backup.
</para>
</listitem>
</itemizedlist>
</para>
</refsect2>
@ -754,9 +745,10 @@ ALTER ROLE backup WITH REPLICATION;
<title>Setting up Continuous WAL Archiving</title>
<para>
Making backups in PAGE backup mode, performing
<link linkend="pbk-performing-point-in-time-pitr-recovery">PITR</link>
and making backups with
<link linkend="pbk-archive-mode">ARCHIVE</link> WAL delivery mode
<link linkend="pbk-performing-point-in-time-pitr-recovery">PITR</link>,
making backups with
<link linkend="pbk-archive-mode">ARCHIVE</link> WAL delivery mode and
running incremental backup after timeline switch
require
<ulink url="https://postgrespro.com/docs/postgresql/current/continuous-archiving.html">continuous
WAL archiving</ulink> to be enabled. To set up continuous
@ -786,7 +778,7 @@ ALTER ROLE backup WITH REPLICATION;
parameter, as follows:
</para>
<programlisting>
archive_command = '<replaceable>install_dir</replaceable>/pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-path=%p --wal-file-name=%f [<replaceable>remote_options</replaceable>]'
archive_command = '<replaceable>install_dir</replaceable>/pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-name=%f [<replaceable>remote_options</replaceable>]'
</programlisting>
</listitem>
</itemizedlist>
@ -1483,7 +1475,7 @@ pg_probackup checkdb [-B <replaceable>backup_dir</replaceable> [--instance <repl
enough to specify the backup instance of this cluster for
<application>pg_probackup</application> to determine the required
connection options. However, if <literal>-B</literal> and
<literal>--instance</literal> options are ommitted, you have to provide
<literal>--instance</literal> options are omitted, you have to provide
<link linkend="pbk-connection-opts">connection options</link> and
<replaceable>data_dir</replaceable> via environment
variables or command-line options.
@ -2247,7 +2239,7 @@ BACKUP INSTANCE 'node'
<para>
<literal>MERGED</literal> — the backup data files were
successfully merged, but its metadata is in the process
of been updated. Only full backup can have this status.
of being updated. Only full backups can have this status.
</para>
</listitem>
<listitem>
@ -2372,7 +2364,8 @@ primary_conninfo = 'user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmod
<listitem>
<para>
<literal>expire-time</literal> — the point in time
when a pinned backup can be removed by retention purge.
when a pinned backup can be removed in accordance with retention
policy. This attribute is only available for pinned backups.
</para>
</listitem>
<listitem>
@ -2816,17 +2809,19 @@ pg_probackup show -B <replaceable>backup_dir</replaceable> [--instance <replacea
<refsect2 id="pbk-configuring-retention-policy">
<title>Configuring Retention Policy</title>
<para>
With <application>pg_probackup</application>, you can set retention policies for backups
and WAL archive. All policies can be combined together in any
way.
With <application>pg_probackup</application>, you can configure
retention policy to remove redundant backups, clean up unneeded
WAL files, as well as pin specific backups to ensure they are
kept for the specified time, as explained in the sections below.
All these actions can be combined together in any way.
</para>
<refsect3 id="pbk-retention-policy">
<title>Backup Retention Policy</title>
<title>Removing Redundant Backups</title>
<para>
By default, all backup copies created with <application>pg_probackup</application> are
stored in the specified backup catalog. To save disk space,
you can configure retention policy and periodically clean up
redundant backup copies accordingly.
you can configure retention policy to remove redundant backup copies.
</para>
<para>
To configure retention policy, set one or more of the
@ -2849,56 +2844,51 @@ pg_probackup show -B <replaceable>backup_dir</replaceable> [--instance <replacea
<emphasis role="strong">the number of days</emphasis> from the
current moment. For example, if
<literal>retention-window=7</literal>, <application>pg_probackup</application> must
delete all backup copies that are older than seven days, with
all the corresponding WAL files.
keep at least one backup copy that is older than seven days, with
all the corresponding WAL files, and all the backups that follow.
</para>
<para>
If both <option>--retention-redundancy</option> and
<option>--retention-window</option> options are set,
<application>pg_probackup</application> keeps backup copies that satisfy at least one
condition. For example, if you set
<literal>--retention-redundancy=2</literal> and
<literal>--retention-window=7</literal>, <application>pg_probackup</application> purges
the backup catalog to keep only two full backup copies and all
backups that are newer than seven days:
<option>--retention-window</option> options are set, both these
conditions have to be taken into account when purging the backup
catalog. For example, if you set <literal>--retention-redundancy=2</literal>
and <literal>--retention-window=7</literal>,
<application>pg_probackup</application> has to keep two full backup
copies, as well as all the backups required to ensure recoverability
for the last seven days:
</para>
<programlisting>
pg_probackup set-config -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --retention-redundancy=2 --retention-window=7
</programlisting>
<para>
To clean up the backup catalog in accordance with retention
policy, run:
To clean up the backup catalog in accordance with retention policy,
you have to run the <xref linkend="pbk-delete"/> command with
<link linkend="pbk-retention-opts">retention flags</link>, as shown
below, or use the <xref linkend="pbk-backup"/> command with
these flags to process the outdated backup copies right when the new
backup is created.
</para>
<para>
For example, to remove all backup copies that no longer satisfy the
defined retention policy, run the following command with the
<literal>--delete-expired</literal> flag:
</para>
<programlisting>
pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --delete-expired
</programlisting>
<para>
<application>pg_probackup</application> deletes all backup copies that do not conform to
the defined retention policy.
</para>
<para>
If you would like to also remove the WAL files that are no
longer required for any of the backups, add the
longer required for any of the backups, you should also specify the
<option>--delete-wal</option> flag:
</para>
<programlisting>
pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --delete-expired --delete-wal
</programlisting>
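<para>
As a sketch, the same retention processing can be combined with taking
a new backup (the DELTA mode here is only an illustrative choice):
</para>
<programlisting>
pg_probackup backup -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> -b DELTA --delete-expired --merge-expired --delete-wal
</programlisting>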
<note>
<para>
Alternatively, you can use the
<option>--delete-expired</option>,
<option>--merge-expired</option>,
<option>--delete-wal</option> flags and the
<option>--retention-window</option> and
<option>--retention-redundancy</option> options together
with the <xref linkend="pbk-backup"/> command to
remove and merge the outdated backup copies once the new
backup is created.
</para>
</note>
<para>
You can set or override the current retention policy by
You can also set or override the current retention policy by
specifying <option>--retention-redundancy</option> and
<option>--retention-window</option> options directly when
running <command>delete</command> or <command>backup</command>
@ -2919,6 +2909,7 @@ pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replace
<xref linkend="pbk-backup"/> or
<xref linkend="pbk-delete"/> commands.
</para>
<para>
Suppose you have backed up the <replaceable>node</replaceable>
instance in the <replaceable>backup_dir</replaceable> directory,
@ -2971,9 +2962,10 @@ BACKUP INSTANCE 'node'
The <literal>Time</literal> field for the merged backup displays the time
required for the merge.
</para>
</refsect3>
<refsect3 id="pbk-backup-pinning">
<title>Backup Pinning</title>
<title>Pinning Backups</title>
<para>
If you need to keep certain backups longer than the
established retention policy allows, you can pin them
@ -3012,8 +3004,8 @@ pg_probackup show -B <replaceable>backup_dir</replaceable> --instance <replaceab
</programlisting>
</para>
<para>
If the backup is pinned, the <literal>expire-time</literal>
attribute displays its expiration time:
If the backup is pinned, it has the <literal>expire-time</literal>
attribute that displays its expiration time:
<programlisting>
...
recovery-time = '2017-05-16 12:57:31'
@ -3023,34 +3015,65 @@ data-bytes = 22288792
</programlisting>
</para>
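<para>
A backup is pinned with the <xref linkend="pbk-set-backup"/> command; as a
sketch, the 30-day TTL below is just an illustrative value:
</para>
<programlisting>
pg_probackup set-backup -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> -i <replaceable>backup_id</replaceable> --ttl=30d
</programlisting>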
<para>
Only pinned backups have the <literal>expire-time</literal>
attribute in the backup metadata.
</para>
<note>
<para>
A pinned incremental backup implicitly pins all
its parent backups.
</para>
</note>
<para>
You can unpin the backup by setting the
<option>--ttl</option> option to zero using the
<xref linkend="pbk-set-backup"/> command. For example:
You can unpin the backup by setting the <option>--ttl</option> option to zero:
</para>
<programlisting>
pg_probackup set-backup -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> -i <replaceable>backup_id</replaceable> --ttl=0
</programlisting>
<note>
<para>
A pinned incremental backup implicitly pins all
its parent backups. If you unpin such a backup later,
its implicitly pinned parents will also be automatically unpinned.
</para>
</note>
</refsect3>
<refsect3 id="pbk-wal-archive-retention-policy">
<title>WAL Archive Retention Policy</title>
<title>Configuring WAL Archive Retention Policy</title>
<para>
By default, <application>pg_probackup</application> purges
only redundant WAL segments that cannot be applied to any of the
backups in the backup catalog. To save disk space,
you can configure WAL archive retention policy, which allows to
keep WAL of limited depth measured in backups per timeline.
When <link linkend="pbk-setting-up-continuous-wal-archiving">continuous
WAL archiving</link> is enabled, archived WAL segments can take a lot
of disk space. Even if you delete old backup copies from time to time,
the <literal>--delete-wal</literal> flag can
purge only those WAL segments that do not apply to any of the
remaining backups in the backup catalog. However, if point-in-time
recovery is critical only for the most recent backups, you can
configure WAL archive retention policy to keep WAL archive of
limited depth and win back some more disk space.
</para>
<para>
To configure WAL archive retention policy, you have to run the
<xref linkend="pbk-set-config"/> command with the
<literal>--wal-depth</literal> option that specifies the number
of backups that can be used for PITR.
This setting applies to all the timelines, so you should be able to perform
PITR for the same number of backups on each timeline, if available.
<link linkend="pbk-backup-pinning">Pinned backups</link> are
not included into this count: if one of the latest backups
is pinned, <application>pg_probackup</application> ensures that
PITR is possible for one extra backup.
</para>
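<para>
For example, a minimal sketch that keeps WAL only for the most recent
backup on each timeline (the value 1 is chosen for illustration):
</para>
<programlisting>
pg_probackup set-config -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-depth=1
</programlisting>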
<para>
To remove WAL segments that do not satisfy the defined WAL archive
retention policy, you simply have to run the <xref linkend="pbk-delete"/>
or <xref linkend="pbk-backup"/> command with the <literal>--delete-wal</literal>
flag. For archive backups, WAL segments between <literal>Start LSN</literal>
and <literal>Stop LSN</literal> are always kept intact, so such backups
remain valid regardless of the <literal>--wal-depth</literal> setting
and can still be restored, if required.
</para>
<para>
You can also use the <option>--wal-depth</option> option
with the <xref linkend="pbk-delete"/> and <xref linkend="pbk-backup"/>
commands to override the previously defined WAL archive retention
policy and purge old WAL segments on the fly.
</para>
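<para>
A possible invocation that overrides the configured WAL depth while taking
a new backup (the PAGE mode and the depth of 2 are arbitrary examples):
</para>
<programlisting>
pg_probackup backup -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> -b PAGE --delete-wal --wal-depth=2
</programlisting>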
<para>
Suppose you have backed up the <literal>node</literal>
instance in the <replaceable>backup_dir</replaceable> directory and
@ -3104,8 +3127,8 @@ ARCHIVE INSTANCE 'node'
</programlisting>
<para>
If you would like, for example, to keep only those WAL
segments that can be applied to the last valid backup, use the
<option>--wal-depth</option> option:
segments that can be applied to the latest valid backup, set the
<option>--wal-depth</option> option to 1:
</para>
<programlisting>
pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance node --delete-wal --wal-depth=1
@ -3131,12 +3154,6 @@ ARCHIVE INSTANCE 'node'
===============================================================================================================================
1 0 0/0 000000010000000000000048 000000010000000000000049 1 72kB 228.00 7 OK
</programlisting>
<note>
<para>
<link linkend="pbk-backup-pinning">Pinned backups</link> are
ignored for the purpose of WAL Archive Retention Policy fulfilment.
</para>
</note>
</refsect3>
</refsect2>
<refsect2 id="pbk-merging-backups">
@ -3152,16 +3169,16 @@ ARCHIVE INSTANCE 'node'
pg_probackup merge -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> -i <replaceable>backup_id</replaceable>
</programlisting>
<para>
This command merges the specified incremental backup to its
parent full backup, together with all incremental backups
between them. If the specified backup ID belong to the full backup,
then it will be merged with the closest incremental backup.
Once the merge is complete, the incremental
backups are removed as redundant. Thus, the merge operation is
virtually equivalent to retaking a full backup and removing all
the outdated backups, but it allows to save much time,
especially for large data volumes, as well as I/O and network traffic
if you are using <application>pg_probackup</application> in the
This command merges backups that belong to a common incremental backup
chain. If you specify a full backup, it will be merged with its first
incremental backup. If you specify an incremental backup, it will be
merged to its parent full backup, together with all incremental backups
between them. Once the merge is complete, the full backup takes in all
the merged data, and the incremental backups are removed as redundant.
Thus, the merge operation is virtually equivalent to retaking a full
backup and removing all the outdated backups, but it allows you to save much
time, especially for large data volumes, as well as I/O and network
traffic if you are using <application>pg_probackup</application> in the
<link linkend="pbk-remote-backup">remote</link> mode.
</para>
<para>
@ -3175,8 +3192,10 @@ pg_probackup show -B <replaceable>backup_dir</replaceable> --instance <replaceab
</programlisting>
<para>
If the merge is still in progress, the backup status is
displayed as <literal>MERGING</literal> or, at the final stage,
<literal>MERGED</literal>. The merge is idempotent, so you can
displayed as <literal>MERGING</literal>. For full backups,
it can also be shown as <literal>MERGED</literal> while the
metadata is being updated at the final stage of the merge.
The merge is idempotent, so you can
restart the merge if it was interrupted.
</para>
</refsect2>
@ -3238,6 +3257,17 @@ pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replace
all the available backups according to the current retention
policy, without performing any irreversible actions.
</para>
<para>
To delete all backups with a specific status, use the <option>--status</option> option:
</para>
<programlisting>
pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --status=ERROR
</programlisting>
<para>
Deleting backups by status ignores established retention policies.
</para>
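<para>
If you are unsure what such a command would remove, a dry run only reports
the affected backups (the <literal>ORPHAN</literal> status here is just an example):
</para>
<programlisting>
pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --status=ORPHAN --dry-run
</programlisting>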
</refsect2>
</refsect1>
@ -3574,9 +3604,11 @@ pg_probackup backup -B <replaceable>backup_dir</replaceable> -b <replaceable>bac
<listitem>
<para>
Do not sync backed up files to disk. You can use this flag to speed
up backup process. Using this flag can result in data
up the backup process. Using this flag can result in data
corruption in case of operating system or hardware crash.
Corruption can be detected by backup validation.
If you use this option, it is recommended to run the
<xref linkend="pbk-validate"/> command once the backup is complete
to detect possible issues.
</para>
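<para>
A sketch of such a follow-up check, using the options shown elsewhere in this section:
</para>
<programlisting>
pg_probackup validate -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> -i <replaceable>backup_id</replaceable>
</programlisting>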
</listitem>
</varlistentry>
@ -3618,8 +3650,8 @@ pg_probackup restore -B <replaceable>backup_dir</replaceable> --instance <replac
[-R | --restore-as-replica] [--no-validate] [--skip-block-validation]
[--force] [--no-sync]
[--restore-command=<replaceable>cmdline</replaceable>]
[--restore-command=<replaceable>cmdline</replaceable>]
[--primary-conninfo=<replaceable>primary_conninfo</replaceable>]
[-S | --primary-slot-name=<replaceable>slot_name</replaceable>]
[<replaceable>recovery_target_options</replaceable>] [<replaceable>logging_options</replaceable>] [<replaceable>remote_options</replaceable>]
[<replaceable>partial_restore_options</replaceable>] [<replaceable>remote_wal_archive_options</replaceable>]
</programlisting>
@ -3664,7 +3696,7 @@ pg_probackup restore -B <replaceable>backup_dir</replaceable> --instance <replac
Sets the
<ulink url="https://postgrespro.com/docs/postgresql/current/runtime-config-replication.html#GUC-PRIMARY-CONNINFO">primary_conninfo</ulink>
parameter to the specified value.
This option will be ignored unless the <option>-R</option> flag if specified.
This option will be ignored unless the <option>-R</option> flag is specified.
</para>
<para>
Example: <literal>--primary-conninfo='host=192.168.1.50 port=5432 user=foo password=foopass'</literal>
@ -3672,6 +3704,19 @@ pg_probackup restore -B <replaceable>backup_dir</replaceable> --instance <replac
</listitem>
</varlistentry>
<varlistentry>
<term><option>-S</option></term>
<term><option>--primary-slot-name=<replaceable>slot_name</replaceable></option></term>
<listitem>
<para>
Sets the
<ulink url="https://postgrespro.com/docs/postgresql/current/runtime-config-replication.html#GUC-PRIMARY-SLOT-NAME">primary_slot_name</ulink>
parameter to the specified value.
This option will be ignored unless the <option>-R</option> flag is specified.
</para>
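<para>
Example (the slot name is arbitrary): <literal>--primary-slot-name=replica_1_slot</literal>
</para>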
</listitem>
</varlistentry>
<varlistentry>
<term><option>-T <replaceable>OLDDIR</replaceable>=<replaceable>NEWDIR</replaceable></option></term>
<term><option>--tablespace-mapping=<replaceable>OLDDIR</replaceable>=<replaceable>NEWDIR</replaceable></option></term>
@ -3764,6 +3809,8 @@ pg_probackup restore -B <replaceable>backup_dir</replaceable> --instance <replac
Do not sync restored files to disk. You can use this flag to speed
up restore process. Using this flag can result in data
corruption in case of operating system or hardware crash.
If it happens, you have to run the <xref linkend="pbk-restore"/>
command again.
</para>
</listitem>
</varlistentry>
@ -3891,12 +3938,12 @@ pg_probackup merge -B <replaceable>backup_dir</replaceable> --instance <replacea
[<replaceable>logging_options</replaceable>]
</programlisting>
<para>
Merges the specified incremental backup to its parent full
backup, together with all incremental backups between them, if
any. If the specified backup ID belong to the full backup,
then it will be merged with the closest incremental backup.
As a result, the full backup takes in all the merged
data, and the incremental backups are removed as redundant.
Merges backups that belong to a common incremental backup
chain. If you specify a full backup, it will be merged with its first
incremental backup. If you specify an incremental backup, it will be
merged to its parent full backup, together with all incremental backups
between them. Once the merge is complete, the full backup takes in all
the merged data, and the incremental backups are removed as redundant.
</para>
<para>
For details, see the section
@ -3908,10 +3955,9 @@ pg_probackup merge -B <replaceable>backup_dir</replaceable> --instance <replacea
<programlisting>
pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable>
[--help] [-j <replaceable>num_threads</replaceable>] [--progress]
[--retention-redundancy=<replaceable>redundancy</replaceable>][--retention-window=<replaceable>window</replaceable>][--wal-depth=<replaceable>wal_depth</replaceable>]
[--delete-wal] {-i <replaceable>backup_id</replaceable> | --delete-expired [--merge-expired] | --merge-expired}
[--dry-run]
[<replaceable>logging_options</replaceable>]
[--retention-redundancy=<replaceable>redundancy</replaceable>][--retention-window=<replaceable>window</replaceable>][--wal-depth=<replaceable>wal_depth</replaceable>] [--delete-wal]
{-i <replaceable>backup_id</replaceable> | --delete-expired [--merge-expired] | --merge-expired | --status=backup_status}
[--dry-run] [<replaceable>logging_options</replaceable>]
</programlisting>
<para>
Deletes backup with specified <replaceable>backup_id</replaceable>
@ -3930,9 +3976,12 @@ pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replace
<title>archive-push</title>
<programlisting>
pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable>
--wal-file-path=<replaceable>wal_file_path</replaceable> --wal-file-name=<replaceable>wal_file_name</replaceable>
[--help] [--compress] [--compress-algorithm=<replaceable>compression_algorithm</replaceable>]
[--compress-level=<replaceable>compression_level</replaceable>] [--overwrite]
--wal-file-name=<replaceable>wal_file_name</replaceable> [--wal-file-path=<replaceable>wal_file_path</replaceable>]
[--help] [--no-sync] [--compress] [--no-ready-rename] [--overwrite]
[-j <replaceable>num_threads</replaceable>] [--batch-size=<replaceable>batch_size</replaceable>]
[--archive-timeout=<replaceable>timeout</replaceable>]
[--compress-algorithm=<replaceable>compression_algorithm</replaceable>]
[--compress-level=<replaceable>compression_level</replaceable>]
[<replaceable>remote_options</replaceable>] [<replaceable>logging_options</replaceable>]
</programlisting>
<para>
@ -3943,12 +3992,10 @@ pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <r
backup instance and the cluster do not match, this command
fails with the following error message: <literal>Refuse to push WAL
segment segment_name into archive. Instance parameters
mismatch.</literal> For each WAL file moved to the backup catalog, you
will see the following message in the <productname>PostgreSQL</productname> log file:
<literal>pg_probackup archive-push completed successfully</literal>.
mismatch.</literal>
</para>
<para>
If the files to be copied already exist in the backup catalog,
If the files to be copied already exists in the backup catalog,
<application>pg_probackup</application> computes and compares their checksums. If the
checksums match, <command>archive-push</command> skips the corresponding file and
returns a successful execution code. Otherwise, <command>archive-push</command>
@ -3957,13 +4004,25 @@ pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <r
with the <option>--overwrite</option> flag.
</para>
<para>
The files are copied to a temporary file with the
<literal>.part</literal> suffix. After the copy is
done, atomic rename is performed. This algorithm ensures that a
failed <command>archive-push</command> will not stall continuous archiving and
that concurrent archiving from multiple sources into a single
WAL archive have no risk of archive corruption. WAL segments copied to
the archive are synced to disk.
Each file is copied to a temporary file with the
<literal>.part</literal> suffix. If the temporary file already
exists, <application>pg_probackup</application> will wait
<option>archive_timeout</option> seconds before discarding it.
After the copy is done, atomic rename is performed.
This algorithm ensures that a failed <command>archive-push</command>
will not stall continuous archiving and that concurrent archiving from
multiple sources into a single WAL archive has no risk of archive
corruption.
</para>
<para>
To speed up archiving, you can specify the <option>-j</option> option
to run <command>archive-push</command> on multiple threads.
If you provide the <option>--batch-size</option> option, WAL files
will be copied in batches of the specified size.
</para>
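<para>
A sketch of an <parameter>archive_command</parameter> that uses these options
(the thread count and batch size are arbitrary values):
</para>
<programlisting>
archive_command = '<replaceable>install_dir</replaceable>/pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-name=%f -j 4 --batch-size=10'
</programlisting>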
<para>
WAL segments copied to the archive are synced to disk unless
the <option>--no-sync</option> flag is used.
</para>
<para>
You can use <command>archive-push</command> in the
@ -3983,6 +4042,8 @@ pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <r
<title>archive-get</title>
<programlisting>
pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-path=<replaceable>wal_file_path</replaceable> --wal-file-name=<replaceable>wal_file_name</replaceable>
[-j <replaceable>num_threads</replaceable>] [--batch-size=<replaceable>batch_size</replaceable>]
[--prefetch-dir=<replaceable>prefetch_dir_path</replaceable>] [--no-validate-wal]
[--help] [<replaceable>remote_options</replaceable>] [<replaceable>logging_options</replaceable>]
</programlisting>
<para>
@ -3993,6 +4054,17 @@ pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <re
restoring backups using a WAL archive. You do not need to set
it manually.
</para>
<para>
To speed up recovery, you can specify the <option>-j</option> option
to run <command>archive-get</command> on multiple threads.
If you provide the <option>--batch-size</option> option, WAL segments
will be copied in batches of the specified size.
</para>
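<para>
If you do set it manually, a <parameter>restore_command</parameter> using these
options could look as follows (the thread count and batch size are arbitrary;
<application>pg_probackup</application> normally generates this command for you):
</para>
<programlisting>
restore_command = '<replaceable>install_dir</replaceable>/pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-path=%p --wal-file-name=%f -j 2 --batch-size=10'
</programlisting>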
<para>
For details, see section <link linkend="pbk-archiving-options">Archiving Options</link>.
</para>
</refsect3>
</refsect2>
<refsect2 id="pbk-options">
@ -4069,7 +4141,8 @@ pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <re
<para>
Sets the number of parallel threads for <command>backup</command>,
<command>restore</command>, <command>merge</command>,
<command>validate</command>, and <command>checkdb</command> processes.
<command>validate</command>, <command>checkdb</command>, and
<command>archive-push</command> processes.
</para>
</listitem>
</varlistentry>
@ -4119,7 +4192,7 @@ pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <re
The <literal>immediate</literal> value stops the recovery
after reaching the consistent state of the specified
backup, or the latest available backup if the
<option>-i</option>/<option>--backup_id</option> option is omitted.
<option>-i</option>/<option>--backup-id</option> option is omitted.
This is the default behavior for STREAM backups.
</para>
</listitem>
@ -4728,6 +4801,78 @@ pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <re
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--batch-size=<replaceable>batch_size</replaceable></option></term>
<listitem>
<para>
Sets the maximum number of files that can be copied into the archive
by a single <command>archive-push</command> process, or from
the archive by a single <command>archive-get</command> process.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--archive-timeout=<replaceable>wait_time</replaceable></option></term>
<listitem>
<para>
Sets the timeout for considering existing <literal>.part</literal>
files to be stale. By default, <application>pg_probackup</application>
waits 300 seconds.
This option can be used only with the <xref linkend="pbk-archive-push"/> command.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--no-ready-rename</option></term>
<listitem>
<para>
Do not rename status files in the <literal>archive_status</literal> directory.
This option should be used only if <parameter>archive_command</parameter>
contains multiple commands.
This option can be used only with the <xref linkend="pbk-archive-push"/> command.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--no-sync</option></term>
<listitem>
<para>
Do not sync copied WAL files to disk. You can use this flag to speed
up the archiving process. Using this flag can result in WAL archive
corruption in case of operating system or hardware crash.
This option can be used only with the <xref linkend="pbk-archive-push"/> command.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--prefetch-dir=<replaceable>path</replaceable></option></term>
<listitem>
<para>
Directory used to store prefetched WAL segments if the <option>--batch-size</option> option is used.
The directory must be located on the same filesystem and mount point as
<literal>PGDATA/pg_wal</literal>.
By default, files are stored in the <literal>PGDATA/pg_wal/pbk_prefetch</literal> directory.
This option can be used only with the <xref linkend="pbk-archive-get"/> command.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--no-validate-wal</option></term>
<listitem>
<para>
Do not validate the prefetched WAL file before using it.
You can use this option to speed up recovery.
This option can be used only with the <xref linkend="pbk-archive-get"/> command.
</para>
</listitem>
</varlistentry>
</variablelist>
</para>
</refsect3>
@ -5165,7 +5310,7 @@ INFO: Backup PZ7YK2 completed
<step id="pbk-lets-take-a-look-at-the-backup-catalog">
<title>Let's take a look at the backup catalog:</title>
<programlisting>
[backupman@backup_host] pg_probackup-11 backup -B /mnt/backups --instance 'pg-11'
[backupman@backup_host] pg_probackup-11 show -B /mnt/backups --instance 'pg-11'
BACKUP INSTANCE 'pg-11'
==================================================================================================================================
@ -5256,7 +5401,7 @@ remote-host = postgres_host
<step id="pbk-lets-take-a-look-at-the-backup-catalog-1">
<title>Let's take a look at the backup catalog:</title>
<programlisting>
[backupman@backup_host] pg_probackup-11 backup -B /mnt/backups --instance 'pg-11'
[backupman@backup_host] pg_probackup-11 show -B /mnt/backups --instance 'pg-11'
====================================================================================================================================
Instance Version ID Recovery Time Mode WAL Mode TLI Time Data WAL Zratio Start LSN Stop LSN Status

File diff suppressed because it is too large

src/backup.c

@ -153,6 +153,10 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
PGconn *master_conn = NULL;
PGconn *pg_startbackup_conn = NULL;
/* used for multitimeline incremental backup */
parray *tli_list = NULL;
/* for fancy reporting */
time_t start_time, end_time;
char pretty_time[20];
@ -181,17 +185,43 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
current.backup_mode == BACKUP_MODE_DIFF_PTRACK ||
current.backup_mode == BACKUP_MODE_DIFF_DELTA)
{
char prev_backup_filelist_path[MAXPGPATH];
/* get list of backups already taken */
backup_list = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID);
prev_backup = catalog_get_last_data_backup(backup_list, current.tli, current.start_time);
if (prev_backup == NULL)
elog(ERROR, "Valid backup on current timeline %X is not found. "
"Create new FULL backup before an incremental one.",
{
/* try to setup multi-timeline backup chain */
elog(WARNING, "Valid backup on current timeline %u is not found, "
"try to look up on previous timelines",
current.tli);
tli_list = catalog_get_timelines(&instance_config);
if (parray_num(tli_list) == 0)
elog(WARNING, "Cannot find valid backup on previous timelines, "
"WAL archive is not available");
else
{
prev_backup = get_multi_timeline_parent(backup_list, tli_list, current.tli,
current.start_time, &instance_config);
if (prev_backup == NULL)
elog(WARNING, "Cannot find valid backup on previous timelines");
}
/* failed to find suitable parent, error out */
if (!prev_backup)
elog(ERROR, "Create new full backup before an incremental one");
}
}
if (prev_backup)
{
char prev_backup_filelist_path[MAXPGPATH];
elog(INFO, "Parent backup: %s", base36enc(prev_backup->start_time));
join_path_components(prev_backup_filelist_path, prev_backup->root_dir,
DATABASE_FILE_LIST);
/* Files of previous backup needed by DELTA backup */
@ -378,8 +408,10 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE ||
current.backup_mode == BACKUP_MODE_DIFF_PTRACK)
{
elog(INFO, "Compiling pagemap of changed blocks");
bool pagemap_isok = true;
time(&start_time);
elog(INFO, "Extracting pagemap of changed blocks");
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE)
{
@ -388,8 +420,9 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
* reading WAL segments present in archives up to the point
* where this backup has started.
*/
extractPageMap(arclog_path, current.tli, instance_config.xlog_seg_size,
prev_backup->start_lsn, current.start_lsn);
pagemap_isok = extractPageMap(arclog_path, instance_config.xlog_seg_size,
prev_backup->start_lsn, prev_backup->tli,
current.start_lsn, current.tli, tli_list);
}
else if (current.backup_mode == BACKUP_MODE_DIFF_PTRACK)
{
@ -407,8 +440,14 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
}
time(&end_time);
elog(INFO, "Pagemap compiled, time elapsed %.0f sec",
difftime(end_time, start_time));
/* TODO: add ms precision */
if (pagemap_isok)
elog(INFO, "Pagemap successfully extracted, time elapsed: %.0f sec",
difftime(end_time, start_time));
else
elog(ERROR, "Pagemap extraction failed, time elasped: %.0f sec",
difftime(end_time, start_time));
}
/*
@ -667,6 +706,15 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
elog(INFO, "Backup files are synced, time elapsed: %s", pretty_time);
}
/* be paranoid about instance being from the past */
if (current.backup_mode != BACKUP_MODE_FULL &&
current.stop_lsn < prev_backup->stop_lsn)
elog(ERROR, "Current backup STOP LSN %X/%X is lower than STOP LSN %X/%X of previous backup %s. "
"It may indicate that we are trying to backup PostgreSQL instance from the past.",
(uint32) (current.stop_lsn >> 32), (uint32) (current.stop_lsn),
(uint32) (prev_backup->stop_lsn >> 32), (uint32) (prev_backup->stop_lsn),
base36enc(prev_backup->stop_lsn));
/* clean external directories list */
if (external_dirs)
free_dir_list(external_dirs);
@ -678,6 +726,12 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
parray_free(backup_list);
}
if (tli_list)
{
parray_walk(tli_list, timelineInfoFree);
parray_free(tli_list);
}
parray_walk(backup_files_list, pgFileFree);
parray_free(backup_files_list);
backup_files_list = NULL;
@ -1096,7 +1150,7 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup,
PQclear(res);
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE &&
if ((!stream_wal || current.backup_mode == BACKUP_MODE_DIFF_PAGE) &&
!backup->from_replica &&
!(nodeInfo->server_version < 90600 &&
!nodeInfo->is_superuser))
@ -1108,17 +1162,14 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup,
*/
pg_switch_wal(conn);
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE)
/* In PAGE mode wait for current segment... */
/* In PAGE mode or in ARCHIVE wal-mode wait for current segment */
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE || !stream_wal)
/*
* Do not wait start_lsn for stream backup.
* Because WAL streaming will start after pg_start_backup() in stream
* mode.
*/
wait_wal_lsn(backup->start_lsn, true, backup->tli, false, true, ERROR, false);
/*
* Do not wait start_lsn for stream backup.
* Because WAL streaming will start after pg_start_backup() in stream
* mode.
*/
else if (!stream_wal)
/* ...for others wait for previous segment */
wait_wal_lsn(backup->start_lsn, true, backup->tli, true, true, ERROR, false);
}
/*

src/catalog.c

@ -42,6 +42,24 @@ timelineInfoNew(TimeLineID tli)
return tlinfo;
}
/* free timelineInfo object */
void
timelineInfoFree(void *tliInfo)
{
timelineInfo *tli = (timelineInfo *) tliInfo;
parray_walk(tli->xlog_filelist, pgFileFree);
parray_free(tli->xlog_filelist);
if (tli->backups)
{
parray_walk(tli->backups, pgBackupFree);
parray_free(tli->backups);
}
pfree(tliInfo);
}
/* Iterate over locked backups and delete locks files */
static void
unlink_lock_atexit(void)
@ -597,7 +615,7 @@ catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current
switch (scan_parent_chain(backup, &tmp_backup))
{
/* broken chain */
case 0:
case ChainIsBroken:
invalid_backup_id = base36enc_dup(tmp_backup->parent_backup);
elog(WARNING, "Backup %s has missing parent: %s. Cannot be a parent",
@ -606,7 +624,7 @@ catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current
continue;
/* chain is intact, but at least one parent is invalid */
case 1:
case ChainIsInvalid:
invalid_backup_id = base36enc_dup(tmp_backup->start_time);
elog(WARNING, "Backup %s has invalid parent: %s. Cannot be a parent",
@ -615,17 +633,13 @@ catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current
continue;
/* chain is ok */
case 2:
case ChainIsOk:
/* Yes, we could call is_parent() earlier - after choosing the ancestor,
* but this way we have an opportunity to detect and report all possible
* anomalies.
*/
if (is_parent(full_backup->start_time, backup, true))
{
elog(INFO, "Parent backup: %s",
base36enc(backup->start_time));
return backup;
}
}
}
/* skip yourself */
@ -641,6 +655,150 @@ catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current
return NULL;
}
/*
* For multi-timeline chain, look up suitable parent for incremental backup.
* Multi-timeline chain has full backup and one or more descendants located
* on different timelines.
*/
pgBackup *
get_multi_timeline_parent(parray *backup_list, parray *tli_list,
TimeLineID current_tli, time_t current_start_time,
InstanceConfig *instance)
{
int i;
timelineInfo *my_tlinfo = NULL;
timelineInfo *tmp_tlinfo = NULL;
pgBackup *ancestor_backup = NULL;
/* there are no timelines in the archive */
if (parray_num(tli_list) == 0)
return NULL;
/* look for current timelineInfo */
for (i = 0; i < parray_num(tli_list); i++)
{
timelineInfo *tlinfo = (timelineInfo *) parray_get(tli_list, i);
if (tlinfo->tli == current_tli)
{
my_tlinfo = tlinfo;
break;
}
}
if (my_tlinfo == NULL)
return NULL;
/* Locate tlinfo of suitable full backup.
* Consider this example:
* t3 s2-------X <-! We are here
* /
* t2 s1----D---*----E--->
* /
* t1--A--B--*---C------->
*
* A, E - full backups
* B, C, D - incremental backups
*
* We must find A.
*/
tmp_tlinfo = my_tlinfo;
while (tmp_tlinfo->parent_link)
{
/* if timeline has backups, iterate over them */
if (tmp_tlinfo->parent_link->backups)
{
for (i = 0; i < parray_num(tmp_tlinfo->parent_link->backups); i++)
{
pgBackup *backup = (pgBackup *) parray_get(tmp_tlinfo->parent_link->backups, i);
if (backup->backup_mode == BACKUP_MODE_FULL &&
(backup->status == BACKUP_STATUS_OK ||
backup->status == BACKUP_STATUS_DONE) &&
backup->stop_lsn <= tmp_tlinfo->switchpoint)
{
ancestor_backup = backup;
break;
}
}
}
if (ancestor_backup)
break;
tmp_tlinfo = tmp_tlinfo->parent_link;
}
/* failed to find valid FULL backup on parent timelines */
if (!ancestor_backup)
return NULL;
else
elog(LOG, "Latest valid full backup: %s, tli: %i",
base36enc(ancestor_backup->start_time), ancestor_backup->tli);
/* At this point we found suitable full backup,
* now we must find its latest child, suitable to be
* parent of current incremental backup.
* Consider this example:
* t3 s2-------X <-! We are here
* /
* t2 s1----D---*----E--->
* /
* t1--A--B--*---C------->
*
* A, E - full backups
* B, C, D - incremental backups
*
* We found A, now we must find D.
*/
/* Optimistically, look on current timeline for valid incremental backup, child of ancestor */
if (my_tlinfo->backups)
{
/* backups are sorted in descending order and we need latest valid */
for (i = 0; i < parray_num(my_tlinfo->backups); i++)
{
pgBackup *tmp_backup = NULL;
pgBackup *backup = (pgBackup *) parray_get(my_tlinfo->backups, i);
/* found suitable parent */
if (scan_parent_chain(backup, &tmp_backup) == ChainIsOk &&
is_parent(ancestor_backup->start_time, backup, false))
return backup;
}
}
/* Iterate over parent timelines and look for a valid backup, child of ancestor */
tmp_tlinfo = my_tlinfo;
while (tmp_tlinfo->parent_link)
{
/* if timeline has backups, iterate over them */
if (tmp_tlinfo->parent_link->backups)
{
for (i = 0; i < parray_num(tmp_tlinfo->parent_link->backups); i++)
{
pgBackup *tmp_backup = NULL;
pgBackup *backup = (pgBackup *) parray_get(tmp_tlinfo->parent_link->backups, i);
/* We are not interested in backups
* located outside of our timeline history
*/
if (backup->stop_lsn > tmp_tlinfo->switchpoint)
continue;
if (scan_parent_chain(backup, &tmp_backup) == ChainIsOk &&
is_parent(ancestor_backup->start_time, backup, true))
return backup;
}
}
tmp_tlinfo = tmp_tlinfo->parent_link;
}
return NULL;
}
/* create backup directory in $BACKUP_PATH */
int
pgBackupCreateDir(pgBackup *backup)
@ -2237,18 +2395,18 @@ scan_parent_chain(pgBackup *current_backup, pgBackup **result_backup)
{
/* Set oldest child backup in chain */
*result_backup = target_backup;
return 0;
return ChainIsBroken;
}
/* chain is ok, but some backups are invalid */
if (invalid_backup)
{
*result_backup = invalid_backup;
return 1;
return ChainIsInvalid;
}
*result_backup = target_backup;
return 2;
return ChainIsOk;
}
/*
@ -2301,3 +2459,23 @@ get_backup_index_number(parray *backup_list, pgBackup *backup)
elog(WARNING, "Failed to find backup %s", base36enc(backup->start_time));
return -1;
}
/* On backup_list lookup children of target_backup and append them to append_list */
void
append_children(parray *backup_list, pgBackup *target_backup, parray *append_list)
{
int i;
for (i = 0; i < parray_num(backup_list); i++)
{
pgBackup *backup = (pgBackup *) parray_get(backup_list, i);
/* check if backup is descendant of target backup */
if (is_parent(target_backup->start_time, backup, false))
{
/* if backup is already in the list, then skip it */
if (!parray_contains(append_list, backup))
parray_append(append_list, backup);
}
}
}

src/data.c

@ -192,77 +192,67 @@ parse_page(Page page, XLogRecPtr *lsn)
return false;
}
/* Read one page from file directly accessing disk
* return value:
* 2 - if the page is found but zeroed
* 1 - if the page is found and valid
* 0 - if the page is not found, probably truncated
* -1 - if the page is found but read size is not multiple of BLKSIZE
* -2 - if the page is found but page header is "insane"
* -3 - if the page is found but page checksumm is wrong
* -4 - something went wrong, check errno
*
/* We know that header is invalid, store specific
* details in errormsg.
*/
static int
read_page_from_file(pgFile *file, BlockNumber blknum,
FILE *in, Page page, XLogRecPtr *page_lsn,
uint32 checksum_version)
void
get_header_errormsg(Page page, char **errormsg)
{
off_t offset = blknum * BLCKSZ;
ssize_t read_len = 0;
PageHeader phdr = (PageHeader) page;
*errormsg = pgut_malloc(MAXPGPATH);
/* read the block */
read_len = fio_pread(in, page, offset);
if (PageGetPageSize(phdr) != BLCKSZ)
snprintf(*errormsg, MAXPGPATH, "page header invalid, "
"page size %lu is not equal to block size %u",
PageGetPageSize(phdr), BLCKSZ);
if (read_len != BLCKSZ)
{
else if (phdr->pd_lower < SizeOfPageHeaderData)
snprintf(*errormsg, MAXPGPATH, "page header invalid, "
"pd_lower %i is less than page header size %lu",
phdr->pd_lower, SizeOfPageHeaderData);
/* The block could have been truncated. It is fine. */
if (read_len == 0)
return 0;
else if (read_len > 0)
return -1;
else
return -4;
}
else if (phdr->pd_lower > phdr->pd_upper)
snprintf(*errormsg, MAXPGPATH, "page header invalid, "
"pd_lower %u is greater than pd_upper %u",
phdr->pd_lower, phdr->pd_upper);
/*
* If we found page with invalid header, at first check if it is zeroed,
* which is a valid state for page. If it is not, read it and check header
* again, because it's possible that we've read a partly flushed page.
* If after several attempts page header is still invalid, throw an error.
* The same idea is applied to checksum verification.
*/
if (!parse_page(page, page_lsn))
{
int i;
/* Check if the page is zeroed. */
for (i = 0; i < BLCKSZ && page[i] == 0; i++);
else if (phdr->pd_upper > phdr->pd_special)
snprintf(*errormsg, MAXPGPATH, "page header invalid, "
"pd_upper %u is greater than pd_special %u",
phdr->pd_upper, phdr->pd_special);
/* Page is zeroed. No need to check header and checksum. */
if (i == BLCKSZ)
return 2;
else if (phdr->pd_special > BLCKSZ)
snprintf(*errormsg, MAXPGPATH, "page header invalid, "
"pd_special %u is greater than block size %u",
phdr->pd_special, BLCKSZ);
return -2;
}
else if (phdr->pd_special != MAXALIGN(phdr->pd_special))
snprintf(*errormsg, MAXPGPATH, "page header invalid, "
"pd_special %i is misaligned, expected %lu",
phdr->pd_special, MAXALIGN(phdr->pd_special));
else if (phdr->pd_flags & ~PD_VALID_FLAG_BITS)
snprintf(*errormsg, MAXPGPATH, "page header invalid, "
"pd_flags mask contain illegal bits");
/* Verify checksum */
if (checksum_version)
{
BlockNumber blkno = file->segno * RELSEG_SIZE + blknum;
/*
* If checksum is wrong, sleep a bit and then try again
* several times. If it didn't help, throw error
*/
if (pg_checksum_page(page, blkno) != ((PageHeader) page)->pd_checksum)
return -3;
else
/* page header and checksum are correct */
return 1;
}
else
/* page header is correct and checksum check is disabled */
return 1;
snprintf(*errormsg, MAXPGPATH, "page header invalid");
}
/* We know that checksumms are mismatched, store specific
* details in errormsg.
*/
void
get_checksum_errormsg(Page page, char **errormsg, BlockNumber absolute_blkno)
{
PageHeader phdr = (PageHeader) page;
*errormsg = pgut_malloc(MAXPGPATH);
snprintf(*errormsg, MAXPGPATH,
"page verification failed, "
"calculated checksum %u but expected %u",
phdr->pd_checksum,
pg_checksum_page(page, absolute_blkno));
}
/*
@ -273,15 +263,20 @@ read_page_from_file(pgFile *file, BlockNumber blknum,
* Prints appropriate warnings/errors/etc into log.
* Returns:
* PageIsOk(0) if page was successfully retrieved
* PageIsTruncated(-2) if the page was truncated
* SkipCurrentPage(-3) if we need to skip this page
* PageIsCorrupted(-4) if the page check mismatch
* PageIsTruncated(-1) if the page was truncated
* SkipCurrentPage(-2) if we need to skip this page,
* only used for DELTA backup
* PageIsCorrupted(-3) if the page checksum mismatch
* or header corruption,
* only used for checkdb
* TODO: probably we should always
* return it to the caller
*/
static int32
prepare_page(ConnectionArgs *conn_arg,
pgFile *file, XLogRecPtr prev_backup_start_lsn,
BlockNumber blknum, BlockNumber nblocks,
FILE *in, BackupMode backup_mode,
BlockNumber blknum, FILE *in,
BackupMode backup_mode,
Page page, bool strict,
uint32 checksum_version,
int ptrack_version_num,
@ -289,9 +284,8 @@ prepare_page(ConnectionArgs *conn_arg,
const char *from_fullpath)
{
XLogRecPtr page_lsn = 0;
int try_again = 100;
int try_again = PAGE_READ_ATTEMPTS;
bool page_is_valid = false;
bool page_is_truncated = false;
BlockNumber absolute_blknum = file->segno * RELSEG_SIZE + blknum;
/* check for interrupt */
@ -305,83 +299,104 @@ prepare_page(ConnectionArgs *conn_arg,
*/
if (backup_mode != BACKUP_MODE_DIFF_PTRACK || ptrack_version_num >= 20)
{
while (!page_is_valid && try_again)
int rc = 0;
while (!page_is_valid && try_again--)
{
int result = read_page_from_file(file, blknum, in, page,
&page_lsn, checksum_version);
/* read the block */
int read_len = fio_pread(in, page, blknum * BLCKSZ);
page_lsn = 0;
switch (result)
/* The block could have been truncated. It is fine. */
if (read_len == 0)
{
case 2:
elog(VERBOSE, "File: \"%s\" blknum %u, empty page", from_fullpath, blknum);
return PageIsOk;
elog(VERBOSE, "Cannot read block %u of \"%s\": "
"block truncated", blknum, from_fullpath);
return PageIsTruncated;
}
else if (read_len < 0)
elog(ERROR, "Cannot read block %u of \"%s\": %s",
blknum, from_fullpath, strerror(errno));
else if (read_len != BLCKSZ)
elog(WARNING, "Cannot read block %u of \"%s\": "
"read %i of %d, try again",
blknum, from_fullpath, read_len, BLCKSZ);
else
{
/* We have BLCKSZ of raw data, validate it */
rc = validate_one_page(page, absolute_blknum,
InvalidXLogRecPtr, &page_lsn,
checksum_version);
switch (rc)
{
case PAGE_IS_ZEROED:
elog(VERBOSE, "File: \"%s\" blknum %u, empty page", from_fullpath, blknum);
return PageIsOk;
case 1:
page_is_valid = true;
break;
case PAGE_IS_VALID:
/* in DELTA mode we must compare lsn */
if (backup_mode == BACKUP_MODE_DIFF_DELTA)
page_is_valid = true;
else
return PageIsOk;
break;
case 0:
/* This block was truncated.*/
page_is_truncated = true;
/* Page is not actually valid, but it is absent
* and we're not going to reread it or validate */
page_is_valid = true;
case PAGE_HEADER_IS_INVALID:
elog(VERBOSE, "File: \"%s\" blknum %u have wrong page header, try again",
from_fullpath, blknum);
break;
elog(VERBOSE, "File \"%s\", block %u, file was truncated",
from_fullpath, blknum);
break;
case -1:
elog(WARNING, "File: \"%s\", block %u, partial read, try again",
from_fullpath, blknum);
break;
case -2:
elog(LOG, "File: \"%s\" blknum %u have wrong page header, try again",
from_fullpath, blknum);
break;
case -3:
elog(LOG, "File: \"%s\" blknum %u have wrong checksum, try again",
from_fullpath, blknum);
break;
case -4:
elog(LOG, "File: \"%s\" access error: %s",
from_fullpath, strerror(errno));
break;
case PAGE_CHECKSUM_MISMATCH:
elog(VERBOSE, "File: \"%s\" blknum %u have wrong checksum, try again",
from_fullpath, blknum);
break;
default:
Assert(false);
}
}
/*
* If ptrack support is available use it to get invalid block
* If ptrack support is available, use it to get invalid block
* instead of rereading it 99 times
*/
if (result < 0 && strict && ptrack_version_num > 0)
if (!page_is_valid && strict && ptrack_version_num > 0)
{
elog(WARNING, "File \"%s\", block %u, try to fetch via shared buffer",
from_fullpath, blknum);
break;
}
try_again--;
}
/*
* If page is not valid after 100 attempts to read it
* throw an error.
*/
if (!page_is_valid &&
((strict && ptrack_version_num == 0) || !strict))
if (!page_is_valid)
{
/* show this message for checkdb, merge or backup without ptrack support */
elog(WARNING, "Corruption detected in file \"%s\", block %u",
from_fullpath, blknum);
}
int elevel = ERROR;
char *errormsg = NULL;
/* Backup with invalid block and without ptrack support must throw error */
if (!page_is_valid && strict && ptrack_version_num == 0)
elog(ERROR, "Data file corruption, canceling backup");
/* Get the details of corruption */
if (rc == PAGE_HEADER_IS_INVALID)
get_header_errormsg(page, &errormsg);
else if (rc == PAGE_CHECKSUM_MISMATCH)
get_checksum_errormsg(page, &errormsg,
file->segno * RELSEG_SIZE + blknum);
/* Error out in case of merge or backup without ptrack support;
* issue warning in case of checkdb or backup with ptrack support
*/
if (!strict || (strict && ptrack_version_num > 0))
elevel = WARNING;
if (errormsg)
elog(elevel, "Corruption detected in file \"%s\", block %u: %s",
from_fullpath, blknum, errormsg);
else
elog(elevel, "Corruption detected in file \"%s\", block %u",
from_fullpath, blknum);
pg_free(errormsg);
}
/* Checkdb is not going further */
if (!strict)
@ -412,7 +427,7 @@ prepare_page(ConnectionArgs *conn_arg,
if (ptrack_page == NULL)
{
/* This block was truncated.*/
page_is_truncated = true;
return PageIsTruncated;
}
else if (page_size != BLCKSZ)
{
@ -433,18 +448,15 @@ prepare_page(ConnectionArgs *conn_arg,
if (checksum_version)
((PageHeader) page)->pd_checksum = pg_checksum_page(page, absolute_blknum);
}
/* get lsn from page, provided by pg_ptrack_get_block() */
if (backup_mode == BACKUP_MODE_DIFF_DELTA &&
file->exists_in_prev &&
!page_is_truncated &&
!parse_page(page, &page_lsn))
elog(ERROR, "Cannot parse page after pg_ptrack_get_block. "
"Possible risk of a memory corruption");
}
if (page_is_truncated)
return PageIsTruncated;
/*
* Skip page if page lsn is less than START_LSN of parent backup.
* Nullified pages must be copied by DELTA backup, just to be safe.
@ -475,10 +487,8 @@ compress_and_backup_page(pgFile *file, BlockNumber blknum,
const char *errormsg = NULL;
header.block = blknum;
header.compressed_size = page_state;
/* The page was not truncated, so we need to compress it */
/* Compress the page */
header.compressed_size = do_compress(compressed_page, sizeof(compressed_page),
page, BLCKSZ, calg, clevel,
&errormsg);
@ -487,7 +497,7 @@ compress_and_backup_page(pgFile *file, BlockNumber blknum,
elog(WARNING, "An error occured during compressing block %u of file \"%s\": %s",
blknum, from_fullpath, errormsg);
file->compress_alg = calg;
file->compress_alg = calg; /* TODO: why is compress_alg set here? */
/* The page was successfully compressed. */
if (header.compressed_size > 0 && header.compressed_size < BLCKSZ)
@ -533,15 +543,15 @@ backup_data_file(ConnectionArgs* conn_arg, pgFile *file,
CompressAlg calg, int clevel, uint32 checksum_version,
int ptrack_version_num, const char *ptrack_schema, bool missing_ok)
{
FILE *in;
FILE *out;
BlockNumber blknum = 0;
BlockNumber nblocks = 0; /* number of blocks in file */
BlockNumber n_blocks_skipped = 0;
BlockNumber n_blocks_read = 0; /* number of blocks actually read
* TODO: we should report them */
int page_state;
char curr_page[BLCKSZ];
FILE *in;
FILE *out;
BlockNumber blknum = 0;
BlockNumber nblocks = 0; /* number of blocks in source file */
BlockNumber n_blocks_skipped = 0;
int page_state;
char curr_page[BLCKSZ];
bool use_pagemap;
datapagemap_iterator_t *iter = NULL;
/* stdio buffers */
char in_buffer[STDIO_BUFSIZE];
@ -613,7 +623,7 @@ backup_data_file(ConnectionArgs* conn_arg, pgFile *file,
}
if (!fio_is_remote_file(in))
setbuffer(in, in_buffer, STDIO_BUFSIZE);
setvbuf(in, in_buffer, _IOFBF, STDIO_BUFSIZE);
/* open backup file for write */
out = fopen(to_fullpath, PG_BINARY_W);
@ -621,7 +631,7 @@ backup_data_file(ConnectionArgs* conn_arg, pgFile *file,
elog(ERROR, "Cannot open backup file \"%s\": %s",
to_fullpath, strerror(errno));
setbuffer(out, out_buffer, STDIO_BUFSIZE);
setvbuf(out, out_buffer, _IOFBF, STDIO_BUFSIZE);
/* update file permission */
if (chmod(to_fullpath, FILE_PERMISSION) == -1)
@ -633,113 +643,114 @@ backup_data_file(ConnectionArgs* conn_arg, pgFile *file,
* If page map is empty or file is not present in previous backup
* backup all pages of the relation.
*
* Usually we enter here if backup_mode is FULL or DELTA.
* Also in some cases even a PAGE backup ends up here,
* because not all data files are logged into WAL,
* for example CREATE DATABASE.
* Such files should be fully copied.
* In PTRACK 1.x there was a problem
* of data files with missing _ptrack map.
* Such files should be fully copied.
*/
if (file->pagemap.bitmapsize == PageBitmapIsEmpty ||
file->pagemap_isabsent || !file->exists_in_prev)
if (file->pagemap.bitmapsize == PageBitmapIsEmpty ||
file->pagemap_isabsent || !file->exists_in_prev ||
!file->pagemap.bitmap)
use_pagemap = false;
else
use_pagemap = true;
/* Remote mode */
if (fio_is_remote_file(in))
{
/* remote FULL and DELTA */
if (fio_is_remote_file(in))
char *errmsg = NULL;
BlockNumber err_blknum = 0;
/* TODO: retrying via ptrack should be implemented on the agent */
int rc = fio_send_pages(in, out, file,
/* send prev backup START_LSN */
backup_mode == BACKUP_MODE_DIFF_DELTA &&
file->exists_in_prev ? prev_backup_start_lsn : InvalidXLogRecPtr,
calg, clevel, checksum_version,
/* send pagemap if any */
use_pagemap ? &file->pagemap : NULL,
/* variables for error reporting */
&err_blknum, &errmsg);
/* check for errors */
if (rc == REMOTE_ERROR)
elog(ERROR, "Cannot read block %u of \"%s\": %s",
err_blknum, from_fullpath, strerror(errno));
else if (rc == PAGE_CORRUPTION)
{
int rc = fio_send_pages(in, out, file,
backup_mode == BACKUP_MODE_DIFF_DELTA &&
file->exists_in_prev ? prev_backup_start_lsn : InvalidXLogRecPtr,
&n_blocks_skipped, calg, clevel);
if (rc == PAGE_CHECKSUM_MISMATCH && ptrack_version_num >= 15)
/* only ptrack versions 1.5, 1.6, 1.7 and 2.x support this functionality */
goto RetryUsingPtrack;
if (rc < 0)
elog(ERROR, "Failed to read file \"%s\": %s",
from_fullpath,
rc == PAGE_CHECKSUM_MISMATCH ? "data file checksum mismatch" : strerror(-rc));
/* TODO: check that fio_send_pages is not lying about the number of blocks read */
n_blocks_read = rc;
file->read_size = n_blocks_read * BLCKSZ;
file->uncompressed_size = (n_blocks_read - n_blocks_skipped)*BLCKSZ;
if (errmsg)
elog(ERROR, "Corruption detected in file \"%s\", block %u: %s",
from_fullpath, err_blknum, errmsg);
else
elog(ERROR, "Corruption detected in file \"%s\", block %u",
from_fullpath, err_blknum);
}
else
{
/* local FULL and DELTA */
RetryUsingPtrack:
for (blknum = 0; blknum < nblocks; blknum++)
{
page_state = prepare_page(conn_arg, file, prev_backup_start_lsn,
blknum, nblocks, in, backup_mode,
curr_page, true, checksum_version,
ptrack_version_num, ptrack_schema,
from_fullpath);
if (page_state == PageIsTruncated)
break;
else if (rc == WRITE_FAILED)
elog(ERROR, "Cannot write block %u of \"%s\": %s",
err_blknum, to_fullpath, strerror(errno));
else if (page_state == SkipCurrentPage)
n_blocks_skipped++;
file->read_size = rc * BLCKSZ;
pg_free(errmsg);
else if (page_state == PageIsOk)
compress_and_backup_page(file, blknum, in, out, &(file->crc),
page_state, curr_page, calg, clevel,
from_fullpath, to_fullpath);
else
elog(ERROR, "Invalid page state: %i, file: %s, blknum %i",
page_state, file->rel_path, blknum);
n_blocks_read++;
file->read_size += BLCKSZ;
}
}
file->n_blocks = n_blocks_read;
}
/*
* If page map is not empty we scan only changed blocks.
*
* We will enter here if backup_mode is PAGE or PTRACK.
*/
/* Local mode */
else
{
datapagemap_iterator_t *iter;
iter = datapagemap_iterate(&file->pagemap);
while (datapagemap_next(iter, &blknum))
if (use_pagemap)
{
iter = datapagemap_iterate(&file->pagemap);
datapagemap_next(iter, &blknum); /* set first block */
}
while (blknum < nblocks)
{
page_state = prepare_page(conn_arg, file, prev_backup_start_lsn,
blknum, nblocks, in, backup_mode,
curr_page, true, checksum_version,
ptrack_version_num, ptrack_schema,
from_fullpath);
blknum, in, backup_mode, curr_page,
true, checksum_version,
ptrack_version_num, ptrack_schema,
from_fullpath);
if (page_state == PageIsTruncated)
break;
/* TODO: PAGE and PTRACK should never get SkipCurrentPage */
/* TODO: remove */
else if (page_state == SkipCurrentPage)
n_blocks_skipped++;
else if (page_state == PageIsOk)
compress_and_backup_page(file, blknum, in, out, &(file->crc),
page_state, curr_page, calg, clevel,
from_fullpath, to_fullpath);
page_state, curr_page, calg, clevel,
from_fullpath, to_fullpath);
/* TODO: handle PageIsCorrupted, currently it is done in prepare_page */
else
elog(ERROR, "Invalid page state: %i, file: %s, blknum %i",
page_state, file->rel_path, blknum);
Assert(false);
n_blocks_read++;
file->read_size += BLCKSZ;
}
pg_free(file->pagemap.bitmap);
pg_free(iter);
/* next block */
if (use_pagemap)
{
/* exit if pagemap is exhausted */
if (!datapagemap_next(iter, &blknum))
break;
}
else
blknum++;
}
}
pg_free(file->pagemap.bitmap);
pg_free(iter);
/* refresh n_blocks for FULL and DELTA */
if (backup_mode == BACKUP_MODE_FULL ||
backup_mode == BACKUP_MODE_DIFF_DELTA)
file->n_blocks = file->read_size / BLCKSZ;
if (fclose(out))
elog(ERROR, "Cannot close the backup file \"%s\": %s",
to_fullpath, strerror(errno));
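A minimal standalone sketch (not part of the patch) of the pagemap-driven iteration used above; it assumes only the datapagemap API already linked in from pg_rewind, and the helper name is illustrative:

/* Visit only the blocks marked in the bitmap; everything else is skipped. */
static void
walk_pagemap_sketch(datapagemap_t *pagemap)
{
	BlockNumber blknum = 0;
	datapagemap_iterator_t *iter = datapagemap_iterate(pagemap);

	while (datapagemap_next(iter, &blknum))
	{
		/* here backup_data_file() would call prepare_page() and
		 * compress_and_backup_page() for blknum */
	}

	pg_free(iter);
}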
@ -792,7 +803,7 @@ backup_non_data_file(pgFile *file, pgFile *prev_file,
file->mtime <= parent_backup_time)
{
file->crc = fio_get_crc32(from_fullpath, FIO_DB_HOST);
file->crc = fio_get_crc32(from_fullpath, FIO_DB_HOST, false);
/* ...and checksum is the same... */
if (EQ_TRADITIONAL_CRC32(file->crc, prev_file->crc))
@ -863,7 +874,7 @@ restore_data_file(parray *parent_chain, pgFile *dest_file, FILE *out, const char
elog(ERROR, "Cannot open backup file \"%s\": %s", from_fullpath,
strerror(errno));
setbuffer(in, buffer, STDIO_BUFSIZE);
setvbuf(in, buffer, _IOFBF, STDIO_BUFSIZE);
/*
* Restore the file.
@ -1058,7 +1069,7 @@ restore_non_data_file_internal(FILE *in, FILE *out, pgFile *file,
break;
if (read_len < 0)
elog(ERROR, "Cannot read backup mode file \"%s\": %s",
elog(ERROR, "Cannot read backup file \"%s\": %s",
from_fullpath, strerror(errno));
if (fio_fwrite(out, buf, read_len) != read_len)
@ -1153,7 +1164,7 @@ restore_non_data_file(parray *parent_chain, pgBackup *dest_backup,
elog(ERROR, "Cannot open backup file \"%s\": %s", from_fullpath,
strerror(errno));
setbuffer(in, buffer, STDIO_BUFSIZE);
setvbuf(in, buffer, _IOFBF, STDIO_BUFSIZE);
/* do actual work */
restore_non_data_file_internal(in, out, tmp_file, from_fullpath, to_fullpath);
@ -1290,106 +1301,55 @@ create_empty_file(fio_location from_location, const char *to_root,
/*
* Validate given page.
*
* Returns value:
* 0 - if the page is not found
* 1 - if the page is found and valid
* -1 - if the page is found but invalid
* This function is expected to be executed multiple times,
* so avoid using elog within it.
* lsn from page is assigned to page_lsn pointer.
* TODO: switch to enum for return codes.
*/
#define PAGE_IS_NOT_FOUND 0
#define PAGE_IS_FOUND_AND_VALID 1
#define PAGE_IS_FOUND_AND_NOT_VALID -1
static int
validate_one_page(Page page, pgFile *file,
BlockNumber blknum, XLogRecPtr stop_lsn,
uint32 checksum_version)
int
validate_one_page(Page page, BlockNumber absolute_blkno,
XLogRecPtr stop_lsn, XLogRecPtr *page_lsn,
uint32 checksum_version)
{
PageHeader phdr;
XLogRecPtr lsn;
/* new level of paranoia */
if (page == NULL)
{
elog(LOG, "File \"%s\", block %u, page is NULL", file->path, blknum);
return PAGE_IS_NOT_FOUND;
}
phdr = (PageHeader) page;
if (PageIsNew(page))
/* check that page header is ok */
if (!parse_page(page, page_lsn))
{
int i;
int i;
/* Check if the page is zeroed. */
for(i = 0; i < BLCKSZ && page[i] == 0; i++);
for (i = 0; i < BLCKSZ && page[i] == 0; i++);
/* Page is zeroed. No need to verify checksums */
if (i == BLCKSZ)
{
elog(LOG, "File: %s blknum %u, page is New, empty zeroed page",
file->path, blknum);
return PAGE_IS_FOUND_AND_VALID;
}
else
{
elog(WARNING, "File: %s blknum %u, page is New, but not zeroed",
file->path, blknum);
}
return PAGE_IS_ZEROED;
/* Page is zeroed. No sense in checking header and checksum. */
return PAGE_IS_FOUND_AND_VALID;
/* Page does not look good */
return PAGE_HEADER_IS_INVALID;
}
/* Verify checksum */
if (checksum_version)
{
/* Checksums are enabled, so check them. */
if (!(pg_checksum_page(page, file->segno * RELSEG_SIZE + blknum)
== ((PageHeader) page)->pd_checksum))
{
elog(WARNING, "File: %s blknum %u have wrong checksum",
file->path, blknum);
return PAGE_IS_FOUND_AND_NOT_VALID;
}
}
/* Check the page for signs of insanity.
* TODO: We should give more information about what exactly looks "wrong"
*/
if (!(PageGetPageSize(phdr) == BLCKSZ &&
// PageGetPageLayoutVersion(phdr) == PG_PAGE_LAYOUT_VERSION &&
(phdr->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
phdr->pd_lower >= SizeOfPageHeaderData &&
phdr->pd_lower <= phdr->pd_upper &&
phdr->pd_upper <= phdr->pd_special &&
phdr->pd_special <= BLCKSZ &&
phdr->pd_special == MAXALIGN(phdr->pd_special)))
{
/* Page does not look good */
elog(WARNING, "Page header is looking insane: %s, block %i",
file->path, blknum);
return PAGE_IS_FOUND_AND_NOT_VALID;
if (pg_checksum_page(page, absolute_blkno) != ((PageHeader) page)->pd_checksum)
return PAGE_CHECKSUM_MISMATCH;
}
/* At this point the page header is sane and, if checksums are enabled, they are ok.
* Check that the page is not from the future.
* Note: this check should be used only by the validate command.
*/
if (stop_lsn > 0)
{
/* Get lsn from page header. Ensure that page is from our time. */
lsn = PageXLogRecPtrGet(phdr->pd_lsn);
if (lsn > stop_lsn)
{
elog(WARNING, "File: %s, block %u, checksum is %s. "
"Page is from future: pageLSN %X/%X stopLSN %X/%X",
file->path, blknum, checksum_version ? "correct" : "not enabled",
(uint32) (lsn >> 32), (uint32) lsn,
(uint32) (stop_lsn >> 32), (uint32) stop_lsn);
return PAGE_IS_FOUND_AND_NOT_VALID;
}
if (*page_lsn > stop_lsn)
return PAGE_LSN_FROM_FUTURE;
}
return PAGE_IS_FOUND_AND_VALID;
return PAGE_IS_VALID;
}
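A hypothetical caller sketch showing how the return codes above are usually interpreted; it mirrors what prepare_page() and check_file_pages() do, and the helper name is illustrative only:

static bool
block_looks_ok(Page page, BlockNumber absolute_blkno, uint32 checksum_version)
{
	XLogRecPtr	page_lsn = 0;
	int			rc = validate_one_page(page, absolute_blkno,
									   InvalidXLogRecPtr, &page_lsn,
									   checksum_version);

	switch (rc)
	{
		case PAGE_IS_VALID:
		case PAGE_IS_ZEROED:
			return true;				/* nothing to complain about */
		case PAGE_HEADER_IS_INVALID:
		case PAGE_CHECKSUM_MISMATCH:
			return false;				/* caller may retry the read or report corruption */
		case PAGE_LSN_FROM_FUTURE:		/* only possible when stop_lsn is set */
		default:
			return false;
	}
}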
/*
@ -1441,7 +1401,7 @@ check_data_file(ConnectionArgs *arguments, pgFile *file,
{
page_state = prepare_page(NULL, file, InvalidXLogRecPtr,
blknum, nblocks, in, BACKUP_MODE_FULL,
blknum, in, BACKUP_MODE_FULL,
curr_page, false, checksum_version,
0, NULL, from_fullpath);
@ -1456,19 +1416,6 @@ check_data_file(ConnectionArgs *arguments, pgFile *file,
is_valid = false;
continue;
}
/* At this point page is found and its checksum is ok, if any
* but could be 'insane'
* TODO: between prepare_page and validate_one_page we
* compute and compare the checksum twice, which is inefficient
*/
if (validate_one_page(curr_page, file, blknum,
InvalidXLogRecPtr,
0) == PAGE_IS_FOUND_AND_NOT_VALID)
{
/* Page is corrupted */
is_valid = false;
}
}
fclose(in);
@ -1507,10 +1454,12 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version,
/* read and validate pages one by one */
while (true)
{
int rc = 0;
DataPage compressed_page; /* used as read buffer */
DataPage page;
BackupPageHeader header;
BlockNumber blknum = 0;
XLogRecPtr page_lsn = 0;
if (interrupted || thread_interrupted)
elog(ERROR, "Interrupted during data file validation");
@ -1597,15 +1546,39 @@ check_file_pages(pgFile *file, XLogRecPtr stop_lsn, uint32 checksum_version,
return false;
}
if (validate_one_page(page.data, file, blknum,
stop_lsn, checksum_version) == PAGE_IS_FOUND_AND_NOT_VALID)
is_valid = false;
rc = validate_one_page(page.data,
file->segno * RELSEG_SIZE + blknum,
stop_lsn, &page_lsn, checksum_version);
}
else
rc = validate_one_page(compressed_page.data,
file->segno * RELSEG_SIZE + blknum,
stop_lsn, &page_lsn, checksum_version);
switch (rc)
{
if (validate_one_page(compressed_page.data, file, blknum,
stop_lsn, checksum_version) == PAGE_IS_FOUND_AND_NOT_VALID)
case PAGE_IS_NOT_FOUND:
elog(LOG, "File \"%s\", block %u, page is NULL", file->rel_path, blknum);
break;
case PAGE_IS_ZEROED:
elog(LOG, "File: %s blknum %u, empty zeroed page", file->rel_path, blknum);
break;
case PAGE_HEADER_IS_INVALID:
elog(WARNING, "Page header is looking insane: %s, block %i", file->rel_path, blknum);
is_valid = false;
break;
case PAGE_CHECKSUM_MISMATCH:
elog(WARNING, "File: %s blknum %u have wrong checksum", file->rel_path, blknum);
is_valid = false;
break;
case PAGE_LSN_FROM_FUTURE:
elog(WARNING, "File: %s, block %u, checksum is %s. "
"Page is from future: pageLSN %X/%X stopLSN %X/%X",
file->rel_path, blknum,
checksum_version ? "correct" : "not enabled",
(uint32) (page_lsn >> 32), (uint32) page_lsn,
(uint32) (stop_lsn >> 32), (uint32) stop_lsn);
break;
}
}

View File

@ -123,7 +123,7 @@ do_delete(time_t backup_id)
* which FULL backups should be kept to satisfy the redundancy requirement (only valid ones count),
* but if an invalid backup is not guarded by retention - it is removed
*/
int do_retention(void)
void do_retention(void)
{
parray *backup_list = NULL;
parray *to_keep_list = parray_new();
@ -154,7 +154,7 @@ int do_retention(void)
/* Retention is disabled but we still can cleanup wal */
elog(WARNING, "Retention policy is not set");
if (!delete_wal)
return 0;
return;
}
else
/* At least one retention policy is active */
@ -196,9 +196,6 @@ int do_retention(void)
parray_free(backup_list);
parray_free(to_keep_list);
parray_free(to_purge_list);
return 0;
}
/* Evaluate every backup by retention policies and populate purge and keep lists.
@ -1023,3 +1020,107 @@ do_delete_instance(void)
elog(INFO, "Instance '%s' successfully deleted", instance_name);
return 0;
}
/* Delete all backups of given status in instance */
void
do_delete_status(InstanceConfig *instance_config, const char *status)
{
int i;
parray *backup_list, *delete_list;
const char *pretty_status;
int n_deleted = 0, n_found = 0;
size_t size_to_delete = 0;
char size_to_delete_pretty[20];
pgBackup *backup;
BackupStatus status_for_delete = str2status(status);
delete_list = parray_new();
if (status_for_delete == BACKUP_STATUS_INVALID)
elog(ERROR, "Unknown value for '--status' option: '%s'", status);
/*
* The user may have provided the status string in lower case, but
* we should print backup statuses consistently with the show command,
* so convert it.
*/
pretty_status = status2str(status_for_delete);
backup_list = catalog_get_backup_list(instance_config->name, INVALID_BACKUP_ID);
if (parray_num(backup_list) == 0)
{
elog(WARNING, "Instance '%s' has no backups", instance_config->name);
return;
}
if (dry_run)
elog(INFO, "Deleting all backups with status '%s' in dry run mode", pretty_status);
else
elog(INFO, "Deleting all backups with status '%s'", pretty_status);
/* Selects backups with specified status and their children into delete_list array. */
for (i = 0; i < parray_num(backup_list); i++)
{
backup = (pgBackup *) parray_get(backup_list, i);
if (backup->status == status_for_delete)
{
n_found++;
/* an incremental backup can already be in delete_list due to append_children() */
if (parray_contains(delete_list, backup))
continue;
parray_append(delete_list, backup);
append_children(backup_list, backup, delete_list);
}
}
parray_qsort(delete_list, pgBackupCompareIdDesc);
/* delete and calculate free size from delete_list */
for (i = 0; i < parray_num(delete_list); i++)
{
backup = (pgBackup *)parray_get(delete_list, i);
elog(INFO, "Backup %s with status %s %s be deleted",
base36enc(backup->start_time), status2str(backup->status), dry_run ? "can" : "will");
size_to_delete += backup->data_bytes;
if (backup->stream)
size_to_delete += backup->wal_bytes;
if (!dry_run && lock_backup(backup))
delete_backup_files(backup);
n_deleted++;
}
/* Inform about data size to free */
if (size_to_delete >= 0)
{
pretty_size(size_to_delete, size_to_delete_pretty, lengthof(size_to_delete_pretty));
elog(INFO, "Resident data size to free by delete of %i backups: %s",
n_deleted, size_to_delete_pretty);
}
/* delete selected backups */
if (!dry_run && n_deleted > 0)
elog(INFO, "Successfully deleted %i %s from instance '%s'",
n_deleted, n_deleted == 1 ? "backup" : "backups",
instance_config->name);
if (n_found == 0)
elog(WARNING, "Instance '%s' has no backups with status '%s'",
instance_config->name, pretty_status);
// We don't do WAL purge here, because it is impossible to correctly handle
// the dry-run case.
/* Cleanup */
parray_free(delete_list);
parray_walk(backup_list, pgBackupFree);
parray_free(backup_list);
}
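For illustration, a hypothetical invocation of the new option (catalog path and instance name are placeholders):

pg_probackup delete -B /backup --instance=node --status=ERROR --dry-run
pg_probackup delete -B /backup --instance=node --status=ERROR

The first call only reports which backups (and their children) would be removed and how much space would be freed; the second actually deletes them.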

View File

@ -315,6 +315,72 @@ pgFileGetCRC(const char *file_path, bool use_crc32c, bool missing_ok)
return crc;
}
/*
* Read the local file to compute its CRC.
* We cannot make a decision about file decompression here, because
* the user may ask to back up files that are already compressed,
* and we should be explicit about it.
*/
pg_crc32
pgFileGetCRCgz(const char *file_path, bool use_crc32c, bool missing_ok)
{
gzFile fp;
pg_crc32 crc = 0;
char buf[STDIO_BUFSIZE];
int len = 0;
int err;
INIT_FILE_CRC32(use_crc32c, crc);
/* open file in binary read mode */
fp = gzopen(file_path, PG_BINARY_R);
if (fp == NULL)
{
if (errno == ENOENT)
{
if (missing_ok)
{
FIN_FILE_CRC32(use_crc32c, crc);
return crc;
}
}
elog(ERROR, "Cannot open file \"%s\": %s",
file_path, strerror(errno));
}
/* calc CRC of file */
for (;;)
{
if (interrupted)
elog(ERROR, "interrupted during CRC calculation");
len = gzread(fp, &buf, sizeof(buf));
if (len <= 0)
{
/* we either ran into EOF or an error */
if (gzeof(fp))
break;
else
{
const char *err_str = NULL;
err_str = gzerror(fp, &err);
elog(ERROR, "Cannot read from compressed file %s", err_str);
}
}
/* update CRC */
COMP_FILE_CRC32(use_crc32c, crc, buf, len);
}
FIN_FILE_CRC32(use_crc32c, crc);
gzclose(fp);
return crc;
}
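A hypothetical usage sketch of the function above; the archive path is illustrative, and the boolean arguments are labeled per the parameter names of this definition:

pg_crc32	crc = pgFileGetCRCgz("/backup/wal/node/000000010000000000000002.gz",
								 true,		/* use_crc32c */
								 false);	/* missing_ok: error out if the file is gone */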
void
pgFileFree(void *file)
{
@ -1693,7 +1759,6 @@ write_database_map(pgBackup *backup, parray *database_map, parray *backup_files_
char database_dir[MAXPGPATH];
char database_map_path[MAXPGPATH];
// pgBackupGetPath(backup, path, lengthof(path), DATABASE_DIR);
join_path_components(database_dir, backup->root_dir, DATABASE_DIR);
join_path_components(database_map_path, database_dir, DATABASE_MAP);
@ -1717,7 +1782,7 @@ write_database_map(pgBackup *backup, parray *database_map, parray *backup_files_
file->path = pgut_strdup(DATABASE_MAP);
file->crc = pgFileGetCRC(database_map_path, true, false);
file->write_size = file->read_size;
file->write_size = file->size;
file->uncompressed_size = file->read_size;
parray_append(backup_files_list, file);
}

View File

@ -153,6 +153,8 @@ help_pg_probackup(void)
printf(_(" [--recovery-target-name=target-name]\n"));
printf(_(" [--recovery-target-action=pause|promote|shutdown]\n"));
printf(_(" [--restore-as-replica] [--force]\n"));
printf(_(" [--primary-conninfo=primary_conninfo]\n"));
printf(_(" [-S | --primary-slot-name=slotname]\n"));
printf(_(" [--no-validate] [--skip-block-validation]\n"));
printf(_(" [-T OLDDIR=NEWDIR] [--progress]\n"));
printf(_(" [--external-mapping=OLDDIR=NEWDIR]\n"));
@ -191,7 +193,8 @@ help_pg_probackup(void)
printf(_(" [--retention-redundancy=retention-redundancy]\n"));
printf(_(" [--retention-window=retention-window]\n"));
printf(_(" [--wal-depth=wal-depth]\n"));
printf(_(" [--delete-wal] [-i backup-id | --delete-expired | --merge-expired]\n"));
printf(_(" [-i backup-id | --delete-expired | --merge-expired | --status=backup_status]\n"));
printf(_(" [--delete-wal]\n"));
printf(_(" [--dry-run]\n"));
printf(_(" [--help]\n"));
@ -212,10 +215,11 @@ help_pg_probackup(void)
printf(_(" [--help]\n"));
printf(_("\n %s archive-push -B backup-path --instance=instance_name\n"), PROGRAM_NAME);
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" [--overwrite]\n"));
printf(_(" [--compress]\n"));
printf(_(" [-j num-threads] [--batch-size=batch_size]\n"));
printf(_(" [--archive-timeout=timeout]\n"));
printf(_(" [--no-ready-rename] [--no-sync]\n"));
printf(_(" [--overwrite] [--compress]\n"));
printf(_(" [--compress-algorithm=compress-algorithm]\n"));
printf(_(" [--compress-level=compress-level]\n"));
printf(_(" [--remote-proto] [--remote-host]\n"));
@ -226,6 +230,8 @@ help_pg_probackup(void)
printf(_("\n %s archive-get -B backup-path --instance=instance_name\n"), PROGRAM_NAME);
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" [-j num-threads] [--batch-size=batch_size]\n"));
printf(_(" [--no-validate-wal]\n"));
printf(_(" [--remote-proto] [--remote-host]\n"));
printf(_(" [--remote-port] [--remote-path] [--remote-user]\n"));
printf(_(" [--ssh-options]\n"));
@ -382,20 +388,20 @@ help_restore(void)
{
printf(_("\n%s restore -B backup-path --instance=instance_name\n"), PROGRAM_NAME);
printf(_(" [-D pgdata-path] [-i backup-id] [-j num-threads]\n"));
printf(_(" [--progress] [--force] [--no-sync]\n"));
printf(_(" [--no-validate] [--skip-block-validation]\n"));
printf(_(" [-T OLDDIR=NEWDIR]\n"));
printf(_(" [--external-mapping=OLDDIR=NEWDIR]\n"));
printf(_(" [--skip-external-dirs]\n"));
printf(_(" [--db-include dbname | --db-exclude dbname]\n"));
printf(_(" [--recovery-target-time=time|--recovery-target-xid=xid\n"));
printf(_(" |--recovery-target-lsn=lsn [--recovery-target-inclusive=boolean]]\n"));
printf(_(" [--recovery-target-timeline=timeline]\n"));
printf(_(" [--recovery-target=immediate|latest]\n"));
printf(_(" [--recovery-target-name=target-name]\n"));
printf(_(" [--recovery-target-action=pause|promote|shutdown]\n"));
printf(_(" [--restore-as-replica] [--force]\n"));
printf(_(" [--no-validate] [--skip-block-validation]\n"));
printf(_(" [-T OLDDIR=NEWDIR] [--progress]\n"));
printf(_(" [--external-mapping=OLDDIR=NEWDIR]\n"));
printf(_(" [--skip-external-dirs]\n"));
printf(_(" [--restore-command=cmdline]\n"));
printf(_(" [--no-sync]\n"));
printf(_(" [--db-include dbname | --db-exclude dbname]\n"));
printf(_(" [-R | --restore-as-replica]\n"));
printf(_(" [--remote-proto] [--remote-host]\n"));
printf(_(" [--remote-port] [--remote-path] [--remote-user]\n"));
printf(_(" [--ssh-options]\n"));
@ -410,6 +416,22 @@ help_restore(void)
printf(_(" -j, --threads=NUM number of parallel threads\n"));
printf(_(" --progress show progress\n"));
printf(_(" --force ignore invalid status of the restored backup\n"));
printf(_(" --no-sync do not sync restored files to disk\n"));
printf(_(" --no-validate disable backup validation during restore\n"));
printf(_(" --skip-block-validation set to validate only file-level checksum\n"));
printf(_(" -T, --tablespace-mapping=OLDDIR=NEWDIR\n"));
printf(_(" relocate the tablespace from directory OLDDIR to NEWDIR\n"));
printf(_(" --external-mapping=OLDDIR=NEWDIR\n"));
printf(_(" relocate the external directory from OLDDIR to NEWDIR\n"));
printf(_(" --skip-external-dirs do not restore all external directories\n"));
printf(_("\n Partial restore options:\n"));
printf(_(" --db-include dbname restore only specified databases\n"));
printf(_(" --db-exclude dbname do not restore specified databases\n"));
printf(_("\n Recovery options:\n"));
printf(_(" --recovery-target-time=time time stamp up to which recovery will proceed\n"));
printf(_(" --recovery-target-xid=xid transaction ID up to which recovery will proceed\n"));
printf(_(" --recovery-target-lsn=lsn LSN of the write-ahead log location up to which recovery will proceed\n"));
@ -424,24 +446,15 @@ help_restore(void)
printf(_(" --recovery-target-action=pause|promote|shutdown\n"));
printf(_(" action the server should take once the recovery target is reached\n"));
printf(_(" (default: pause)\n"));
printf(_(" --restore-command=cmdline command to use as 'restore_command' in recovery.conf; 'none' disables\n"));
printf(_("\n Standby options:\n"));
printf(_(" -R, --restore-as-replica write a minimal recovery.conf in the output directory\n"));
printf(_(" to ease setting up a standby server\n"));
printf(_(" --force ignore invalid status of the restored backup\n"));
printf(_(" --no-validate disable backup validation during restore\n"));
printf(_(" --skip-block-validation set to validate only file-level checksum\n"));
printf(_(" -T, --tablespace-mapping=OLDDIR=NEWDIR\n"));
printf(_(" relocate the tablespace from directory OLDDIR to NEWDIR\n"));
printf(_(" --external-mapping=OLDDIR=NEWDIR\n"));
printf(_(" relocate the external directory from OLDDIR to NEWDIR\n"));
printf(_(" --skip-external-dirs do not restore all external directories\n"));
printf(_(" --restore-command=cmdline command to use as 'restore_command' in recovery.conf; 'none' disables\n"));
printf(_(" --no-sync do not sync restored files to disk\n"));
printf(_("\n Partial restore options:\n"));
printf(_(" --db-include dbname restore only specified databases\n"));
printf(_(" --db-exclude dbname do not restore specified databases\n"));
printf(_(" --primary-conninfo=primary_conninfo\n"));
printf(_(" connection string to be used for establishing connection\n"));
printf(_(" with the primary server\n"));
printf(_(" -S, --primary-slot-name=slotname replication slot to be used for WAL streaming from the primary server\n"));
printf(_("\n Logging options:\n"));
printf(_(" --log-level-console=log-level-console\n"));
@ -626,6 +639,7 @@ help_delete(void)
printf(_(" --wal-depth=wal-depth number of latest valid backups per timeline that must\n"));
printf(_(" retain the ability to perform PITR; 0 disables; (default: 0)\n"));
printf(_(" --dry-run perform a trial run without any changes\n"));
printf(_(" --status=backup_status delete all backups with specified status\n"));
printf(_("\n Logging options:\n"));
printf(_(" --log-level-console=log-level-console\n"));
@ -859,10 +873,11 @@ static void
help_archive_push(void)
{
printf(_("\n%s archive-push -B backup-path --instance=instance_name\n"), PROGRAM_NAME);
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" [--overwrite]\n"));
printf(_(" [--compress]\n"));
printf(_(" [-j num-threads] [--batch-size=batch_size]\n"));
printf(_(" [--archive-timeout=timeout]\n"));
printf(_(" [--no-ready-rename] [--no-sync]\n"));
printf(_(" [--overwrite] [--compress]\n"));
printf(_(" [--compress-algorithm=compress-algorithm]\n"));
printf(_(" [--compress-level=compress-level]\n"));
printf(_(" [--remote-proto] [--remote-host]\n"));
@ -871,10 +886,13 @@ help_archive_push(void)
printf(_(" -B, --backup-path=backup-path location of the backup storage area\n"));
printf(_(" --instance=instance_name name of the instance to delete\n"));
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" relative path name of the WAL file on the server\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" name of the WAL file to retrieve from the server\n"));
printf(_(" name of the file to copy into WAL archive\n"));
printf(_(" -j, --threads=NUM number of parallel threads\n"));
printf(_(" --batch-size=NUM number of files to be copied\n"));
printf(_(" --archive-timeout=timeout wait timeout before discarding stale temp file(default: 5min)\n"));
printf(_(" --no-ready-rename do not rename '.ready' files in 'archive_status' directory\n"));
printf(_(" --no-sync do not sync WAL file to disk\n"));
printf(_(" --overwrite overwrite archived WAL file\n"));
printf(_("\n Compression options:\n"));
@ -902,6 +920,8 @@ help_archive_get(void)
printf(_("\n%s archive-get -B backup-path --instance=instance_name\n"), PROGRAM_NAME);
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" [-j num-threads] [--batch-size=batch_size]\n"));
printf(_(" [--no-validate-wal]\n"));
printf(_(" [--remote-proto] [--remote-host]\n"));
printf(_(" [--remote-port] [--remote-path] [--remote-user]\n"));
printf(_(" [--ssh-options]\n\n"));
@ -912,6 +932,10 @@ help_archive_get(void)
printf(_(" relative destination path name of the WAL file on the server\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" name of the WAL file to retrieve from the archive\n"));
printf(_(" -j, --threads=NUM number of parallel threads\n"));
printf(_(" --batch-size=NUM number of files to be prefetched\n"));
printf(_(" --prefetch-dir=path location of the store area for prefetched WAL files\n"));
printf(_(" --no-validate-wal skip validation of prefetched WAL file before using it\n"));
printf(_("\n Remote options:\n"));
printf(_(" --remote-proto=protocol remote protocol to use\n"));

View File

@ -103,6 +103,23 @@ do_add_instance(InstanceConfig *instance)
SOURCE_FILE);
config_set_opt(instance_options, &instance->xlog_seg_size,
SOURCE_FILE);
/* Kludge: do not save remote options into config */
config_set_opt(instance_options, &instance_config.remote.host,
SOURCE_DEFAULT);
config_set_opt(instance_options, &instance_config.remote.proto,
SOURCE_DEFAULT);
config_set_opt(instance_options, &instance_config.remote.port,
SOURCE_DEFAULT);
config_set_opt(instance_options, &instance_config.remote.path,
SOURCE_DEFAULT);
config_set_opt(instance_options, &instance_config.remote.user,
SOURCE_DEFAULT);
config_set_opt(instance_options, &instance_config.remote.ssh_options,
SOURCE_DEFAULT);
config_set_opt(instance_options, &instance_config.remote.ssh_config,
SOURCE_DEFAULT);
/* pgdata was set through command line */
do_set_config(true);

View File

@ -1132,7 +1132,7 @@ merge_data_file(parray *parent_chain, pgBackup *full_backup,
if (out == NULL)
elog(ERROR, "Cannot open merge target file \"%s\": %s",
to_fullpath_tmp1, strerror(errno));
setbuffer(out, buffer, STDIO_BUFSIZE);
setvbuf(out, buffer, _IOFBF, STDIO_BUFSIZE);
/* restore file into temp file */
tmp_file->size = restore_data_file(parent_chain, dest_file, out, to_fullpath_tmp1);

View File

@ -138,6 +138,9 @@ typedef struct
*/
bool got_target;
/* Should we read the record located at the endpoint position */
bool inclusive_endpoint;
/*
* Return value from the thread.
* 0 means there is no error, 1 - there is an error.
@ -162,7 +165,8 @@ static bool RunXLogThreads(const char *archivedir,
XLogRecPtr startpoint, XLogRecPtr endpoint,
bool consistent_read,
xlog_record_function process_record,
XLogRecTarget *last_rec);
XLogRecTarget *last_rec,
bool inclusive_endpoint);
//static XLogReaderState *InitXLogThreadRead(xlog_thread_arg *arg);
static bool SwitchThreadToNextWal(XLogReaderState *xlogreader,
xlog_thread_arg *arg);
@ -231,18 +235,121 @@ static XLogRecPtr wal_target_lsn = InvalidXLogRecPtr;
* Pagemap extraction is done using threads. Each thread reads a single
* WAL file.
*/
void
extractPageMap(const char *archivedir, TimeLineID tli, uint32 wal_seg_size,
XLogRecPtr startpoint, XLogRecPtr endpoint)
bool
extractPageMap(const char *archivedir, uint32 wal_seg_size,
XLogRecPtr startpoint, TimeLineID start_tli,
XLogRecPtr endpoint, TimeLineID end_tli,
parray *tli_list)
{
bool extract_isok = true;
bool extract_isok = false;
extract_isok = RunXLogThreads(archivedir, 0, InvalidTransactionId,
InvalidXLogRecPtr, tli, wal_seg_size,
startpoint, endpoint, false, extractPageInfo,
NULL);
if (!extract_isok)
elog(ERROR, "Pagemap compiling failed");
if (start_tli == end_tli)
/* easy case */
extract_isok = RunXLogThreads(archivedir, 0, InvalidTransactionId,
InvalidXLogRecPtr, end_tli, wal_seg_size,
startpoint, endpoint, false, extractPageInfo,
NULL, true);
else
{
/* We have to process WAL from several different xlog intervals,
* located on different timelines.
*
* Consider this example:
* t3 C-----X <!- We are here
* /
* t2 B---*-->
* /
* t1 -A----*------->
*
* A - prev backup START_LSN
* B - switchpoint for t2, available as t2->switchpoint
* C - switchpoint for t3, available as t3->switchpoint
* X - current backup START_LSN
*
* Intervals to be parsed:
* - [A,B) on t1
* - [B,C) on t2
* - [C,X] on t3
*/
int i;
parray *interval_list = parray_new();
timelineInfo *end_tlinfo = NULL;
timelineInfo *tmp_tlinfo = NULL;
XLogRecPtr prev_switchpoint = InvalidXLogRecPtr;
lsnInterval *wal_interval = NULL;
/* We must find TLI information about final timeline (t3 in example) */
for (i = 0; i < parray_num(tli_list); i++)
{
tmp_tlinfo = parray_get(tli_list, i);
if (tmp_tlinfo->tli == end_tli)
{
end_tlinfo = tmp_tlinfo;
break;
}
}
/* Iterate over timelines backward,
* starting with end_tli and ending with start_tli.
* For every timeline calculate LSN-interval that must be parsed.
*/
tmp_tlinfo = end_tlinfo;
while (tmp_tlinfo)
{
wal_interval = pgut_malloc(sizeof(lsnInterval));
wal_interval->tli = tmp_tlinfo->tli;
if (tmp_tlinfo->tli == end_tli)
{
wal_interval->begin_lsn = tmp_tlinfo->switchpoint;
wal_interval->end_lsn = endpoint;
}
else if (tmp_tlinfo->tli == start_tli)
{
wal_interval->begin_lsn = startpoint;
wal_interval->end_lsn = prev_switchpoint;
}
else
{
wal_interval->begin_lsn = tmp_tlinfo->switchpoint;
wal_interval->end_lsn = prev_switchpoint;
}
prev_switchpoint = tmp_tlinfo->switchpoint;
tmp_tlinfo = tmp_tlinfo->parent_link;
parray_append(interval_list, wal_interval);
}
for (i = parray_num(interval_list) - 1; i >= 0; i--)
{
bool inclusive_endpoint;
wal_interval = parray_get(interval_list, i);
/* In case of replica promotion, endpoints of intermediate
* timelines can be unreachable.
*/
inclusive_endpoint = false;
/* ... but not the end timeline */
if (wal_interval->tli == end_tli)
inclusive_endpoint = true;
extract_isok = RunXLogThreads(archivedir, 0, InvalidTransactionId,
InvalidXLogRecPtr, wal_interval->tli, wal_seg_size,
wal_interval->begin_lsn, wal_interval->end_lsn,
false, extractPageInfo, NULL, inclusive_endpoint);
if (!extract_isok)
break;
pg_free(wal_interval);
}
pg_free(interval_list);
}
return extract_isok;
}
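A worked example of the interval splitting above, with invented LSNs: if the previous backup started at 0/5000000 on timeline 1, timeline 2 switched at 0/7000000, timeline 3 switched at 0/9000000, and the current backup starts at 0/A000000, the intervals parsed (oldest first) are:

[0/5000000, 0/7000000) on tli 1    endpoint not inclusive
[0/7000000, 0/9000000) on tli 2    endpoint not inclusive
[0/9000000, 0/A000000] on tli 3    endpoint inclusive (end_tli)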
/*
@ -262,7 +369,7 @@ validate_backup_wal_from_start_to_stop(pgBackup *backup,
got_endpoint = RunXLogThreads(archivedir, 0, InvalidTransactionId,
InvalidXLogRecPtr, tli, xlog_seg_size,
backup->start_lsn, backup->stop_lsn,
false, NULL, NULL);
false, NULL, NULL, true);
if (!got_endpoint)
{
@ -349,7 +456,7 @@ validate_wal(pgBackup *backup, const char *archivedir,
* If recovery target is provided, ensure that archive files exist in
* archive directory.
*/
if (dir_is_empty(archivedir, FIO_BACKUP_HOST))
if (dir_is_empty(archivedir, FIO_LOCAL_HOST))
elog(ERROR, "WAL archive is empty. You cannot restore backup to a recovery target without WAL archive.");
/*
@ -373,7 +480,7 @@ validate_wal(pgBackup *backup, const char *archivedir,
all_wal = all_wal ||
RunXLogThreads(archivedir, target_time, target_xid, target_lsn,
tli, wal_seg_size, backup->stop_lsn,
InvalidXLogRecPtr, true, validateXLogRecord, &last_rec);
InvalidXLogRecPtr, true, validateXLogRecord, &last_rec, true);
if (last_rec.rec_time > 0)
time2iso(last_timestamp, lengthof(last_timestamp),
timestamptz_to_time_t(last_rec.rec_time));
@ -753,20 +860,35 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
if (!reader_data->xlogexists)
{
char xlogfname[MAXFNAMELEN];
char partial_file[MAXPGPATH];
GetXLogFileName(xlogfname, reader_data->tli, reader_data->xlogsegno,
wal_seg_size);
snprintf(reader_data->xlogpath, MAXPGPATH, "%s/%s", wal_archivedir,
xlogfname);
GetXLogFileName(xlogfname, reader_data->tli, reader_data->xlogsegno, wal_seg_size);
if (fileExists(reader_data->xlogpath, FIO_BACKUP_HOST))
snprintf(reader_data->xlogpath, MAXPGPATH, "%s/%s", wal_archivedir, xlogfname);
snprintf(reader_data->gz_xlogpath, MAXPGPATH, "%s.gz", reader_data->xlogpath);
/* We fall back to using the .partial segment in case we are running
* a multi-timeline incremental backup right after standby promotion.
* TODO: it should be explicitly enabled.
*/
snprintf(partial_file, MAXPGPATH, "%s.partial", reader_data->xlogpath);
/* If the segment does not exist, but the same
* segment with the '.partial' suffix does, use it instead */
if (!fileExists(reader_data->xlogpath, FIO_LOCAL_HOST) &&
fileExists(partial_file, FIO_LOCAL_HOST))
{
snprintf(reader_data->xlogpath, MAXPGPATH, "%s", partial_file);
}
if (fileExists(reader_data->xlogpath, FIO_LOCAL_HOST))
{
elog(LOG, "Thread [%d]: Opening WAL segment \"%s\"",
reader_data->thread_num, reader_data->xlogpath);
reader_data->xlogexists = true;
reader_data->xlogfile = fio_open(reader_data->xlogpath,
O_RDONLY | PG_BINARY, FIO_BACKUP_HOST);
O_RDONLY | PG_BINARY, FIO_LOCAL_HOST);
if (reader_data->xlogfile < 0)
{
@ -778,29 +900,23 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
}
#ifdef HAVE_LIBZ
/* Try to open compressed WAL segment */
else
else if (fileExists(reader_data->gz_xlogpath, FIO_LOCAL_HOST))
{
snprintf(reader_data->gz_xlogpath, sizeof(reader_data->gz_xlogpath),
"%s.gz", reader_data->xlogpath);
if (fileExists(reader_data->gz_xlogpath, FIO_BACKUP_HOST))
{
elog(LOG, "Thread [%d]: Opening compressed WAL segment \"%s\"",
reader_data->thread_num, reader_data->gz_xlogpath);
elog(LOG, "Thread [%d]: Opening compressed WAL segment \"%s\"",
reader_data->thread_num, reader_data->gz_xlogpath);
reader_data->xlogexists = true;
reader_data->gz_xlogfile = fio_gzopen(reader_data->gz_xlogpath,
"rb", -1, FIO_BACKUP_HOST);
if (reader_data->gz_xlogfile == NULL)
{
elog(WARNING, "Thread [%d]: Could not open compressed WAL segment \"%s\": %s",
reader_data->thread_num, reader_data->gz_xlogpath,
strerror(errno));
return -1;
}
reader_data->xlogexists = true;
reader_data->gz_xlogfile = fio_gzopen(reader_data->gz_xlogpath,
"rb", -1, FIO_LOCAL_HOST);
if (reader_data->gz_xlogfile == NULL)
{
elog(WARNING, "Thread [%d]: Could not open compressed WAL segment \"%s\": %s",
reader_data->thread_num, reader_data->gz_xlogpath,
strerror(errno));
return -1;
}
}
#endif
/* Exit without error if WAL segment doesn't exist */
if (!reader_data->xlogexists)
return -1;
@ -923,7 +1039,7 @@ RunXLogThreads(const char *archivedir, time_t target_time,
TransactionId target_xid, XLogRecPtr target_lsn, TimeLineID tli,
uint32 segment_size, XLogRecPtr startpoint, XLogRecPtr endpoint,
bool consistent_read, xlog_record_function process_record,
XLogRecTarget *last_rec)
XLogRecTarget *last_rec, bool inclusive_endpoint)
{
pthread_t *threads;
xlog_thread_arg *thread_args;
@ -932,17 +1048,25 @@ RunXLogThreads(const char *archivedir, time_t target_time,
XLogSegNo endSegNo = 0;
bool result = true;
if (!XRecOffIsValid(startpoint))
if (!XRecOffIsValid(startpoint) && !XRecOffIsNull(startpoint))
elog(ERROR, "Invalid startpoint value %X/%X",
(uint32) (startpoint >> 32), (uint32) (startpoint));
if (!XLogRecPtrIsInvalid(endpoint))
{
if (!XRecOffIsValid(endpoint))
// if (XRecOffIsNull(endpoint) && !inclusive_endpoint)
if (XRecOffIsNull(endpoint))
{
GetXLogSegNo(endpoint, endSegNo, segment_size);
endSegNo--;
}
else if (!XRecOffIsValid(endpoint))
{
elog(ERROR, "Invalid endpoint value %X/%X",
(uint32) (endpoint >> 32), (uint32) (endpoint));
GetXLogSegNo(endpoint, endSegNo, segment_size);
}
else
GetXLogSegNo(endpoint, endSegNo, segment_size);
}
/* Initialize static variables for workers */
@ -977,6 +1101,7 @@ RunXLogThreads(const char *archivedir, time_t target_time,
arg->startpoint = startpoint;
arg->endpoint = endpoint;
arg->endSegNo = endSegNo;
arg->inclusive_endpoint = inclusive_endpoint;
arg->got_target = false;
/* By default there is some error */
arg->ret = 1;
@ -1192,6 +1317,18 @@ XLogThreadWorker(void *arg)
reader_data->thread_num,
(uint32) (errptr >> 32), (uint32) (errptr));
/* If we failed to read the record located at the endpoint position,
* and the endpoint is not inclusive, do not consider this an error.
*/
if (!thread_arg->inclusive_endpoint &&
errptr == thread_arg->endpoint)
{
elog(LOG, "Thread [%d]: Endpoint %X/%X is not inclusive, switch to the next timeline",
reader_data->thread_num,
(uint32) (thread_arg->endpoint >> 32), (uint32) (thread_arg->endpoint));
break;
}
/*
* If we don't have all WAL files from prev backup start_lsn to current
* start_lsn, we won't be able to build page map and PAGE backup will
@ -1583,3 +1720,28 @@ getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime)
return false;
}
bool validate_wal_segment(TimeLineID tli, XLogSegNo segno, const char *prefetch_dir, uint32 wal_seg_size)
{
XLogRecPtr startpoint;
XLogRecPtr endpoint;
bool rc;
int tmp_num_threads = num_threads;
num_threads = 1;
/* calculate startpoint and endpoint */
GetXLogRecPtr(segno, 0, wal_seg_size, startpoint);
GetXLogRecPtr(segno+1, 0, wal_seg_size, endpoint);
/* disable multi-threading */
num_threads = 1;
rc = RunXLogThreads(prefetch_dir, 0, InvalidTransactionId,
InvalidXLogRecPtr, tli, wal_seg_size,
startpoint, endpoint, false, NULL, NULL, true);
num_threads = tmp_num_threads;
return rc;
}
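A hypothetical sketch of how archive-get could use this to validate a prefetched segment before handing it to the server; the helper name is illustrative, and GetXLogFromFileName is the wrapper added to pg_probackup.h in this patch:

static bool
prefetched_segment_is_sane(const char *wal_file_name, const char *prefetch_dir,
						   uint32 wal_seg_size)
{
	TimeLineID	tli;
	XLogSegNo	segno;

	/* derive timeline and segment number from the WAL file name */
	GetXLogFromFileName(wal_file_name, &tli, &segno, wal_seg_size);

	return validate_wal_segment(tli, segno, prefetch_dir, wal_seg_size);
}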

View File

@ -117,7 +117,7 @@ bool delete_expired = false;
bool merge_expired = false;
bool force = false;
bool dry_run = false;
static char *delete_status = NULL;
/* compression options */
bool compress_shortcut = false;
@ -125,9 +125,15 @@ bool compress_shortcut = false;
char *instance_name;
/* archive push options */
int batch_size = 1;
static char *wal_file_path;
static char *wal_file_name;
static bool file_overwrite = false;
static bool file_overwrite = false;
static bool no_ready_rename = false;
/* archive get options */
static char *prefetch_dir;
bool no_validate_wal = false;
/* show options */
ShowFormat show_format = SHOW_PLAIN;
@ -189,13 +195,14 @@ static ConfigOption cmd_options[] =
{ 'f', 155, "external-mapping", opt_externaldir_map, SOURCE_CMD_STRICT },
{ 's', 141, "recovery-target-name", &target_name, SOURCE_CMD_STRICT },
{ 's', 142, "recovery-target-action", &target_action, SOURCE_CMD_STRICT },
{ 'b', 'R', "restore-as-replica", &restore_as_replica, SOURCE_CMD_STRICT },
{ 'b', 143, "no-validate", &no_validate, SOURCE_CMD_STRICT },
{ 'b', 154, "skip-block-validation", &skip_block_validation, SOURCE_CMD_STRICT },
{ 'b', 156, "skip-external-dirs", &skip_external_dirs, SOURCE_CMD_STRICT },
{ 'f', 158, "db-include", opt_datname_include_list, SOURCE_CMD_STRICT },
{ 'f', 159, "db-exclude", opt_datname_exclude_list, SOURCE_CMD_STRICT },
{ 'b', 'R', "restore-as-replica", &restore_as_replica, SOURCE_CMD_STRICT },
{ 's', 160, "primary-conninfo", &primary_conninfo, SOURCE_CMD_STRICT },
{ 's', 'S', "primary-slot-name",&replication_slot, SOURCE_CMD_STRICT },
/* checkdb options */
{ 'b', 195, "amcheck", &need_amcheck, SOURCE_CMD_STRICT },
{ 'b', 196, "heapallindexed", &heapallindexed, SOURCE_CMD_STRICT },
@ -203,6 +210,8 @@ static ConfigOption cmd_options[] =
/* delete options */
{ 'b', 145, "wal", &delete_wal, SOURCE_CMD_STRICT },
{ 'b', 146, "expired", &delete_expired, SOURCE_CMD_STRICT },
{ 's', 172, "status", &delete_status, SOURCE_CMD_STRICT },
/* TODO not implemented yet */
{ 'b', 147, "force", &force, SOURCE_CMD_STRICT },
/* compression options */
@ -216,9 +225,14 @@ static ConfigOption cmd_options[] =
{ 's', 150, "wal-file-path", &wal_file_path, SOURCE_CMD_STRICT },
{ 's', 151, "wal-file-name", &wal_file_name, SOURCE_CMD_STRICT },
{ 'b', 152, "overwrite", &file_overwrite, SOURCE_CMD_STRICT },
{ 'b', 153, "no-ready-rename", &no_ready_rename, SOURCE_CMD_STRICT },
{ 'i', 162, "batch-size", &batch_size, SOURCE_CMD_STRICT },
/* archive-get options */
{ 's', 163, "prefetch-dir", &prefetch_dir, SOURCE_CMD_STRICT },
{ 'b', 164, "no-validate-wal", &no_validate_wal, SOURCE_CMD_STRICT },
/* show options */
{ 'f', 153, "format", opt_show_format, SOURCE_CMD_STRICT },
{ 'b', 161, "archive", &show_archive, SOURCE_CMD_STRICT },
{ 'f', 165, "format", opt_show_format, SOURCE_CMD_STRICT },
{ 'b', 166, "archive", &show_archive, SOURCE_CMD_STRICT },
/* set-backup options */
{ 'I', 170, "ttl", &ttl, SOURCE_CMD_STRICT, SOURCE_DEFAULT, 0, OPTION_UNIT_S, option_get_value},
{ 's', 171, "expire-time", &expire_time_string, SOURCE_CMD_STRICT },
@ -262,9 +276,6 @@ main(int argc, char *argv[])
{
char *command = NULL,
*command_name;
/* Check if backup_path is directory. */
struct stat stat_buf;
int rc;
PROGRAM_NAME_FULL = argv[0];
@ -444,11 +455,6 @@ main(int argc, char *argv[])
/* Ensure that backup_path is an absolute path */
if (!is_absolute_path(backup_path))
elog(ERROR, "-B, --backup-path must be an absolute path");
/* Ensure that backup_path is a path to a directory */
rc = stat(backup_path, &stat_buf);
if (rc != -1 && !S_ISDIR(stat_buf.st_mode))
elog(ERROR, "-B, --backup-path must be a path to directory");
}
/* Ensure that backup_path is an absolute path */
@ -500,12 +506,16 @@ main(int argc, char *argv[])
/*
* Ensure that requested backup instance exists.
* for all commands except init, which doesn't take this parameter
* and add-instance which creates new instance.
* for all commands except init, which doesn't take this parameter,
* add-instance, which creates a new instance,
* and archive-get, which simply does not require it at this point
*/
if (backup_subcmd != INIT_CMD && backup_subcmd != ADD_INSTANCE_CMD)
if (backup_subcmd != INIT_CMD && backup_subcmd != ADD_INSTANCE_CMD &&
backup_subcmd != ARCHIVE_GET_CMD)
{
if (fio_access(backup_instance_path, F_OK, FIO_BACKUP_HOST) != 0)
struct stat st;
if (fio_stat(backup_instance_path, &st, true, FIO_BACKUP_HOST) != 0)
{
elog(WARNING, "Failed to access directory \"%s\": %s",
backup_instance_path, strerror(errno));
@ -514,6 +524,12 @@ main(int argc, char *argv[])
elog(ERROR, "Instance '%s' does not exist in this backup catalog",
instance_name);
}
else
{
/* Ensure that backup_path is a path to a directory */
if (!S_ISDIR(st.st_mode))
elog(ERROR, "-B, --backup-path must be a path to directory");
}
}
}
@ -529,7 +545,8 @@ main(int argc, char *argv[])
config_get_opt_env(instance_options);
/* Read options from configuration file */
if (backup_subcmd != ADD_INSTANCE_CMD)
if (backup_subcmd != ADD_INSTANCE_CMD &&
backup_subcmd != ARCHIVE_GET_CMD)
{
join_path_components(path, backup_instance_path,
BACKUP_CATALOG_CONF_FILE);
@ -680,12 +697,16 @@ main(int argc, char *argv[])
if (force)
no_validate = true;
if (replication_slot != NULL)
restore_as_replica = true;
/* keep all params in one structure */
restore_params = pgut_new(pgRestoreParams);
restore_params->is_restore = (backup_subcmd == RESTORE_CMD);
restore_params->force = force;
restore_params->no_validate = no_validate;
restore_params->restore_as_replica = restore_as_replica;
restore_params->primary_slot_name = replication_slot;
restore_params->skip_block_validation = skip_block_validation;
restore_params->skip_external_dirs = skip_external_dirs;
restore_params->partial_db_list = NULL;
@ -743,17 +764,22 @@ main(int argc, char *argv[])
if (num_threads < 1)
num_threads = 1;
if (batch_size < 1)
batch_size = 1;
compress_init();
/* do actual operation */
switch (backup_subcmd)
{
case ARCHIVE_PUSH_CMD:
return do_archive_push(&instance_config, wal_file_path,
wal_file_name, file_overwrite);
do_archive_push(&instance_config, wal_file_path, wal_file_name,
batch_size, file_overwrite, no_sync, no_ready_rename);
break;
case ARCHIVE_GET_CMD:
return do_archive_get(&instance_config,
wal_file_path, wal_file_name);
do_archive_get(&instance_config, prefetch_dir,
wal_file_path, wal_file_name, batch_size, !no_validate_wal);
break;
case ADD_INSTANCE_CMD:
return do_add_instance(&instance_config);
case DELETE_INSTANCE_CMD:
@ -799,13 +825,20 @@ main(int argc, char *argv[])
elog(ERROR, "You cannot specify --delete-expired and (-i, --backup-id) options together");
if (merge_expired && backup_id_string)
elog(ERROR, "You cannot specify --merge-expired and (-i, --backup-id) options together");
if (!delete_expired && !merge_expired && !delete_wal && !backup_id_string)
if (delete_status && backup_id_string)
elog(ERROR, "You cannot specify --status and (-i, --backup-id) options together");
if (!delete_expired && !merge_expired && !delete_wal && delete_status == NULL && !backup_id_string)
elog(ERROR, "You must specify at least one of the delete options: "
"--delete-expired |--delete-wal |--merge-expired |(-i, --backup-id)");
"--delete-expired |--delete-wal |--merge-expired |--status |(-i, --backup-id)");
if (!backup_id_string)
return do_retention();
{
if (delete_status)
do_delete_status(&instance_config, delete_status);
else
do_retention();
}
else
do_delete(current.backup_id);
do_delete(current.backup_id);
break;
case MERGE_CMD:
do_merge(current.backup_id);

View File

@ -67,7 +67,6 @@ extern const char *PROGRAM_EMAIL;
#define DATABASE_MAP "database_map"
/* Timeout defaults */
#define PARTIAL_WAL_TIMER 60
#define ARCHIVE_TIMEOUT_DEFAULT 300
#define REPLICA_TIMEOUT_DEFAULT 300
@ -86,7 +85,10 @@ extern const char *PROGRAM_EMAIL;
#endif
/* stdio buffer size */
#define STDIO_BUFSIZE 65536
#define STDIO_BUFSIZE 65536
/* retry attempts */
#define PAGE_READ_ATTEMPTS 100
/* Check whether an XLogRecPtr value points to offset 0 */
#define XRecOffIsNull(xlrp) \
@ -170,7 +172,8 @@ typedef struct pgFile
bool exists_in_prev; /* Mark files, both data and regular, that exists in previous backup */
CompressAlg compress_alg; /* compression algorithm applied to the file */
volatile pg_atomic_flag lock;/* lock for synchronization of parallel threads */
datapagemap_t pagemap; /* bitmap of pages updated since previous backup */
datapagemap_t pagemap; /* bitmap of pages updated since previous backup
may take up to 16kB per file */
bool pagemap_isabsent; /* Used to mark files with unknown state of pagemap,
* i.e. datafiles without _ptrack */
} pgFile;
@ -421,6 +424,7 @@ typedef struct pgRestoreParams
bool skip_external_dirs;
bool skip_block_validation; //Start using it
const char *restore_command;
const char *primary_slot_name;
/* options for partial restore */
PartialRestoreType partial_restore_type;
@ -473,7 +477,7 @@ struct timelineInfo {
TimeLineID tli; /* this timeline */
TimeLineID parent_tli; /* parent timeline. 0 if none */
timelineInfo *parent_link; /* link to parent timeline */
XLogRecPtr switchpoint; /* if this timeline has a parent
XLogRecPtr switchpoint; /* if this timeline has a parent, then
* switchpoint contains switchpoint LSN,
* otherwise 0 */
XLogSegNo begin_segno; /* first present segment in this timeline */
@ -499,6 +503,13 @@ typedef struct xlogInterval
XLogSegNo end_segno;
} xlogInterval;
typedef struct lsnInterval
{
TimeLineID tli;
XLogRecPtr begin_lsn;
XLogRecPtr end_lsn;
} lsnInterval;
typedef enum xlogFileType
{
SEGMENT,
@ -529,9 +540,9 @@ typedef struct BackupPageHeader
/* Special value for compressed_size field */
#define PageIsOk 0
#define SkipCurrentPage -1
#define PageIsTruncated -2
#define SkipCurrentPage -3
#define PageIsCorrupted -4 /* used by checkdb */
#define PageIsCorrupted -3 /* used by checkdb */
/*
@ -571,6 +582,9 @@ typedef struct BackupPageHeader
#define GetXLogSegNoFromScrath(logSegNo, log, seg, wal_segsz_bytes) \
logSegNo = (uint64) log * XLogSegmentsPerXLogId(wal_segsz_bytes) + seg
#define GetXLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) \
XLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes)
#else
#define GetXLogSegNo(xlrp, logSegNo, wal_segsz_bytes) \
XLByteToSeg(xlrp, logSegNo)
@ -587,6 +601,9 @@ typedef struct BackupPageHeader
#define GetXLogSegNoFromScrath(logSegNo, log, seg, wal_segsz_bytes) \
logSegNo = (uint64) log * XLogSegmentsPerXLogId + seg
#define GetXLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) \
XLogFromFileName(fname, tli, logSegNo)
#endif
#define IsSshProtocol() (instance_config.remote.host && strcmp(instance_config.remote.proto, "ssh") == 0)
@ -690,10 +707,11 @@ extern int do_init(void);
extern int do_add_instance(InstanceConfig *instance);
/* in archive.c */
extern int do_archive_push(InstanceConfig *instance, char *wal_file_path,
char *wal_file_name, bool overwrite);
extern int do_archive_get(InstanceConfig *instance, char *wal_file_path,
char *wal_file_name);
extern void do_archive_push(InstanceConfig *instance, char *wal_file_path,
char *wal_file_name, int batch_size, bool overwrite,
bool no_sync, bool no_ready_rename);
extern void do_archive_get(InstanceConfig *instance, const char *prefetch_dir_arg, char *wal_file_path,
char *wal_file_name, int batch_size, bool validate_wal);
/* in configure.c */
extern void do_show_config(void);
@ -707,8 +725,9 @@ extern int do_show(const char *instance_name, time_t requested_backup_id, bool s
/* in delete.c */
extern void do_delete(time_t backup_id);
extern void delete_backup_files(pgBackup *backup);
extern int do_retention(void);
extern void do_retention(void);
extern int do_delete_instance(void);
extern void do_delete_status(InstanceConfig *instance_config, const char *status);
/* in fetch.c */
extern char *slurpFile(const char *datadir,
@ -725,6 +744,18 @@ extern void help_command(char *command);
/* in validate.c */
extern void pgBackupValidate(pgBackup* backup, pgRestoreParams *params);
extern int do_validate_all(void);
extern int validate_one_page(Page page, BlockNumber absolute_blkno,
XLogRecPtr stop_lsn, XLogRecPtr *page_lsn,
uint32 checksum_version);
/* return codes for validate_one_page */
/* TODO: use enum */
#define PAGE_IS_VALID (-1)
#define PAGE_IS_NOT_FOUND (-2)
#define PAGE_IS_ZEROED (-3)
#define PAGE_HEADER_IS_INVALID (-4)
#define PAGE_CHECKSUM_MISMATCH (-5)
#define PAGE_LSN_FROM_FUTURE (-6)
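One possible shape for the TODO above (a sketch, not part of the patch): the same codes expressed as an enum, so the compiler can warn about unhandled cases in switch statements. The names are hypothetical to avoid clashing with the existing macros:

typedef enum page_validation_result
{
	PAGE_VALIDATION_VALID = -1,
	PAGE_VALIDATION_NOT_FOUND = -2,
	PAGE_VALIDATION_ZEROED = -3,
	PAGE_VALIDATION_HEADER_INVALID = -4,
	PAGE_VALIDATION_CHECKSUM_MISMATCH = -5,
	PAGE_VALIDATION_LSN_FROM_FUTURE = -6
} page_validation_result;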
/* in catalog.c */
extern pgBackup *read_backup(const char *instance_name, time_t timestamp);
@ -743,6 +774,10 @@ extern void catalog_lock_backup_list(parray *backup_list, int from_idx,
extern pgBackup *catalog_get_last_data_backup(parray *backup_list,
TimeLineID tli,
time_t current_start_time);
extern pgBackup *get_multi_timeline_parent(parray *backup_list, parray *tli_list,
TimeLineID current_tli, time_t current_start_time,
InstanceConfig *instance);
extern void timelineInfoFree(void *tliInfo);
extern parray *catalog_get_timelines(InstanceConfig *instance);
extern void do_set_backup(const char *instance_name, time_t backup_id,
pgSetBackupParams *set_backup_params);
@ -769,10 +804,15 @@ extern int pgBackupCompareIdEqual(const void *l, const void *r);
extern pgBackup* find_parent_full_backup(pgBackup *current_backup);
extern int scan_parent_chain(pgBackup *current_backup, pgBackup **result_backup);
/* return codes for scan_parent_chain */
#define ChainIsBroken 0
#define ChainIsInvalid 1
#define ChainIsOk 2
extern bool is_parent(time_t parent_backup_time, pgBackup *child_backup, bool inclusive);
extern bool is_prolific(parray *backup_list, pgBackup *target_backup);
extern bool in_backup_list(parray *backup_list, pgBackup *target_backup);
extern int get_backup_index_number(parray *backup_list, pgBackup *backup);
extern void append_children(parray *backup_list, pgBackup *target_backup, parray *append_list);
extern bool launch_agent(void);
extern void launch_ssh(char* argv[]);
extern void wait_ssh(void);
@ -832,6 +872,7 @@ extern void pgFileDelete(pgFile *file, const char *full_path);
extern void pgFileFree(void *file);
extern pg_crc32 pgFileGetCRC(const char *file_path, bool missing_ok, bool use_crc32c);
extern pg_crc32 pgFileGetCRCgz(const char *file_path, bool missing_ok, bool use_crc32c);
extern int pgFileCompareName(const void *f1, const void *f2);
extern int pgFileComparePath(const void *f1, const void *f2);
@ -877,13 +918,16 @@ extern bool create_empty_file(fio_location from_location, const char *to_root,
extern bool check_file_pages(pgFile *file, XLogRecPtr stop_lsn,
uint32 checksum_version, uint32 backup_version);
/* parsexlog.c */
extern void extractPageMap(const char *archivedir,
TimeLineID tli, uint32 seg_size,
XLogRecPtr startpoint, XLogRecPtr endpoint);
extern bool extractPageMap(const char *archivedir, uint32 wal_seg_size,
XLogRecPtr startpoint, TimeLineID start_tli,
XLogRecPtr endpoint, TimeLineID end_tli,
parray *tli_list);
extern void validate_wal(pgBackup *backup, const char *archivedir,
time_t target_time, TransactionId target_xid,
XLogRecPtr target_lsn, TimeLineID tli,
uint32 seg_size);
extern bool validate_wal_segment(TimeLineID tli, XLogSegNo segno,
const char *prefetch_dir, uint32 wal_seg_size);
extern bool read_recovery_info(const char *archivedir, TimeLineID tli,
uint32 seg_size,
XLogRecPtr start_lsn, XLogRecPtr stop_lsn,
@ -913,6 +957,7 @@ extern void copy_pgcontrol_file(const char *from_fullpath, fio_location from_loc
extern void time2iso(char *buf, size_t len, time_t time);
extern const char *status2str(BackupStatus status);
extern BackupStatus str2status(const char *status);
extern const char *base36enc(long unsigned int value);
extern char *base36enc_dup(long unsigned int value);
extern long unsigned int base36dec(const char *text);
@ -925,7 +970,7 @@ extern int32 do_decompress(void* dst, size_t dst_size, void const* src, size_t
CompressAlg alg, const char **errormsg);
extern void pretty_size(int64 size, char *buf, size_t len);
extern void pretty_time_interval(int64 num_seconds, char *buf, size_t len);
extern void pretty_time_interval(double time, char *buf, size_t len);
extern PGconn *pgdata_basic_setup(ConnectionOptions conn_opt, PGNodeInfo *nodeInfo);
extern void check_system_identifiers(PGconn *conn, char *pgdata);
@ -948,8 +993,30 @@ extern char *pg_ptrack_get_and_clear(Oid tablespace_oid,
extern XLogRecPtr get_last_ptrack_lsn(PGconn *backup_conn, PGNodeInfo *nodeInfo);
extern parray * pg_ptrack_get_pagemapset(PGconn *backup_conn, const char *ptrack_schema, XLogRecPtr lsn);
#ifdef WIN32
#define setbuffer(stream, buf, size) setvbuf(stream, buf, buf ? _IOFBF : _IONBF, size);
#endif
/* FIO */
extern int fio_send_pages(FILE* in, FILE* out, pgFile *file, XLogRecPtr horizonLsn,
int calg, int clevel, uint32 checksum_version,
datapagemap_t *pagemap, BlockNumber* err_blknum, char **errormsg);
/* return codes for fio_send_pages */
#define OUT_BUF_SIZE (1024 * 1024)
extern int fio_send_file_gz(const char *from_fullpath, const char *to_fullpath, FILE* out, int thread_num);
extern int fio_send_file(const char *from_fullpath, const char *to_fullpath, FILE* out, int thread_num);
/* return codes for fio_send_pages() and fio_send_file() */
#define SEND_OK (0)
#define FILE_MISSING (-1)
#define OPEN_FAILED (-2)
#define READ_FAILED (-3)
#define WRITE_FAILED (-4)
#define ZLIB_ERROR (-5)
#define REMOTE_ERROR (-6)
#define PAGE_CORRUPTION (-8)
/* Check if specified location is local for current node */
extern bool fio_is_remote(fio_location location);
extern void get_header_errormsg(Page page, char **errormsg);
extern void get_checksum_errormsg(Page page, char **errormsg,
BlockNumber absolute_blkno);
#endif /* PG_PROBACKUP_H */

View File

@ -251,7 +251,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
result = scan_parent_chain(dest_backup, &tmp_backup);
if (result == 0)
if (result == ChainIsBroken)
{
/* chain is broken, determine missing backup ID
* and orphanize all its descendants
@ -290,7 +290,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
/* No point in going further */
elog(ERROR, "%s of backup %s failed.", action, base36enc(dest_backup->start_time));
}
else if (result == 1)
else if (result == ChainIsInvalid)
{
/* chain is intact, but at least one parent is invalid */
set_orphan_status(backups, tmp_backup);
@ -403,7 +403,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
*/
validate_wal(dest_backup, arclog_path, rt->target_time,
rt->target_xid, rt->target_lsn,
base_full_backup->tli, instance_config.xlog_seg_size);
dest_backup->tli, instance_config.xlog_seg_size);
}
/* Orphanize every OK descendant of corrupted backup */
else
@ -472,6 +472,9 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
create_recovery_conf(target_backup_id, rt, dest_backup, params);
}
/* ssh connection is no longer needed */
fio_disconnect();
/* cleanup */
parray_walk(backups, pgBackupFree);
parray_free(backups);
@ -854,7 +857,7 @@ restore_files(void *arg)
goto done;
if (!fio_is_remote_file(out))
setbuffer(out, buffer, STDIO_BUFSIZE);
setvbuf(out, buffer, _IOFBF, STDIO_BUFSIZE);
/* Restore destination file */
if (dest_file->is_datafile && !dest_file->is_cfs)
@ -977,6 +980,7 @@ create_recovery_conf(time_t backup_id,
/* construct restore_command */
if (pitr_requested)
{
fio_fprintf(fp, "\n## recovery settings\n");
/* If restore_command is provided, use it. Otherwise construct it from scratch. */
if (restore_command_provided)
sprintf(restore_command_guc, "%s", instance_config.restore_command);
@ -1052,8 +1056,15 @@ create_recovery_conf(time_t backup_id,
fio_fprintf(fp, "recovery_target_action = '%s'\n", "pause");
}
if (pitr_requested)
{
elog(LOG, "Setting restore_command to '%s'", restore_command_guc);
fio_fprintf(fp, "restore_command = '%s'\n", restore_command_guc);
}
if (params->restore_as_replica)
{
fio_fprintf(fp, "\n## standby settings\n");
/* standby_mode was removed in PG12 */
#if PG_VERSION_NUM < 120000
fio_fprintf(fp, "standby_mode = 'on'\n");
@ -1063,12 +1074,9 @@ create_recovery_conf(time_t backup_id,
fio_fprintf(fp, "primary_conninfo = '%s'\n", params->primary_conninfo);
else if (backup->primary_conninfo)
fio_fprintf(fp, "primary_conninfo = '%s'\n", backup->primary_conninfo);
}
if (pitr_requested)
{
elog(LOG, "Setting restore_command to '%s'", restore_command_guc);
fio_fprintf(fp, "restore_command = '%s'\n", restore_command_guc);
if (params->primary_slot_name != NULL)
fio_fprintf(fp, "primary_slot_name = '%s'\n", params->primary_slot_name);
}
if (fio_fflush(fp) != 0 ||
@ -1318,7 +1326,7 @@ satisfy_timeline(const parray *timelines, const pgBackup *backup)
timeline = (TimeLineHistoryEntry *) parray_get(timelines, i);
if (backup->tli == timeline->tli &&
(XLogRecPtrIsInvalid(timeline->end) ||
backup->stop_lsn < timeline->end))
backup->stop_lsn <= timeline->end))
return true;
}
return false;

View File

@ -191,14 +191,18 @@ pretty_size(int64 size, char *buf, size_t len)
}
void
pretty_time_interval(int64 num_seconds, char *buf, size_t len)
pretty_time_interval(double time, char *buf, size_t len)
{
int seconds = 0;
int minutes = 0;
int hours = 0;
int days = 0;
int num_seconds = 0;
int milliseconds = 0;
int seconds = 0;
int minutes = 0;
int hours = 0;
int days = 0;
if (num_seconds <= 0)
num_seconds = (int) time;
if (time <= 0)
{
strncpy(buf, "0", len);
return;
@ -214,6 +218,7 @@ pretty_time_interval(int64 num_seconds, char *buf, size_t len)
num_seconds %= 60;
seconds = num_seconds;
milliseconds = (int)((time - (int) time) * 1000.0);
if (days > 0)
{
@ -233,7 +238,16 @@ pretty_time_interval(int64 num_seconds, char *buf, size_t len)
return;
}
snprintf(buf, len, "%ds", seconds);
if (seconds > 0)
{
if (milliseconds > 0)
snprintf(buf, len, "%ds:%dms", seconds, milliseconds);
else
snprintf(buf, len, "%ds", seconds);
return;
}
snprintf(buf, len, "%dms", milliseconds);
return;
}
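For reference, a hedged sketch of the strings the reworked function above produces for sub-minute inputs (assuming a sufficiently large buffer):

/* Illustration only: sample outputs of pretty_time_interval(). */
char buf[32];

pretty_time_interval(0, buf, sizeof(buf));     /* "0"        */
pretty_time_interval(0.25, buf, sizeof(buf));  /* "250ms"    */
pretty_time_interval(5, buf, sizeof(buf));     /* "5s"       */
pretty_time_interval(1.5, buf, sizeof(buf));   /* "1s:500ms" */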

View File

@ -18,6 +18,21 @@
#include <sys/stat.h>
static const char *statusName[] =
{
"UNKNOWN",
"OK",
"ERROR",
"RUNNING",
"MERGING",
"MERGED",
"DELETING",
"DELETED",
"DONE",
"ORPHAN",
"CORRUPT"
};
const char *
base36enc(long unsigned int value)
{
@ -462,22 +477,21 @@ parse_program_version(const char *program_version)
const char *
status2str(BackupStatus status)
{
static const char *statusName[] =
{
"UNKNOWN",
"OK",
"ERROR",
"RUNNING",
"MERGING",
"MERGED",
"DELETING",
"DELETED",
"DONE",
"ORPHAN",
"CORRUPT"
};
if (status < BACKUP_STATUS_INVALID || BACKUP_STATUS_CORRUPT < status)
return "UNKNOWN";
return statusName[status];
}
BackupStatus
str2status(const char *status)
{
BackupStatus i;
for (i = BACKUP_STATUS_INVALID; i <= BACKUP_STATUS_CORRUPT; i++)
{
if (pg_strcasecmp(status, statusName[i]) == 0) return i;
}
return BACKUP_STATUS_INVALID;
}
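Because both functions now index the same file-scope statusName table, string and enum conversions stay symmetric. A minimal sketch, illustrative only:

/* Illustration only: round-tripping a status through the shared table. */
BackupStatus st   = str2status("orphan");         /* case-insensitive match */
const char  *name = status2str(st);               /* "ORPHAN" */
BackupStatus bad  = str2status("no-such-state");  /* BACKUP_STATUS_INVALID */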

View File

@ -14,7 +14,7 @@
#define PRINTF_BUF_SIZE 1024
#define FILE_PERMISSIONS 0600
#define PAGE_READ_ATTEMPTS 100
#define CHUNK_SIZE 1024 * 128
static __thread unsigned long fio_fdset = 0;
static __thread void* fio_stdin_buffer;
@ -27,11 +27,12 @@ fio_location MyLocation;
typedef struct
{
BlockNumber nblocks;
BlockNumber segBlockNum;
BlockNumber segmentno;
XLogRecPtr horizonLsn;
uint32 checksumVersion;
int calg;
int clevel;
int bitmapsize;
} fio_send_request;
@ -114,6 +115,7 @@ fio_safestat(const char *path, struct stat *buf)
#define stat(x, y) fio_safestat(x, y)
/* TODO: use real pread on Linux */
static ssize_t pread(int fd, void* buf, size_t size, off_t off)
{
off_t rc = lseek(fd, off, SEEK_SET);
@ -135,7 +137,7 @@ static int remove_file_or_dir(char const* path)
#endif
/* Check if specified location is local for current node */
static bool fio_is_remote(fio_location location)
bool fio_is_remote(fio_location location)
{
bool is_remote = MyLocation != FIO_LOCAL_HOST
&& location != FIO_LOCAL_HOST
@ -339,7 +341,10 @@ int fio_open(char const* path, int mode, fio_location location)
hdr.cop = FIO_OPEN;
hdr.handle = i;
hdr.size = strlen(path) + 1;
hdr.arg = mode & ~O_EXCL;
hdr.arg = mode;
// hdr.arg = mode & ~O_EXCL;
// elog(INFO, "PATH: %s MODE: %i, %i", path, mode, O_EXCL);
// elog(INFO, "MODE: %i", hdr.arg);
fio_fdset |= 1 << i;
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
@ -368,6 +373,12 @@ fio_disconnect(void)
{
if (fio_stdin)
{
fio_header hdr;
hdr.cop = FIO_DISCONNECT;
hdr.size = 0;
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
Assert(hdr.cop == FIO_DISCONNECTED);
SYS_CHECK(close(fio_stdin));
SYS_CHECK(close(fio_stdout));
fio_stdin = 0;
@ -483,6 +494,7 @@ int fio_close(int fd)
fio_fdset &= ~(1 << hdr.handle);
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
/* Note, that file is closed without waiting for confirmation */
return 0;
}
@ -545,13 +557,14 @@ int fio_pread(FILE* f, void* buf, off_t offs)
if (hdr.size != 0)
IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size);
/* TODO: error handling */
return hdr.arg;
}
else
{
/* For local file, opened by fopen, we should use stdio operations */
int rc;
rc = fseek(f, offs, SEEK_SET);
/* For local file, opened by fopen, we should use stdio functions */
int rc = fseek(f, offs, SEEK_SET);
if (rc < 0)
return rc;
@ -857,6 +870,8 @@ int fio_rename(char const* old_path, char const* new_path, fio_location location
IO_CHECK(fio_write_all(fio_stdout, old_path, old_path_len), old_path_len);
IO_CHECK(fio_write_all(fio_stdout, new_path, new_path_len), new_path_len);
//TODO: wait for confirmation.
return 0;
}
else
@ -908,7 +923,7 @@ int fio_sync(char const* path, fio_location location)
}
/* Get crc32 of file */
pg_crc32 fio_get_crc32(const char *file_path, fio_location location)
pg_crc32 fio_get_crc32(const char *file_path, fio_location location, bool decompress)
{
if (fio_is_remote(location))
{
@ -918,6 +933,10 @@ pg_crc32 fio_get_crc32(const char *file_path, fio_location location)
hdr.cop = FIO_GET_CRC32;
hdr.handle = -1;
hdr.size = path_len;
hdr.arg = 0;
if (decompress)
hdr.arg = 1;
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
IO_CHECK(fio_write_all(fio_stdout, file_path, path_len), path_len);
@ -926,7 +945,12 @@ pg_crc32 fio_get_crc32(const char *file_path, fio_location location)
return crc;
}
else
return pgFileGetCRC(file_path, true, true);
{
if (decompress)
return pgFileGetCRCgz(file_path, true, true);
else
return pgFileGetCRC(file_path, true, true);
}
}
/* Remove file */
@ -1003,7 +1027,6 @@ int fio_chmod(char const* path, int mode, fio_location location)
#ifdef HAVE_LIBZ
#define ZLIB_BUFFER_SIZE (64*1024)
#define MAX_WBITS 15 /* 32K LZ77 window */
#define DEF_MEM_LEVEL 8
@ -1019,6 +1042,7 @@ typedef struct fioGZFile
Bytef buf[ZLIB_BUFFER_SIZE];
} fioGZFile;
/* On error returns NULL and errno should be checked */
gzFile
fio_gzopen(char const* path, char const* mode, int level, fio_location location)
{
@ -1029,6 +1053,7 @@ fio_gzopen(char const* path, char const* mode, int level, fio_location location)
memset(&gz->strm, 0, sizeof(gz->strm));
gz->eof = 0;
gz->errnum = Z_OK;
/* check if file opened for writing */
if (strcmp(mode, PG_BINARY_W) == 0) /* compress */
{
gz->strm.next_out = gz->buf;
@ -1041,14 +1066,12 @@ fio_gzopen(char const* path, char const* mode, int level, fio_location location)
if (rc == Z_OK)
{
gz->compress = 1;
if (fio_access(path, F_OK, location) == 0)
gz->fd = fio_open(path, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY, location);
if (gz->fd < 0)
{
elog(LOG, "File %s exists", path);
free(gz);
errno = EEXIST;
return NULL;
}
gz->fd = fio_open(path, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY, location);
}
}
else
@ -1061,21 +1084,27 @@ fio_gzopen(char const* path, char const* mode, int level, fio_location location)
{
gz->compress = 0;
gz->fd = fio_open(path, O_RDONLY | PG_BINARY, location);
if (gz->fd < 0)
{
free(gz);
return NULL;
}
}
}
if (rc != Z_OK)
{
free(gz);
return NULL;
elog(ERROR, "zlib internal error when opening file %s: %s",
path, gz->strm.msg);
}
return (gzFile)((size_t)gz + FIO_GZ_REMOTE_MARKER);
}
else
{
gzFile file;
/* check if file opened for writing */
if (strcmp(mode, PG_BINARY_W) == 0)
{
int fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, FILE_PERMISSIONS);
int fd = open(path, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY, FILE_PERMISSIONS);
if (fd < 0)
return NULL;
file = gzdopen(fd, mode);
@ -1135,7 +1164,8 @@ fio_gzread(gzFile f, void *buf, unsigned size)
{
gz->strm.next_in = gz->buf;
}
rc = fio_read(gz->fd, gz->strm.next_in + gz->strm.avail_in, gz->buf + ZLIB_BUFFER_SIZE - gz->strm.next_in - gz->strm.avail_in);
rc = fio_read(gz->fd, gz->strm.next_in + gz->strm.avail_in,
gz->buf + ZLIB_BUFFER_SIZE - gz->strm.next_in - gz->strm.avail_in);
if (rc > 0)
{
gz->strm.avail_in += rc;
@ -1274,8 +1304,10 @@ z_off_t fio_gzseek(gzFile f, z_off_t offset, int whence)
#endif
/* Send file content */
static void fio_send_file(int out, char const* path)
/* Send file content
* Note: it should not be used for large files.
*/
static void fio_load_file(int out, char const* path)
{
int fd = open(path, O_RDONLY);
fio_header hdr;
@ -1301,8 +1333,24 @@ static void fio_send_file(int out, char const* path)
}
}
int fio_send_pages(FILE* in, FILE* out, pgFile *file,
XLogRecPtr horizonLsn, BlockNumber* nBlocksSkipped, int calg, int clevel)
/*
 * Return the number of blocks actually read; retry attempts and
 * half-read blocks are not counted.
* Return values in case of error:
* REMOTE_ERROR
* PAGE_CORRUPTION
* WRITE_FAILED
*
 * If none of the above, this function returns the number of blocks
 * read by the remote agent.
*
* In case of DELTA mode horizonLsn must be a valid lsn,
* otherwise it should be set to InvalidXLogRecPtr.
*/
int fio_send_pages(FILE* in, FILE* out, pgFile *file, XLogRecPtr horizonLsn,
int calg, int clevel, uint32 checksum_version,
datapagemap_t *pagemap, BlockNumber* err_blknum,
char **errormsg)
{
struct {
fio_header hdr;
@ -1313,144 +1361,240 @@ int fio_send_pages(FILE* in, FILE* out, pgFile *file,
Assert(fio_is_remote_file(in));
req.hdr.cop = FIO_SEND_PAGES;
req.hdr.size = sizeof(fio_send_request);
/* send message with header
8bytes 20bytes var
------------------------------------------------------
| fio_header | fio_send_request | BITMAP(if any) |
------------------------------------------------------
*/
req.hdr.handle = fio_fileno(in) & ~FIO_PIPE_MARKER;
if (pagemap)
{
req.hdr.cop = FIO_SEND_PAGES_PAGEMAP;
req.hdr.size = sizeof(fio_send_request) + pagemap->bitmapsize;
req.arg.bitmapsize = pagemap->bitmapsize;
/* TODO: add optimization for the case of pagemap
* containing small number of blocks with big serial numbers:
* https://github.com/postgrespro/pg_probackup/blob/remote_page_backup/src/utils/file.c#L1211
*/
}
else
{
req.hdr.cop = FIO_SEND_PAGES;
req.hdr.size = sizeof(fio_send_request);
}
req.arg.nblocks = file->size/BLCKSZ;
req.arg.segBlockNum = file->segno * RELSEG_SIZE;
req.arg.segmentno = file->segno * RELSEG_SIZE;
req.arg.horizonLsn = horizonLsn;
req.arg.checksumVersion = current.checksum_version;
req.arg.checksumVersion = checksum_version;
req.arg.calg = calg;
req.arg.clevel = clevel;
file->compress_alg = calg;
file->compress_alg = calg; /* TODO: wtf? why here? */
//<-----
// datapagemap_iterator_t *iter;
// BlockNumber blkno;
// iter = datapagemap_iterate(pagemap);
// while (datapagemap_next(iter, &blkno))
// elog(INFO, "block %u", blkno);
// pg_free(iter);
//<-----
IO_CHECK(fio_write_all(fio_stdout, &req, sizeof(req)), sizeof(req));
if (pagemap)
/* now send pagemap itself */
IO_CHECK(fio_write_all(fio_stdout, pagemap->bitmap, pagemap->bitmapsize), pagemap->bitmapsize);
while (true)
{
fio_header hdr;
char buf[BLCKSZ + sizeof(BackupPageHeader)];
IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
Assert(hdr.cop == FIO_PAGE);
if ((int)hdr.arg < 0) /* read error */
if (interrupted)
elog(ERROR, "Interrupted during page reading");
if (hdr.cop == FIO_ERROR)
{
return (int)hdr.arg;
errno = hdr.arg;
*err_blknum = hdr.size;
return REMOTE_ERROR;
}
blknum = hdr.arg;
if (hdr.size == 0) /* end of segment */
break;
Assert(hdr.size <= sizeof(buf));
IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size);
COMP_FILE_CRC32(true, file->crc, buf, hdr.size);
if (fio_fwrite(out, buf, hdr.size) != hdr.size)
else if (hdr.cop == FIO_SEND_FILE_CORRUPTION)
{
int errno_tmp = errno;
fio_fclose(out);
elog(ERROR, "File: %s, cannot write backup at block %u: %s",
file->path, blknum, strerror(errno_tmp));
*err_blknum = hdr.arg;
if (hdr.size > 0)
{
IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size);
*errormsg = pgut_malloc(hdr.size);
strncpy(*errormsg, buf, hdr.size);
}
return PAGE_CORRUPTION;
}
file->write_size += hdr.size;
n_blocks_read++;
if (((BackupPageHeader*)buf)->compressed_size == PageIsTruncated)
else if (hdr.cop == FIO_SEND_FILE_EOF)
{
blknum += 1;
/* n_blocks_read reported by EOF */
n_blocks_read = hdr.size;
break;
}
else if (hdr.cop == FIO_PAGE)
{
blknum = hdr.arg;
Assert(hdr.size <= sizeof(buf));
IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size);
COMP_FILE_CRC32(true, file->crc, buf, hdr.size);
if (fio_fwrite(out, buf, hdr.size) != hdr.size)
{
fio_fclose(out);
*err_blknum = blknum;
return WRITE_FAILED;
}
file->write_size += hdr.size;
file->uncompressed_size += BLCKSZ;
}
else
elog(ERROR, "Remote agent returned message of unexpected type: %i", hdr.cop);
}
*nBlocksSkipped = blknum - n_blocks_read;
return blknum;
return n_blocks_read;
}
static void fio_send_pages_impl(int fd, int out, fio_send_request* req)
static void fio_send_pages_impl(int fd, int out, char* buf, bool with_pagemap)
{
BlockNumber blknum;
BlockNumber blknum = 0;
BlockNumber n_blocks_read = 0;
XLogRecPtr page_lsn = 0;
char read_buffer[BLCKSZ+1];
fio_header hdr;
fio_send_request *req = (fio_send_request*) buf;
/* parse buffer */
datapagemap_t *map = NULL;
datapagemap_iterator_t *iter = NULL;
if (with_pagemap)
{
map = pgut_malloc(sizeof(datapagemap_t));
map->bitmapsize = req->bitmapsize;
map->bitmap = (char*) buf + sizeof(fio_send_request);
/* get first block */
iter = datapagemap_iterate(map);
datapagemap_next(iter, &blknum);
}
hdr.cop = FIO_PAGE;
read_buffer[BLCKSZ] = 1; /* barrier */
for (blknum = 0; blknum < req->nblocks; blknum++)
while (blknum < req->nblocks)
{
int rc = 0;
int retry_attempts = PAGE_READ_ATTEMPTS;
XLogRecPtr page_lsn = InvalidXLogRecPtr;
while (true)
/* TODO: handle signals on the agent */
if (interrupted)
elog(ERROR, "Interrupted during remote page reading");
/* read page, check header and validate checksums */
/* TODO: libpq connection on the agent, so we can do ptrack
* magic right here.
*/
for (;;)
{
ssize_t rc = pread(fd, read_buffer, BLCKSZ, blknum*BLCKSZ);
ssize_t read_len = pread(fd, read_buffer, BLCKSZ, blknum*BLCKSZ);
page_lsn = InvalidXLogRecPtr;
if (rc <= 0)
/* report eof */
if (read_len == 0)
goto eof;
/* report error */
else if (read_len < 0)
{
if (rc < 0)
{
hdr.arg = -errno;
hdr.size = 0;
Assert((int)hdr.arg < 0);
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
}
else
{
BackupPageHeader bph;
bph.block = blknum;
bph.compressed_size = PageIsTruncated;
hdr.arg = blknum;
hdr.size = sizeof(bph);
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
IO_CHECK(fio_write_all(out, &bph, sizeof(bph)), sizeof(bph));
}
return;
/* TODO: better to report exact error message, not errno */
hdr.cop = FIO_ERROR;
hdr.arg = errno;
hdr.size = blknum;
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
goto cleanup;
}
else if (rc == BLCKSZ)
else if (read_len == BLCKSZ)
{
if (!parse_page((Page)read_buffer, &page_lsn))
{
int i;
for (i = 0; read_buffer[i] == 0; i++);
rc = validate_one_page(read_buffer, req->segmentno + blknum,
InvalidXLogRecPtr, &page_lsn, req->checksumVersion);
/* Page is zeroed. No need to check header and checksum. */
if (i == BLCKSZ)
break;
}
else if (!req->checksumVersion
|| pg_checksum_page(read_buffer, req->segBlockNum + blknum) == ((PageHeader)read_buffer)->pd_checksum)
{
/* TODO: optimize copy of zeroed page */
if (rc == PAGE_IS_ZEROED)
break;
else if (rc == PAGE_IS_VALID)
break;
}
}
// else /* read less than BLCKSZ bytes, retry */
/* The page has either an invalid header or an invalid checksum,
 * retry. If retry attempts are exhausted, report corruption.
*/
if (--retry_attempts == 0)
{
hdr.size = 0;
hdr.arg = PAGE_CHECKSUM_MISMATCH;
char *errormsg = NULL;
hdr.cop = FIO_SEND_FILE_CORRUPTION;
hdr.arg = blknum;
/* Construct the error message */
if (rc == PAGE_HEADER_IS_INVALID)
get_header_errormsg(read_buffer, &errormsg);
else if (rc == PAGE_CHECKSUM_MISMATCH)
get_checksum_errormsg(read_buffer, &errormsg,
req->segmentno + blknum);
/* if error message is not empty, set payload size to its length */
hdr.size = errormsg ? strlen(errormsg) + 1 : 0;
/* send header */
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
return;
/* send error message if any */
if (errormsg)
IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size);
pg_free(errormsg);
goto cleanup;
}
}
n_blocks_read++;
/*
* horizonLsn is not 0 for delta backup.
* horizonLsn is not 0 only in case of delta backup.
 * Since unsigned numbers are always greater than or equal to zero,
 * there is no need for additional checks.
*/
if (page_lsn >= req->horizonLsn || page_lsn == InvalidXLogRecPtr)
if ((req->horizonLsn == InvalidXLogRecPtr) ||
(page_lsn == InvalidXLogRecPtr) || /* zeroed page */
(req->horizonLsn > 0 && page_lsn >= req->horizonLsn)) /* delta */
{
char write_buffer[BLCKSZ*2];
BackupPageHeader* bph = (BackupPageHeader*)write_buffer;
const char *errormsg = NULL;
/* compress page */
hdr.arg = bph->block = blknum;
hdr.size = sizeof(BackupPageHeader);
bph->compressed_size = do_compress(write_buffer + sizeof(BackupPageHeader), sizeof(write_buffer) - sizeof(BackupPageHeader),
bph->compressed_size = do_compress(write_buffer + sizeof(BackupPageHeader),
sizeof(write_buffer) - sizeof(BackupPageHeader),
read_buffer, BLCKSZ, req->calg, req->clevel,
&errormsg);
NULL);
if (bph->compressed_size <= 0 || bph->compressed_size >= BLCKSZ)
{
/* Do not compress page */
@ -1462,10 +1606,360 @@ static void fio_send_pages_impl(int fd, int out, fio_send_request* req)
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
IO_CHECK(fio_write_all(out, write_buffer, hdr.size), hdr.size);
}
/* next block */
if (with_pagemap)
{
/* exit if pagemap is exhausted */
if (!datapagemap_next(iter, &blknum))
break;
}
else
blknum++;
}
hdr.size = 0;
hdr.arg = blknum;
eof:
/* We are done, send eof */
hdr.cop = FIO_SEND_FILE_EOF;
hdr.arg = 0;
hdr.size = n_blocks_read; /* TODO: report number of backed up blocks */
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
cleanup:
pg_free(map);
pg_free(iter);
return;
}
/* Receive chunks of compressed data, decompress them and write to
* destination file.
* Return codes:
* FILE_MISSING (-1)
* OPEN_FAILED (-2)
* READ_FAILED (-3)
* WRITE_FAILED (-4)
* ZLIB_ERROR (-5)
* REMOTE_ERROR (-6)
*/
int fio_send_file_gz(const char *from_fullpath, const char *to_fullpath, FILE* out, int thread_num)
{
fio_header hdr;
int exit_code = SEND_OK;
char *in_buf = pgut_malloc(CHUNK_SIZE); /* buffer for compressed data */
char *out_buf = pgut_malloc(OUT_BUF_SIZE); /* 1MB buffer for decompressed data */
size_t path_len = strlen(from_fullpath) + 1;
/* decompressor */
z_stream *strm = NULL;
hdr.cop = FIO_SEND_FILE;
hdr.size = path_len;
elog(VERBOSE, "Thread [%d]: Attempting to open remote compressed WAL file '%s'",
thread_num, from_fullpath);
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
IO_CHECK(fio_write_all(fio_stdout, from_fullpath, path_len), path_len);
for (;;)
{
fio_header hdr;
IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
if (hdr.cop == FIO_SEND_FILE_EOF)
{
break;
}
else if (hdr.cop == FIO_ERROR)
{
/* handle error, reported by the agent */
if (hdr.size > 0)
{
IO_CHECK(fio_read_all(fio_stdin, in_buf, hdr.size), hdr.size);
elog(WARNING, "Thread [%d]: %s", thread_num, in_buf);
}
exit_code = hdr.arg;
goto cleanup;
}
else if (hdr.cop == FIO_PAGE)
{
int rc;
Assert(hdr.size <= CHUNK_SIZE);
IO_CHECK(fio_read_all(fio_stdin, in_buf, hdr.size), hdr.size);
/* We have received a chunk of compressed data, let's decompress it */
if (strm == NULL)
{
/* Initialize decompressor */
strm = pgut_malloc(sizeof(z_stream));
memset(strm, 0, sizeof(z_stream));
/* The fields next_in, avail_in initialized before init */
strm->next_in = (Bytef *)in_buf;
strm->avail_in = hdr.size;
rc = inflateInit2(strm, 15 + 16);
if (rc != Z_OK)
{
elog(WARNING, "Thread [%d]: Failed to initialize decompression stream for file '%s': %i: %s",
thread_num, from_fullpath, rc, strm->msg);
exit_code = ZLIB_ERROR;
goto cleanup;
}
}
else
{
strm->next_in = (Bytef *)in_buf;
strm->avail_in = hdr.size;
}
strm->next_out = (Bytef *)out_buf; /* output buffer */
strm->avail_out = OUT_BUF_SIZE; /* free space in output buffer */
/*
* From zlib documentation:
* The application must update next_in and avail_in when avail_in
* has dropped to zero. It must update next_out and avail_out when
* avail_out has dropped to zero.
*/
while (strm->avail_in != 0) /* while there is data in input buffer, decompress it */
{
/* decompress until there is no data to decompress,
* or buffer with uncompressed data is full
*/
rc = inflate(strm, Z_NO_FLUSH);
if (rc == Z_STREAM_END)
/* end of stream */
break;
else if (rc != Z_OK)
{
/* got an error */
elog(WARNING, "Thread [%d]: Decompression failed for file '%s': %i: %s",
thread_num, from_fullpath, rc, strm->msg);
exit_code = ZLIB_ERROR;
goto cleanup;
}
if (strm->avail_out == 0)
{
/* Output buffer is full, write it out */
if (fwrite(out_buf, 1, OUT_BUF_SIZE, out) != OUT_BUF_SIZE)
{
elog(WARNING, "Thread [%d]: Cannot write to file '%s': %s",
thread_num, to_fullpath, strerror(errno));
exit_code = WRITE_FAILED;
goto cleanup;
}
strm->next_out = (Bytef *)out_buf; /* output buffer */
strm->avail_out = OUT_BUF_SIZE;
}
}
/* write out leftovers if any */
if (strm->avail_out != OUT_BUF_SIZE)
{
int len = OUT_BUF_SIZE - strm->avail_out;
if (fwrite(out_buf, 1, len, out) != len)
{
elog(WARNING, "Thread [%d]: Cannot write to file: %s",
thread_num, strerror(errno));
exit_code = WRITE_FAILED;
goto cleanup;
}
}
}
else
{
elog(WARNING, "Thread [%d]: Remote agent returned message of unexpected type: %i",
thread_num, hdr.cop);
exit_code = REMOTE_ERROR;
break;
}
}
cleanup:
if (exit_code < OPEN_FAILED)
fio_disconnect(); /* discard possible pending data in pipe */
if (strm)
{
inflateEnd(strm);
pg_free(strm);
}
pg_free(in_buf);
pg_free(out_buf);
return exit_code;
}
/* Receive chunks of data and write them to destination file.
* Return codes:
* SEND_OK (0)
* FILE_MISSING (-1)
* OPEN_FAILED (-2)
 * READ_FAILED (-3)
 * WRITE_FAILED (-4)
*/
int fio_send_file(const char *from_fullpath, const char *to_fullpath, FILE* out, int thread_num)
{
fio_header hdr;
int exit_code = SEND_OK;
size_t path_len = strlen(from_fullpath) + 1;
char *buf = pgut_malloc(CHUNK_SIZE); /* buffer */
hdr.cop = FIO_SEND_FILE;
hdr.size = path_len;
elog(VERBOSE, "Thread [%d]: Attempting to open remote WAL file '%s'",
thread_num, from_fullpath);
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
IO_CHECK(fio_write_all(fio_stdout, from_fullpath, path_len), path_len);
for (;;)
{
/* receive data */
IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
if (hdr.cop == FIO_SEND_FILE_EOF)
{
break;
}
else if (hdr.cop == FIO_ERROR)
{
/* handle error, reported by the agent */
if (hdr.size > 0)
{
IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size);
elog(WARNING, "Thread [%d]: %s", thread_num, buf);
}
exit_code = hdr.arg;
break;
}
else if (hdr.cop == FIO_PAGE)
{
Assert(hdr.size <= CHUNK_SIZE);
IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size);
/* We have received a chunk of data, let's write it out */
if (fwrite(buf, 1, hdr.size, out) != hdr.size)
{
elog(WARNING, "Thread [%d]: Cannot write to file '%s': %s",
thread_num, to_fullpath, strerror(errno));
exit_code = WRITE_FAILED;
break;
}
}
else
{
/* TODO: fio_disconnect may get assert fail when running after this */
elog(WARNING, "Thread [%d]: Remote agent returned message of unexpected type: %i",
thread_num, hdr.cop);
exit_code = REMOTE_ERROR;
break;
}
}
if (exit_code < OPEN_FAILED)
fio_disconnect(); /* discard possible pending data in pipe */
pg_free(buf);
return exit_code;
}
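For orientation, a hedged sketch of how a caller might combine the two receivers above, dispatching on the .gz suffix and translating the return codes. The function name and control flow are illustrative and not taken from this patch:

/* Illustrative caller (not from the patch): fetch a file from the
 * remote host, decompressing on the fly when the source is gzipped. */
static int
fetch_remote_file(const char *from_fullpath, const char *to_fullpath,
                  FILE *out, int thread_num)
{
    int    rc;
    size_t len = strlen(from_fullpath);

    if (len > 3 && strcmp(from_fullpath + len - 3, ".gz") == 0)
        rc = fio_send_file_gz(from_fullpath, to_fullpath, out, thread_num);
    else
        rc = fio_send_file(from_fullpath, to_fullpath, out, thread_num);

    if (rc == FILE_MISSING)
        elog(LOG, "Thread [%d]: Source file '%s' is absent on remote host",
             thread_num, from_fullpath);
    else if (rc != SEND_OK)
        elog(WARNING, "Thread [%d]: Failed to fetch file '%s', error code %i",
             thread_num, from_fullpath, rc);

    return rc;
}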
/* Send file content
 * On error we return a FIO_ERROR message with one of the following codes:
* FILE_MISSING (-1)
* OPEN_FAILED (-2)
* READ_FAILED (-3)
*
*/
static void fio_send_file_impl(int out, char const* path)
{
FILE *fp;
fio_header hdr;
char *buf = pgut_malloc(CHUNK_SIZE);
ssize_t read_len = 0;
char *errormsg = NULL;
/* open source file for read */
/* TODO: check that file is regular file */
fp = fopen(path, PG_BINARY_R);
if (!fp)
{
hdr.cop = FIO_ERROR;
/* do not send exact wording of ENOENT error message
* because it is a very common error in our case, so
* error code is enough.
*/
if (errno == ENOENT)
{
hdr.arg = FILE_MISSING;
hdr.size = 0;
}
else
{
hdr.arg = OPEN_FAILED;
errormsg = pgut_malloc(MAXPGPATH);
/* Construct the error message */
snprintf(errormsg, MAXPGPATH, "Cannot open source file '%s': %s", path, strerror(errno));
hdr.size = strlen(errormsg) + 1;
}
/* send header and message */
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
if (errormsg)
IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size);
goto cleanup;
}
/* copy content */
for (;;)
{
read_len = fread(buf, 1, CHUNK_SIZE, fp);
/* report error */
if (read_len < 0 || (read_len == 0 && !feof(fp)))
{
hdr.cop = FIO_ERROR;
errormsg = pgut_malloc(MAXPGPATH);
hdr.arg = READ_FAILED;
/* Construct the error message */
snprintf(errormsg, MAXPGPATH, "Cannot read source file '%s': %s", path, strerror(errno));
hdr.size = strlen(errormsg) + 1;
/* send header and message */
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size);
goto cleanup;
}
else if (read_len == 0)
break;
else
{
/* send chunk */
hdr.cop = FIO_PAGE;
hdr.size = read_len;
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
IO_CHECK(fio_write_all(out, buf, read_len), read_len);
}
}
/* we are done, send eof */
hdr.cop = FIO_SEND_FILE_EOF;
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
cleanup:
if (fp)
fclose(fp);
pg_free(buf);
pg_free(errormsg);
return;
}
/* Execute commands at remote host */
@ -1492,7 +1986,7 @@ void fio_communicate(int in, int out)
SYS_CHECK(setmode(out, _O_BINARY));
#endif
/* Main loop until command of processing master command */
/* Main loop until end of processing all master commands */
while ((rc = fio_read_all(in, &hdr, sizeof hdr)) == sizeof(hdr)) {
if (hdr.size != 0) {
if (hdr.size > buf_size) {
@ -1504,7 +1998,7 @@ void fio_communicate(int in, int out)
}
switch (hdr.cop) {
case FIO_LOAD: /* Send file content */
fio_send_file(out, buf);
fio_load_file(out, buf);
break;
case FIO_OPENDIR: /* Open directory for traversal */
dir[hdr.handle] = opendir(buf);
@ -1609,10 +2103,17 @@ void fio_communicate(int in, int out)
break;
case FIO_SEND_PAGES:
Assert(hdr.size == sizeof(fio_send_request));
fio_send_pages_impl(fd[hdr.handle], out, (fio_send_request*)buf);
fio_send_pages_impl(fd[hdr.handle], out, buf, false);
break;
case FIO_SEND_PAGES_PAGEMAP:
// buf contains the fio_send_request header and the bitmap.
fio_send_pages_impl(fd[hdr.handle], out, buf, true);
break;
case FIO_SEND_FILE:
fio_send_file_impl(out, buf);
break;
case FIO_SYNC:
/* open file and fsync it */
/* open file and fsync it */
tmp_fd = open(buf, O_WRONLY | PG_BINARY, FILE_PERMISSIONS);
if (tmp_fd < 0)
hdr.arg = errno;
@ -1629,9 +2130,16 @@ void fio_communicate(int in, int out)
break;
case FIO_GET_CRC32:
/* calculate crc32 for a file */
crc = pgFileGetCRC(buf, true, true);
if (hdr.arg == 1)
crc = pgFileGetCRCgz(buf, true, true);
else
crc = pgFileGetCRC(buf, true, true);
IO_CHECK(fio_write_all(out, &crc, sizeof(crc)), sizeof(crc));
break;
case FIO_DISCONNECT:
hdr.cop = FIO_DISCONNECTED;
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
break;
default:
Assert(false);
}

View File

@ -33,10 +33,20 @@ typedef enum
FIO_OPENDIR,
FIO_READDIR,
FIO_CLOSEDIR,
FIO_SEND_PAGES,
FIO_PAGE,
FIO_WRITE_COMPRESSED,
FIO_GET_CRC32
FIO_GET_CRC32,
/* used in fio_send_pages */
FIO_SEND_PAGES,
FIO_SEND_PAGES_PAGEMAP,
FIO_ERROR,
FIO_SEND_FILE,
// FIO_CHUNK,
FIO_SEND_FILE_EOF,
FIO_SEND_FILE_CORRUPTION,
/* messages for closing connection */
FIO_DISCONNECT,
FIO_DISCONNECTED,
} fio_operations;
typedef enum
@ -49,7 +59,6 @@ typedef enum
#define FIO_FDMAX 64
#define FIO_PIPE_MARKER 0x40000000
#define PAGE_CHECKSUM_MISMATCH (-256)
#define SYS_CHECK(cmd) do if ((cmd) < 0) { fprintf(stderr, "%s:%d: (%s) %s\n", __FILE__, __LINE__, #cmd, strerror(errno)); exit(EXIT_FAILURE); } while (0)
#define IO_CHECK(cmd, size) do { int _rc = (cmd); if (_rc != (size)) fio_error(_rc, size, __FILE__, __LINE__); } while (0)
@ -83,10 +92,6 @@ extern int fio_fclose(FILE* f);
extern int fio_ffstat(FILE* f, struct stat* st);
extern void fio_error(int rc, int size, char const* file, int line);
struct pgFile;
extern int fio_send_pages(FILE* in, FILE* out, struct pgFile *file, XLogRecPtr horizonLsn,
BlockNumber* nBlocksSkipped, int calg, int clevel);
extern int fio_open(char const* name, int mode, fio_location location);
extern ssize_t fio_write(int fd, void const* buf, size_t size);
extern ssize_t fio_read(int fd, void* buf, size_t size);
@ -97,7 +102,7 @@ extern int fio_truncate(int fd, off_t size);
extern int fio_close(int fd);
extern void fio_disconnect(void);
extern int fio_sync(char const* path, fio_location location);
extern pg_crc32 fio_get_crc32(const char *file_path, fio_location location);
extern pg_crc32 fio_get_crc32(const char *file_path, fio_location location, bool decompress);
extern int fio_rename(char const* old_path, char const* new_path, fio_location location);
extern int fio_symlink(char const* target, char const* link_path, fio_location location);
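A brief illustration of the new decompress flag on fio_get_crc32() above; the file names are hypothetical:

/* Illustration only: CRC32 of a gzipped WAL segment is computed over
 * its decompressed content; plain files pass decompress = false. */
pg_crc32 crc_plain = fio_get_crc32("000000010000000000000002",
                                   FIO_LOCAL_HOST, false);
pg_crc32 crc_gz    = fio_get_crc32("000000010000000000000003.gz",
                                   FIO_LOCAL_HOST, true);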

View File

@ -197,3 +197,16 @@ parray_bsearch(parray *array, const void *key, int(*compare)(const void *, const
{
return bsearch(&key, array->data, array->used, sizeof(void *), compare);
}
/* checks that parray contains element */
bool parray_contains(parray *array, void *elem)
{
int i;
for (i = 0; i < parray_num(array); i++)
{
if (parray_get(array, i) == elem)
return true;
}
return false;
}
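A quick usage sketch for the new helper (illustrative; assumes parray_new() and parray_append() from the same module). Note that the comparison is by pointer identity, not by content:

/* Illustration only: parray_contains() compares stored pointers. */
char    first[]  = "value";
char    second[] = "value";        /* equal content, different address */
parray *list     = parray_new();

parray_append(list, first);

Assert(parray_contains(list, first));   /* found: same pointer          */
Assert(!parray_contains(list, second)); /* not found: different pointer */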

View File

@ -30,6 +30,7 @@ extern size_t parray_num(const parray *array);
extern void parray_qsort(parray *array, int(*compare)(const void *, const void *));
extern void *parray_bsearch(parray *array, const void *key, int(*compare)(const void *, const void *));
extern void parray_walk(parray *array, void (*action)(void *));
extern bool parray_contains(parray *array, void *elem);
#endif /* PARRAY_H */

View File

@ -220,7 +220,7 @@ bool launch_agent(void)
return false;
} else {
#endif
elog(LOG, "Spawn agent %d version %s", child_pid, PROGRAM_VERSION);
elog(LOG, "Start SSH client process, pid %d", child_pid);
SYS_CHECK(close(infd[1])); /* These are being used by the child */
SYS_CHECK(close(outfd[0]));
SYS_CHECK(close(errfd[1]));

View File

@ -479,7 +479,7 @@ do_validate_instance(void)
result = scan_parent_chain(current_backup, &tmp_backup);
/* chain is broken */
if (result == 0)
if (result == ChainIsBroken)
{
char *parent_backup_id;
/* determine missing backup ID */
@ -505,7 +505,7 @@ do_validate_instance(void)
continue;
}
/* chain is whole, but at least one parent is invalid */
else if (result == 1)
else if (result == ChainIsInvalid)
{
/* Oldest corrupt backup has a chance for revalidation */
if (current_backup->start_time != tmp_backup->start_time)
@ -630,7 +630,7 @@ do_validate_instance(void)
*/
result = scan_parent_chain(backup, &tmp_backup);
if (result == 1)
if (result == ChainIsInvalid)
{
/* revalidation makes sense only if the oldest invalid backup is current_backup
*/

View File

@ -263,7 +263,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
log_content)
else:
self.assertIn(
"ERROR: WAL segment 000000010000000000000002 could not be archived in 60 seconds",
"ERROR: WAL segment 000000010000000000000003 could not be archived in 60 seconds",
log_content)
log_file = os.path.join(node.logs_dir, 'postgresql.log')
@ -281,7 +281,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
# @unittest.skip("skip")
def test_pgpro434_4(self):
"""
Check pg_stop_backup_timeout, needed backup_timeout
Check pg_stop_backup_timeout, libpq-timeout requested.
Fixed in commit d84d79668b0c139 and assert fixed by ptrack 1.7
"""
fname = self.id().split('.')[3]
@ -398,15 +398,11 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
log_content)
self.assertIn(
'INFO: pg_probackup archive-push from',
'pg_probackup archive-push WAL file',
log_content)
self.assertIn(
'ERROR: WAL segment ',
log_content)
self.assertIn(
'already exists.',
'WAL file already exists in archive with different checksum',
log_content)
self.assertNotIn(
@ -448,8 +444,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '30s'})
pg_options={'checkpoint_timeout': '30s'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
@ -487,9 +482,13 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
self.assertIn(
'DETAIL: The failed archive command was:', log_content)
self.assertIn(
'INFO: pg_probackup archive-push from', log_content)
'pg_probackup archive-push WAL file', log_content)
self.assertNotIn(
'WAL file already exists in archive with '
'different checksum, overwriting', log_content)
self.assertIn(
'{0}" already exists.'.format(filename), log_content)
'WAL file already exists in archive with '
'different checksum', log_content)
self.assertNotIn(
'pg_probackup archive-push completed successfully', log_content)
@ -497,7 +496,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
self.set_archiving(backup_dir, 'node', node, overwrite=True)
node.reload()
self.switch_wal_segment(node)
sleep(2)
sleep(5)
with open(log_file, 'r') as f:
log_content = f.read()
@ -505,6 +504,10 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
'pg_probackup archive-push completed successfully' in log_content,
'Expecting messages about successful execution of archive_command')
self.assertIn(
'WAL file already exists in archive with '
'different checksum, overwriting', log_content)
# Clean after yourself
self.del_test_dir(module_name, fname)
@ -520,7 +523,9 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
self.set_archiving(
backup_dir, 'node', node,
log_level='verbose', archive_timeout=60)
node.slow_start()
@ -579,12 +584,9 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
log_file = os.path.join(node.logs_dir, 'postgresql.log')
with open(log_file, 'r') as f:
log_content = f.read()
self.assertIn(
'Cannot open destination temporary WAL file',
log_content)
self.assertIn(
'Reusing stale destination temporary WAL file',
'Reusing stale temp WAL file',
log_content)
# Clean after yourself
@ -602,7 +604,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node, archive_timeout=60)
node.slow_start()
@ -905,8 +907,8 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '30s',
'archive_timeout': '10s'}
)
'archive_timeout': '10s'})
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
@ -923,6 +925,8 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,10000) i")
master.pgbench_init(scale=5)
# TAKE FULL ARCHIVE BACKUP FROM MASTER
self.backup_node(backup_dir, 'master', master)
# GET LOGICAL CONTENT FROM MASTER
@ -937,11 +941,11 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
pgdata_replica = self.pgdata_content(replica.data_dir)
self.compare_pgdata(pgdata_master, pgdata_replica)
self.set_replica(master, replica, synchronous=True)
self.set_replica(master, replica, synchronous=False)
# ADD INSTANCE REPLICA
# self.add_instance(backup_dir, 'replica', replica)
# SET ARCHIVING FOR REPLICA
# self.set_archiving(backup_dir, 'replica', replica, replica=True)
self.set_archiving(backup_dir, 'master', replica, replica=True)
replica.slow_start(replica=True)
# CHECK LOGICAL CORRECTNESS on REPLICA
@ -973,6 +977,18 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
self.assertEqual(
'OK', self.show_pb(backup_dir, 'master', backup_id)['status'])
master.pgbench_init(scale=50)
sleep(10)
replica.promote()
master.pgbench_init(scale=10)
replica.pgbench_init(scale=10)
exit(1)
# Clean after yourself
self.del_test_dir(module_name, fname)
@ -1718,7 +1734,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node, log_level='verbose')
node.slow_start()
backup_id = self.backup_node(backup_dir, 'node', node)
@ -1734,6 +1750,8 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
node.slow_start()
node.pgbench_init(scale=2)
sleep(5)
show = self.show_archive(backup_dir)
timelines = show[0]['timelines']
@ -1755,12 +1773,571 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
tli13['closest-backup-id'])
self.assertEqual(
'0000000D000000000000001B',
'0000000D000000000000001C',
tli13['max-segno'])
# Clean after yourself
self.del_test_dir(module_name, fname)
@unittest.skip("skip")
# @unittest.expectedFailure
def test_archiving_and_slots(self):
"""
Check that archiving doesn't break the slot
guarantee.
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'autovacuum': 'off',
'checkpoint_timeout': '30s',
'max_wal_size': '64MB'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node, log_level='verbose')
node.slow_start()
if self.get_version(node) < 100000:
pg_receivexlog_path = self.get_bin_path('pg_receivexlog')
else:
pg_receivexlog_path = self.get_bin_path('pg_receivewal')
# "pg_receivewal --create-slot --slot archive_slot --if-not-exists "
# "&& pg_receivewal --synchronous -Z 1 /tmp/wal --slot archive_slot --no-loop"
self.run_binary(
[
pg_receivexlog_path, '-p', str(node.port), '--synchronous',
'--create-slot', '--slot', 'archive_slot', '--if-not-exists'
])
node.pgbench_init(scale=10)
pg_receivexlog = self.run_binary(
[
pg_receivexlog_path, '-p', str(node.port), '--synchronous',
'-D', os.path.join(backup_dir, 'wal', 'node'),
'--no-loop', '--slot', 'archive_slot',
'-Z', '1'
], asynchronous=True)
if pg_receivexlog.returncode:
self.assertFalse(
True,
'Failed to start pg_receivexlog: {0}'.format(
pg_receivexlog.communicate()[1]))
sleep(2)
pg_receivexlog.kill()
backup_id = self.backup_node(backup_dir, 'node', node)
node.pgbench_init(scale=20)
exit(1)
# Clean after yourself
self.del_test_dir(module_name, fname)
def test_archive_push_sanity(self):
""""""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'archive_mode': 'on',
'archive_command': 'exit 1'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
node.slow_start()
node.pgbench_init(scale=50)
node.stop()
self.set_archiving(backup_dir, 'node', node)
os.remove(os.path.join(node.logs_dir, 'postgresql.log'))
node.slow_start()
self.backup_node(backup_dir, 'node', node)
with open(os.path.join(node.logs_dir, 'postgresql.log'), 'r') as f:
postgres_log_content = f.read()
# print(postgres_log_content)
# make sure that .backup file is not compressed
self.assertNotIn('.backup.gz', postgres_log_content)
self.assertNotIn('WARNING', postgres_log_content)
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(
backup_dir, 'node', replica,
data_dir=replica.data_dir, options=['-R'])
#self.set_archiving(backup_dir, 'replica', replica, replica=True)
self.set_auto_conf(replica, {'port': replica.port})
self.set_auto_conf(replica, {'archive_mode': 'always'})
self.set_auto_conf(replica, {'hot_standby': 'on'})
replica.slow_start(replica=True)
self.wait_until_replica_catch_with_master(node, replica)
node.pgbench_init(scale=5)
replica.promote()
replica.pgbench_init(scale=10)
with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f:
replica_log_content = f.read()
# make sure that .partial file is not compressed
self.assertNotIn('.partial.gz', replica_log_content)
# make sure that .history file is not compressed
self.assertNotIn('.history.gz', replica_log_content)
self.assertNotIn('WARNING', replica_log_content)
output = self.show_archive(
backup_dir, 'node', as_json=False, as_text=True,
options=['--log-level-console=VERBOSE'])
self.assertNotIn('WARNING', output)
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.expectedFailure
# @unittest.skip("skip")
def test_archive_pg_receivexlog_partial_handling(self):
"""check that archive-get delivers .partial and .gz.partial files"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
node.slow_start()
self.backup_node(backup_dir, 'node', node, options=['--stream'])
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(
backup_dir, 'node', replica, replica.data_dir, options=['-R'])
self.set_auto_conf(replica, {'port': replica.port})
self.set_replica(node, replica)
self.add_instance(backup_dir, 'replica', replica)
# self.set_archiving(backup_dir, 'replica', replica, replica=True)
replica.slow_start(replica=True)
node.safe_psql('postgres', 'CHECKPOINT')
if self.get_version(replica) < 100000:
pg_receivexlog_path = self.get_bin_path('pg_receivexlog')
else:
pg_receivexlog_path = self.get_bin_path('pg_receivewal')
cmdline = [
pg_receivexlog_path, '-p', str(replica.port), '--synchronous',
'-D', os.path.join(backup_dir, 'wal', 'replica')]
if self.archive_compress and node.major_version >= 10:
cmdline += ['-Z', '1']
pg_receivexlog = self.run_binary(cmdline, asynchronous=True)
if pg_receivexlog.returncode:
self.assertFalse(
True,
'Failed to start pg_receivexlog: {0}'.format(
pg_receivexlog.communicate()[1]))
node.safe_psql(
"postgres",
"create table t_heap as select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,1000000) i")
# FULL
self.backup_node(backup_dir, 'replica', replica, options=['--stream'])
node.safe_psql(
"postgres",
"insert into t_heap select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(1000000,2000000) i")
node_restored = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node_restored'))
node_restored.cleanup()
self.restore_node(
backup_dir, 'replica', node_restored,
node_restored.data_dir, options=['--recovery-target=latest', '--recovery-target-action=promote'])
self.set_auto_conf(node_restored, {'port': node_restored.port})
self.set_auto_conf(node_restored, {'hot_standby': 'off'})
# it will set node_restored as warm standby.
# with open(os.path.join(node_restored.data_dir, "standby.signal"), 'w') as f:
# f.flush()
# f.close()
node_restored.slow_start()
result = node.safe_psql(
"postgres",
"select sum(id) from t_heap")
result_new = node_restored.safe_psql(
"postgres",
"select sum(id) from t_heap")
self.assertEqual(result, result_new)
# Clean after yourself
pg_receivexlog.kill()
self.del_test_dir(module_name, fname)
def test_multi_timeline_recovery_prefetching(self):
""""""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={'autovacuum': 'off'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
self.backup_node(backup_dir, 'node', node)
node.pgbench_init(scale=50)
target_xid = node.safe_psql(
'postgres',
'select txid_current()').rstrip()
node.pgbench_init(scale=20)
node.stop()
node.cleanup()
self.restore_node(
backup_dir, 'node', node,
options=[
'--recovery-target-xid={0}'.format(target_xid),
'--recovery-target-action=promote'])
node.slow_start()
node.pgbench_init(scale=20)
target_xid = node.safe_psql(
'postgres',
'select txid_current()').rstrip()
node.stop(['-m', 'immediate', '-D', node.data_dir])
node.cleanup()
self.restore_node(
backup_dir, 'node', node,
options=[
# '--recovery-target-xid={0}'.format(target_xid),
'--recovery-target-timeline=2',
# '--recovery-target-action=promote',
'--no-validate'])
node.slow_start()
node.pgbench_init(scale=20)
result = node.safe_psql(
'postgres',
'select * from pgbench_accounts')
node.stop()
node.cleanup()
self.restore_node(
backup_dir, 'node', node,
options=[
# '--recovery-target-xid=100500',
'--recovery-target-timeline=3',
# '--recovery-target-action=promote',
'--no-validate'])
os.remove(os.path.join(node.logs_dir, 'postgresql.log'))
restore_command = self.get_restore_command(backup_dir, 'node', node)
restore_command += ' -j 2 --batch-size=10 --log-level-console=VERBOSE'
if node.major_version >= 12:
node.append_conf(
'probackup_recovery.conf', "restore_command = '{0}'".format(restore_command))
else:
node.append_conf(
'recovery.conf', "restore_command = '{0}'".format(restore_command))
node.slow_start()
result_new = node.safe_psql(
'postgres',
'select * from pgbench_accounts')
self.assertEqual(result, result_new)
with open(os.path.join(node.logs_dir, 'postgresql.log'), 'r') as f:
postgres_log_content = f.read()
# check that requesting a non-existing segment does not
# throw away the prefetched files
self.assertIn(
'pg_probackup archive-get failed to '
'deliver WAL file: 000000030000000000000006',
postgres_log_content)
self.assertIn(
'pg_probackup archive-get failed to '
'deliver WAL file: 000000020000000000000006',
postgres_log_content)
self.assertIn(
'pg_probackup archive-get used prefetched '
'WAL segment 000000010000000000000006, prefetch state: 5/10',
postgres_log_content)
# Clean after yourself
self.del_test_dir(module_name, fname)
def test_archive_get_batching_sanity(self):
"""
Make sure that batching works.
.gz file is corrupted and uncompressed is not, check that both
corruption detected and uncompressed file is used.
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={'autovacuum': 'off'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
self.backup_node(backup_dir, 'node', node, options=['--stream'])
node.pgbench_init(scale=50)
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(
backup_dir, 'node', replica, replica.data_dir)
self.set_replica(node, replica, log_shipping=True)
if node.major_version >= 12:
self.set_auto_conf(replica, {'restore_command': 'exit 1'})
else:
replica.append_conf('recovery.conf', "restore_command = 'exit 1'")
replica.slow_start(replica=True)
# at this point replica is consistent
restore_command = self.get_restore_command(backup_dir, 'node', replica)
restore_command += ' -j 2 --batch-size=10'
print(restore_command)
if node.major_version >= 12:
self.set_auto_conf(replica, {'restore_command': restore_command})
else:
replica.append_conf(
'recovery.conf', "restore_command = '{0}'".format(restore_command))
replica.restart()
sleep(5)
with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f:
postgres_log_content = f.read()
self.assertIn(
'pg_probackup archive-get completed successfully, fetched: 10/10',
postgres_log_content)
self.assertIn('used prefetched WAL segment', postgres_log_content)
self.assertIn('prefetch state: 9/10', postgres_log_content)
self.assertIn('prefetch state: 8/10', postgres_log_content)
# Clean after yourself
self.del_test_dir(module_name, fname)
def test_archive_get_prefetch_corruption(self):
"""
Make sure that WAL corruption is detected.
And --prefetch-dir is honored.
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={'autovacuum': 'off', 'wal_keep_segments': '200'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
self.backup_node(backup_dir, 'node', node, options=['--stream'])
node.pgbench_init(scale=50)
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(
backup_dir, 'node', replica, replica.data_dir)
self.set_replica(node, replica, log_shipping=True)
if node.major_version >= 12:
self.set_auto_conf(replica, {'restore_command': 'exit 1'})
else:
replica.append_conf('recovery.conf', "restore_command = 'exit 1'")
replica.slow_start(replica=True)
# at this point replica is consistent
restore_command = self.get_restore_command(backup_dir, 'node', replica)
restore_command += ' -j 2 --batch-size=10 --log-level-console=VERBOSE'
#restore_command += ' --batch-size=2 --log-level-console=VERBOSE'
if node.major_version >= 12:
self.set_auto_conf(replica, {'restore_command': restore_command})
else:
replica.append_conf(
'recovery.conf', "restore_command = '{0}'".format(restore_command))
replica.restart()
sleep(5)
with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f:
postgres_log_content = f.read()
self.assertIn(
'pg_probackup archive-get completed successfully, fetched: 10/10',
postgres_log_content)
self.assertIn('used prefetched WAL segment', postgres_log_content)
self.assertIn('prefetch state: 9/10', postgres_log_content)
self.assertIn('prefetch state: 8/10', postgres_log_content)
replica.stop()
# generate WAL, copy it into prefetch directory, then corrupt
# some segment
node.pgbench_init(scale=20)
sleep(10)
# now copy WAL files into prefetch directory and corrupt some of them
archive_dir = os.path.join(backup_dir, 'wal', 'node')
files = os.listdir(archive_dir)
files.sort()
for filename in [files[-4], files[-3], files[-2], files[-1]]:
src_file = os.path.join(archive_dir, filename)
if node.major_version >= 10:
wal_dir = 'pg_wal'
else:
wal_dir = 'pg_xlog'
if filename.endswith('.gz'):
dst_file = os.path.join(replica.data_dir, wal_dir, 'pbk_prefetch', filename[:-3])
with gzip.open(src_file, 'rb') as f_in, open(dst_file, 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
else:
dst_file = os.path.join(replica.data_dir, wal_dir, 'pbk_prefetch', filename)
shutil.copyfile(src_file, dst_file)
print(dst_file)
# corrupt file
if files[-2].endswith('.gz'):
filename = files[-2][:-3]
else:
filename = files[-2]
prefetched_file = os.path.join(replica.data_dir, wal_dir, 'pbk_prefetch', filename)
with open(prefetched_file, "rb+", 0) as f:
f.seek(8192*2)
f.write(b"SURIKEN")
f.flush()
f.close()
# enable restore_command
restore_command = self.get_restore_command(backup_dir, 'node', replica)
restore_command += ' --batch-size=2 --log-level-console=VERBOSE'
if node.major_version >= 12:
self.set_auto_conf(replica, {'restore_command': restore_command})
else:
replica.append_conf(
'recovery.conf', "restore_command = '{0}'".format(restore_command))
os.remove(os.path.join(replica.logs_dir, 'postgresql.log'))
replica.slow_start(replica=True)
sleep(10)
with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f:
postgres_log_content = f.read()
self.assertIn(
'Prefetched WAL segment {0} is invalid, cannot use it'.format(filename),
postgres_log_content)
self.assertIn(
'LOG: restored log file "{0}" from archive'.format(filename),
postgres_log_content)
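# the corrupted prefetched segment must be discarded and the same segment
# re-fetched from the archive, hence both log lines asserted above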
# Clean after yourself
self.del_test_dir(module_name, fname)
# TODO: test with multiple unarchived segments.
# TODO: test with a corrupted file in the archive.
# Important: the switchpoint may be a NullOffset LSN that does not actually
# exist in the archive at all, so the WAL validation code must account for that.

View File

@ -228,10 +228,9 @@ class BackupTest(ProbackupTest, unittest.TestCase):
"without valid full backup.\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
"ERROR: Valid backup on current timeline 1 is not found. "
"Create new FULL backup before an incremental one.",
e.message,
self.assertTrue(
"WARNING: Valid backup on current timeline 1 is not found" in e.message and
"ERROR: Create new full backup before an incremental one" in e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
@ -488,6 +487,7 @@ class BackupTest(ProbackupTest, unittest.TestCase):
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
if self.ptrack and node.major_version > 11:
@ -499,18 +499,29 @@ class BackupTest(ProbackupTest, unittest.TestCase):
"postgres",
"create table t_heap as select 1 as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,1000) i")
node.safe_psql(
"postgres",
"CHECKPOINT;")
"from generate_series(0,10000) i")
heap_path = node.safe_psql(
"postgres",
"select pg_relation_filepath('t_heap')").rstrip()
self.backup_node(
backup_dir, 'node', node,
backup_type="full", options=["-j", "4", "--stream"])
node.safe_psql(
"postgres",
"select count(*) from t_heap")
node.safe_psql(
"postgres",
"update t_heap set id = id + 10000")
node.stop()
with open(os.path.join(node.data_dir, heap_path), "rb+", 0) as f:
heap_fullpath = os.path.join(node.data_dir, heap_path)
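# offset 9000 falls inside the second 8 KB page of the relation (block 1),
# so the checksum failures asserted below all mention block 1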
with open(heap_fullpath, "rb+", 0) as f:
f.seek(9000)
f.write(b"bla")
f.flush()
@ -518,6 +529,10 @@ class BackupTest(ProbackupTest, unittest.TestCase):
node.slow_start()
# self.backup_node(
# backup_dir, 'node', node,
# backup_type="full", options=["-j", "4", "--stream"])
try:
self.backup_node(
backup_dir, 'node', node,
@ -525,11 +540,360 @@ class BackupTest(ProbackupTest, unittest.TestCase):
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because tablespace mapping is incorrect"
"Expecting Error because of block corruption"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
if self.ptrack:
self.assertIn(
'ERROR: Corruption detected in file "{0}", block 1: '
'page verification failed, calculated checksum'.format(
heap_fullpath),
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
sleep(1)
try:
self.backup_node(
backup_dir, 'node', node,
backup_type="delta", options=["-j", "4", "--stream"])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of block corruption"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: Corruption detected in file "{0}", block 1: '
'page verification failed, calculated checksum'.format(
heap_fullpath),
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
sleep(1)
try:
self.backup_node(
backup_dir, 'node', node,
backup_type="page", options=["-j", "4", "--stream"])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of block corruption"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: Corruption detected in file "{0}", block 1: '
'page verification failed, calculated checksum'.format(
heap_fullpath),
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
sleep(1)
if self.ptrack:
try:
self.backup_node(
backup_dir, 'node', node,
backup_type="ptrack", options=["-j", "4", "--stream"])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of block corruption"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertTrue(
'WARNING: page verification failed, '
'calculated checksum' in e.message and
'ERROR: query failed: ERROR: '
'invalid page in block 1 of relation' in e.message and
'ERROR: Data files transferring failed' in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_backup_detect_invalid_block_header(self):
"""make node, corrupt some page, check that backup failed"""
fname = self.id().split('.')[3]
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
ptrack_enable=self.ptrack,
initdb_params=['--data-checksums'])
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
if self.ptrack and node.major_version > 11:
node.safe_psql(
"postgres",
"create extension ptrack")
node.safe_psql(
"postgres",
"create table t_heap as select 1 as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,10000) i")
heap_path = node.safe_psql(
"postgres",
"select pg_relation_filepath('t_heap')").rstrip()
self.backup_node(
backup_dir, 'node', node,
backup_type="full", options=["-j", "4", "--stream"])
node.safe_psql(
"postgres",
"select count(*) from t_heap")
node.safe_psql(
"postgres",
"update t_heap set id = id + 10000")
node.stop()
heap_fullpath = os.path.join(node.data_dir, heap_path)
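# offset 8193 is at the very start of block 1, so the garbage lands in the
# page header and should trip the pd_lower sanity check asserted below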
with open(heap_fullpath, "rb+", 0) as f:
f.seek(8193)
f.write(b"blahblahblahblah")
f.flush()
f.close()
node.slow_start()
# self.backup_node(
# backup_dir, 'node', node,
# backup_type="full", options=["-j", "4", "--stream"])
try:
self.backup_node(
backup_dir, 'node', node,
backup_type="full", options=["-j", "4", "--stream"])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of block corruption"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: Corruption detected in file "{0}", block 1: '
'page header invalid, pd_lower'.format(heap_fullpath),
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
sleep(1)
try:
self.backup_node(
backup_dir, 'node', node,
backup_type="delta", options=["-j", "4", "--stream"])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of block corruption"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: Corruption detected in file "{0}", block 1: '
'page header invalid, pd_lower'.format(heap_fullpath),
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
sleep(1)
try:
self.backup_node(
backup_dir, 'node', node,
backup_type="page", options=["-j", "4", "--stream"])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of block corruption"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: Corruption detected in file "{0}", block 1: '
'page header invalid, pd_lower'.format(heap_fullpath),
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
sleep(1)
if self.ptrack:
try:
self.backup_node(
backup_dir, 'node', node,
backup_type="ptrack", options=["-j", "4", "--stream"])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of block corruption"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertTrue(
'WARNING: page verification failed, '
'calculated checksum' in e.message and
'ERROR: query failed: ERROR: '
'invalid page in block 1 of relation' in e.message and
'ERROR: Data files transferring failed' in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_backup_detect_missing_permissions(self):
"""make node, corrupt some page, check that backup failed"""
fname = self.id().split('.')[3]
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
ptrack_enable=self.ptrack,
initdb_params=['--data-checksums'])
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
if self.ptrack and node.major_version > 11:
node.safe_psql(
"postgres",
"create extension ptrack")
node.safe_psql(
"postgres",
"create table t_heap as select 1 as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,10000) i")
heap_path = node.safe_psql(
"postgres",
"select pg_relation_filepath('t_heap')").rstrip()
self.backup_node(
backup_dir, 'node', node,
backup_type="full", options=["-j", "4", "--stream"])
node.safe_psql(
"postgres",
"select count(*) from t_heap")
node.safe_psql(
"postgres",
"update t_heap set id = id + 10000")
node.stop()
heap_fullpath = os.path.join(node.data_dir, heap_path)
with open(heap_fullpath, "rb+", 0) as f:
f.seek(8193)
f.write(b"blahblahblahblah")
f.flush()
f.close()
node.slow_start()
# self.backup_node(
# backup_dir, 'node', node,
# backup_type="full", options=["-j", "4", "--stream"])
try:
self.backup_node(
backup_dir, 'node', node,
backup_type="full", options=["-j", "4", "--stream"])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of block corruption"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: Corruption detected in file "{0}", block 1: '
'page header invalid, pd_lower'.format(heap_fullpath),
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
sleep(1)
try:
self.backup_node(
backup_dir, 'node', node,
backup_type="delta", options=["-j", "4", "--stream"])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of block corruption"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: Corruption detected in file "{0}", block 1: '
'page header invalid, pd_lower'.format(heap_fullpath),
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
sleep(1)
try:
self.backup_node(
backup_dir, 'node', node,
backup_type="page", options=["-j", "4", "--stream"])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of block corruption"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: Corruption detected in file "{0}", block 1: '
'page header invalid, pd_lower'.format(heap_fullpath),
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
sleep(1)
if self.ptrack:
try:
self.backup_node(
backup_dir, 'node', node,
backup_type="ptrack", options=["-j", "4", "--stream"])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of block corruption"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertTrue(
'WARNING: page verification failed, '
'calculated checksum' in e.message and
@ -538,24 +902,6 @@ class BackupTest(ProbackupTest, unittest.TestCase):
'ERROR: Data files transferring failed' in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
else:
if self.remote:
self.assertTrue(
"ERROR: Failed to read file" in e.message and
"data file checksum mismatch" in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
else:
self.assertIn(
'WARNING: Corruption detected in file',
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
self.assertIn(
'ERROR: Data file corruption',
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)
@ -1947,10 +2293,9 @@ class BackupTest(ProbackupTest, unittest.TestCase):
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: Valid backup on current timeline 1 is not found. '
'Create new FULL backup before an incremental one.',
e.message,
self.assertTrue(
'WARNING: Valid backup on current timeline 1 is not found' in e.message and
'ERROR: Create new full backup before an incremental one' in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
@ -1977,10 +2322,13 @@ class BackupTest(ProbackupTest, unittest.TestCase):
initdb_params=['--data-checksums'],
pg_options={
'archive_timeout': '30s',
'checkpoint_timeout': '1h'})
'archive_mode': 'always',
'checkpoint_timeout': '60s',
'wal_level': 'logical'})
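# archive_mode 'always' presumably lets the standby created later in this
# test archive WAL on its own, which the archive-based replica backups
# below rely on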
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_config(backup_dir, 'node', options=['--archive-timeout=60s'])
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
@ -2100,12 +2448,15 @@ class BackupTest(ProbackupTest, unittest.TestCase):
self.restore_node(backup_dir, 'node', replica)
self.set_replica(node, replica)
self.add_instance(backup_dir, 'replica', replica)
self.set_config(
backup_dir, 'replica',
options=['--archive-timeout=120s', '--log-level-console=LOG'])
self.set_archiving(backup_dir, 'replica', replica, replica=True)
self.set_auto_conf(replica, {'hot_standby': 'on'})
# freeze bgwriter to get rid of RUNNING XACTS records
bgwriter_pid = node.auxiliary_pids[ProcessType.BackgroundWriter][0]
gdb_checkpointer = self.gdb_attach(bgwriter_pid)
# bgwriter_pid = node.auxiliary_pids[ProcessType.BackgroundWriter][0]
# gdb_checkpointer = self.gdb_attach(bgwriter_pid)
copy_tree(
os.path.join(backup_dir, 'wal', 'node'),
@ -2113,21 +2464,22 @@ class BackupTest(ProbackupTest, unittest.TestCase):
replica.slow_start(replica=True)
self.switch_wal_segment(node)
self.switch_wal_segment(node)
# self.switch_wal_segment(node)
# self.switch_wal_segment(node)
# FULL backup from replica
self.backup_node(
backup_dir, 'replica', replica,
datname='backupdb', options=['--stream', '-U', 'backup', '--archive-timeout=30s'])
datname='backupdb', options=['-U', 'backup'])
# stream full backup from replica
self.backup_node(
backup_dir, 'replica', replica,
datname='backupdb', options=['--stream', '-U', 'backup'])
# self.switch_wal_segment(node)
self.backup_node(
backup_dir, 'replica', replica, datname='backupdb',
options=['-U', 'backup', '--archive-timeout=300s'])
# PAGE backup from replica
self.switch_wal_segment(node)
self.backup_node(
backup_dir, 'replica', replica, backup_type='page',
datname='backupdb', options=['-U', 'backup', '--archive-timeout=30s'])
@ -2137,20 +2489,22 @@ class BackupTest(ProbackupTest, unittest.TestCase):
datname='backupdb', options=['--stream', '-U', 'backup'])
# DELTA backup from replica
self.switch_wal_segment(node)
self.backup_node(
backup_dir, 'replica', replica, backup_type='delta',
datname='backupdb', options=['-U', 'backup', '--archive-timeout=30s'])
datname='backupdb', options=['-U', 'backup'])
self.backup_node(
backup_dir, 'replica', replica, backup_type='delta',
datname='backupdb', options=['--stream', '-U', 'backup'])
# PTRACK backup from replica
if self.ptrack:
self.switch_wal_segment(node)
self.backup_node(
backup_dir, 'replica', replica, backup_type='delta',
datname='backupdb', options=['-U', 'backup', '--archive-timeout=30s'])
backup_dir, 'replica', replica, backup_type='ptrack',
datname='backupdb', options=['-U', 'backup'])
self.backup_node(
backup_dir, 'replica', replica, backup_type='delta',
backup_dir, 'replica', replica, backup_type='ptrack',
datname='backupdb', options=['--stream', '-U', 'backup'])
# Clean after yourself
@ -2356,91 +2710,31 @@ class BackupTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_streaming_timeout(self):
def test_note_sanity(self):
"""
Illustrate the problem of loosing exact error
message because our WAL streaming engine is "borrowed"
from pg_receivexlog
test that adding note to backup works as expected
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '1h',
'wal_sender_timeout': '5s'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
node.slow_start()
# FULL backup
gdb = self.backup_node(
backup_dir, 'node', node, gdb=True,
options=['--stream', '--log-level-file=LOG'])
gdb.set_breakpoint('pg_stop_backup')
gdb.run_until_break()
sleep(10)
gdb.continue_execution_until_error()
gdb._execute('detach')
sleep(2)
log_file_path = os.path.join(backup_dir, 'log', 'pg_probackup.log')
with open(log_file_path) as f:
log_content = f.read()
self.assertIn(
'could not receive data from WAL stream',
log_content)
self.assertIn(
'ERROR: Problem in receivexlog',
log_content)
# Clean after yourself
self.del_test_dir(module_name, fname)
def test_note(self):
fname = self.id().split('.')[3]
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
initdb_params=['--data-checksums'])
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
# FULL backup
self.backup_node(backup_dir, 'node', node, options=['--stream', '--log-level-file=LOG', '--note=test_note'])
self.backup_node(
backup_dir, 'node', node,
options=['--stream', '--log-level-file=LOG', '--note=test_note'])
show_backups = self.show_pb(backup_dir, 'node')
# self.assertEqual(len(show_backups), 1)
# print(self.show_pb(backup_dir, as_text=True, as_json=True))
show_backups = self.show_pb(backup_dir, 'node')
self.assertEqual(show_backups[0]['note'], "test_note")
print(self.show_pb(backup_dir, as_text=True, as_json=True))
self.assertEqual(show_backups[0]['note'], "test_note")
# Clean after yourself
self.del_test_dir(module_name, fname)

View File

@ -765,3 +765,83 @@ class CompatibilityTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_page_vacuum_truncate_compression(self):
"""
make node, create table, take full backup,
delete all data, vacuum relation,
take page backup, insert some data,
take second page backup,
restore latest page backup using new binary
and check data correctness
old binary should be 2.2.x version
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={'autovacuum': 'off'})
self.init_pb(backup_dir, old_binary=True)
self.add_instance(backup_dir, 'node', node, old_binary=True)
self.set_archiving(backup_dir, 'node', node, old_binary=True)
node.slow_start()
node.safe_psql(
"postgres",
"create sequence t_seq; "
"create table t_heap as select i as id, "
"md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,1024) i")
node.safe_psql(
"postgres",
"vacuum t_heap")
self.backup_node(
backup_dir, 'node',node, old_binary=True, options=['--compress'])
node.safe_psql(
"postgres",
"delete from t_heap")
node.safe_psql(
"postgres",
"vacuum t_heap")
self.backup_node(
backup_dir, 'node', node, backup_type='page',
old_binary=True, options=['--compress'])
node.safe_psql(
"postgres",
"insert into t_heap select i as id, "
"md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,1) i")
self.backup_node(
backup_dir, 'node', node, backup_type='page',
old_binary=True, options=['--compress'])
pgdata = self.pgdata_content(node.data_dir)
node_restored = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node_restored'))
node_restored.cleanup()
self.restore_node(backup_dir, 'node', node_restored)
# Physical comparison
pgdata_restored = self.pgdata_content(node_restored.data_dir)
self.compare_pgdata(pgdata, pgdata_restored)
self.set_auto_conf(node_restored, {'port': node_restored.port})
node_restored.slow_start()
# Clean after yourself
self.del_test_dir(module_name, fname)

View File

@ -801,3 +801,75 @@ class DeleteTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
def test_delete_error_backups(self):
"""delete increment and all after him"""
fname = self.id().split('.')[3]
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
initdb_params=['--data-checksums'])
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
# full backup mode
self.backup_node(backup_dir, 'node', node)
# page backup mode
self.backup_node(backup_dir, 'node', node, backup_type="page")
# Take FULL BACKUP
backup_id_a = self.backup_node(backup_dir, 'node', node)
# Take PAGE BACKUP
backup_id_b = self.backup_node(backup_dir, 'node', node, backup_type="page")
backup_id_c = self.backup_node(backup_dir, 'node', node, backup_type="page")
backup_id_d = self.backup_node(backup_dir, 'node', node, backup_type="page")
# full backup mode
self.backup_node(backup_dir, 'node', node)
self.backup_node(backup_dir, 'node', node, backup_type="page")
backup_id_e = self.backup_node(backup_dir, 'node', node, backup_type="page")
self.backup_node(backup_dir, 'node', node, backup_type="page")
# Change status to ERROR
self.change_backup_status(backup_dir, 'node', backup_id_a, 'ERROR')
self.change_backup_status(backup_dir, 'node', backup_id_c, 'ERROR')
self.change_backup_status(backup_dir, 'node', backup_id_e, 'ERROR')
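# backups a, c and e are now ERROR; deleting by status is expected to drop
# their descendants as well (b, c and d sit on top of the ERROR full backup a,
# the trailing PAGE sits on top of e), leaving the four OK backups checked below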
print(self.show_pb(backup_dir, as_text=True, as_json=False))
show_backups = self.show_pb(backup_dir, 'node')
self.assertEqual(len(show_backups), 10)
# delete error backups
output = self.delete_pb(backup_dir, 'node', options=['--status=ERROR', '--dry-run'])
show_backups = self.show_pb(backup_dir, 'node')
self.assertEqual(len(show_backups), 10)
self.assertIn(
"Deleting all backups with status 'ERROR' in dry run mode",
output)
self.assertIn(
"INFO: Backup {0} with status OK can be deleted".format(backup_id_d),
output)
print(self.show_pb(backup_dir, as_text=True, as_json=False))
show_backups = self.show_pb(backup_dir, 'node')
output = self.delete_pb(backup_dir, 'node', options=['--status=ERROR'])
print(output)
show_backups = self.show_pb(backup_dir, 'node')
self.assertEqual(len(show_backups), 4)
self.assertEqual(show_backups[0]['status'], "OK")
self.assertEqual(show_backups[1]['status'], "OK")
self.assertEqual(show_backups[2]['status'], "OK")
self.assertEqual(show_backups[3]['status'], "OK")
# Clean after yourself
self.del_test_dir(module_name, fname)

View File

@ -294,3 +294,257 @@ class FalsePositive(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
@unittest.expectedFailure
def test_pg_10_waldir(self):
"""
test that a cluster initialized with a custom --waldir (PG >= 10) restores pg_wal as a symlink
"""
if self.pg_config_version < self.version_to_num('10.0'):
return unittest.skip('You need PostgreSQL >= 10 for this test')
fname = self.id().split('.')[3]
wal_dir = os.path.join(self.tmp_path, module_name, fname, 'wal_dir')
shutil.rmtree(wal_dir, ignore_errors=True)
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=[
'--data-checksums',
'--waldir={0}'.format(wal_dir)])
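# with --waldir the data directory's pg_wal is a symlink into wal_dir;
# the assertion at the end expects the restored cluster to recreate that
# symlink (the test is marked expectedFailure, so this is known not to hold yet)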
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
node.slow_start()
# take FULL backup
self.backup_node(
backup_dir, 'node', node, options=['--stream'])
pgdata = self.pgdata_content(node.data_dir)
# restore backup
node_restored = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node_restored'))
node_restored.cleanup()
self.restore_node(
backup_dir, 'node', node_restored)
# compare pgdata permissions
pgdata_restored = self.pgdata_content(node_restored.data_dir)
self.compare_pgdata(pgdata, pgdata_restored)
self.assertTrue(
os.path.islink(os.path.join(node_restored.data_dir, 'pg_wal')),
'pg_wal should be symlink')
# Clean after yourself
self.del_test_dir(module_name, fname)
@unittest.expectedFailure
# @unittest.skip("skip")
def test_recovery_target_time_backup_victim(self):
"""
Check that for validation to recovery target
probackup chooses valid backup
https://github.com/postgrespro/pg_probackup/issues/104
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
# FULL backup
self.backup_node(backup_dir, 'node', node)
node.safe_psql(
"postgres",
"create table t_heap as select 1 as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,10000) i")
target_time = node.safe_psql(
"postgres",
"select now()").rstrip()
node.safe_psql(
"postgres",
"create table t_heap1 as select 1 as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,100) i")
gdb = self.backup_node(backup_dir, 'node', node, gdb=True)
gdb.set_breakpoint('pg_stop_backup')
gdb.run_until_break()
gdb.remove_all_breakpoints()
gdb._execute('signal SIGINT')
gdb.continue_execution_until_error()
backup_id = self.show_pb(backup_dir, 'node')[1]['id']
self.assertEqual(
'ERROR',
self.show_pb(backup_dir, 'node', backup_id)['status'],
'Backup STATUS should be "ERROR"')
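# validation to target_time must resolve against the earlier valid FULL
# backup rather than the interrupted ERROR backup (see issue 104 above)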
self.validate_pb(
backup_dir, 'node',
options=['--recovery-target-time={0}'.format(target_time)])
# Clean after yourself
self.del_test_dir(module_name, fname)
@unittest.expectedFailure
# @unittest.skip("skip")
def test_recovery_target_lsn_backup_victim(self):
"""
Check that for validation to recovery target
probackup chooses valid backup
https://github.com/postgrespro/pg_probackup/issues/104
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
# FULL backup
self.backup_node(backup_dir, 'node', node)
node.safe_psql(
"postgres",
"create table t_heap as select 1 as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,10000) i")
node.safe_psql(
"postgres",
"create table t_heap1 as select 1 as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,100) i")
gdb = self.backup_node(
backup_dir, 'node', node,
options=['--log-level-console=LOG'], gdb=True)
gdb.set_breakpoint('pg_stop_backup')
gdb.run_until_break()
gdb.remove_all_breakpoints()
gdb._execute('signal SIGINT')
gdb.continue_execution_until_error()
backup_id = self.show_pb(backup_dir, 'node')[1]['id']
self.assertEqual(
'ERROR',
self.show_pb(backup_dir, 'node', backup_id)['status'],
'Backup STATUS should be "ERROR"')
self.switch_wal_segment(node)
target_lsn = self.show_pb(backup_dir, 'node', backup_id)['start-lsn']
self.validate_pb(
backup_dir, 'node',
options=['--recovery-target-lsn={0}'.format(target_lsn)])
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
@unittest.expectedFailure
def test_streaming_timeout(self):
"""
Illustrate the problem of losing the exact error
message because our WAL streaming engine is "borrowed"
from pg_receivexlog
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '1h',
'wal_sender_timeout': '5s'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
node.slow_start()
# FULL backup
gdb = self.backup_node(
backup_dir, 'node', node, gdb=True,
options=['--stream', '--log-level-file=LOG'])
gdb.set_breakpoint('pg_stop_backup')
gdb.run_until_break()
sleep(10)
gdb.continue_execution_until_error()
gdb._execute('detach')
sleep(2)
log_file_path = os.path.join(backup_dir, 'log', 'pg_probackup.log')
with open(log_file_path) as f:
log_content = f.read()
self.assertIn(
'could not receive data from WAL stream',
log_content)
self.assertIn(
'ERROR: Problem in receivexlog',
log_content)
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
@unittest.expectedFailure
def test_validate_all_empty_catalog(self):
"""
"""
fname = self.id().split('.')[3]
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
initdb_params=['--data-checksums'])
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
self.init_pb(backup_dir)
try:
self.validate_pb(backup_dir)
self.assertEqual(
1, 0,
"Expecting Error because backup_dir is empty.\n "
"Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: This backup catalog contains no backup instances',
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)

View File

@ -339,7 +339,7 @@ class ProbackupTest(object):
options['wal_level'] = 'logical'
options['hot_standby'] = 'off'
options['log_line_prefix'] = '"%t [%p]: [%l-1] "'
options['log_line_prefix'] = '%t [%p]: [%l-1] '
options['log_statement'] = 'none'
options['log_duration'] = 'on'
options['log_min_duration_statement'] = 0
@ -1131,7 +1131,8 @@ class ProbackupTest(object):
def set_archiving(
self, backup_dir, instance, node, replica=False,
overwrite=False, compress=False, old_binary=False):
overwrite=False, compress=False, old_binary=False,
log_level=False, archive_timeout=False):
# parse postgresql.auto.conf
options = {}
@ -1161,12 +1162,26 @@ class ProbackupTest(object):
if overwrite:
options['archive_command'] += '--overwrite '
options['archive_command'] += '--log-level-console=verbose '
options['archive_command'] += '-j 5 '
options['archive_command'] += '--batch-size 10 '
options['archive_command'] += '--no-sync '
if archive_timeout:
options['archive_command'] += '--archive-timeout={0} '.format(
archive_timeout)
if os.name == 'posix':
options['archive_command'] += '--wal-file-path=%p --wal-file-name=%f'
elif os.name == 'nt':
options['archive_command'] += '--wal-file-path="%p" --wal-file-name="%f"'
if log_level:
options['archive_command'] += ' --log-level-console={0}'.format(log_level)
options['archive_command'] += ' --log-level-file={0} '.format(log_level)
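# a rough sketch of the final command assembled above (the archive-push
# prefix with -B/--instance is built earlier in this method and not shown
# in this hunk):
#   ... archive-push ... -j 5 --batch-size 10 --no-sync \
#       --wal-file-path=%p --wal-file-name=%f
# plus the optional --overwrite/--archive-timeout/--log-level flags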
self.set_auto_conf(node, options)
def get_restore_command(self, backup_dir, instance, node):
@ -1244,7 +1259,8 @@ class ProbackupTest(object):
def set_replica(
self, master, replica,
replica_name='replica',
synchronous=False
synchronous=False,
log_shipping=False
):
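# log_shipping=True leaves primary_conninfo unset, so the standby recovers
# purely from archived WAL via restore_command instead of streaming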
self.set_auto_conf(
@ -1264,19 +1280,22 @@ class ProbackupTest(object):
if os.stat(probackup_recovery_path).st_size > 0:
config = 'probackup_recovery.conf'
self.set_auto_conf(
replica,
{'primary_conninfo': 'user={0} port={1} application_name={2} '
' sslmode=prefer sslcompression=1'.format(
self.user, master.port, replica_name)},
config)
if not log_shipping:
self.set_auto_conf(
replica,
{'primary_conninfo': 'user={0} port={1} application_name={2} '
' sslmode=prefer sslcompression=1'.format(
self.user, master.port, replica_name)},
config)
else:
replica.append_conf('recovery.conf', 'standby_mode = on')
replica.append_conf(
'recovery.conf',
"primary_conninfo = 'user={0} port={1} application_name={2}"
" sslmode=prefer sslcompression=1'".format(
self.user, master.port, replica_name))
if not log_shipping:
replica.append_conf(
'recovery.conf',
"primary_conninfo = 'user={0} port={1} application_name={2}"
" sslmode=prefer sslcompression=1'".format(
self.user, master.port, replica_name))
if synchronous:
self.set_auto_conf(

View File

@ -391,13 +391,12 @@ class PageTest(ProbackupTest, unittest.TestCase):
# PGBENCH STUFF
pgbench = node.pgbench(options=['-T', '50', '-c', '1', '--no-vacuum'])
pgbench.wait()
node.safe_psql("postgres", "checkpoint")
# GET LOGICAL CONTENT FROM NODE
result = node.safe_psql("postgres", "select * from pgbench_accounts")
# PAGE BACKUP
self.backup_node(
backup_dir, 'node', node, backup_type='page')
self.backup_node(backup_dir, 'node', node, backup_type='page')
# GET PHYSICAL CONTENT FROM NODE
pgdata = self.pgdata_content(node.data_dir)
@ -464,18 +463,15 @@ class PageTest(ProbackupTest, unittest.TestCase):
"postgres",
"create table t_heap tablespace somedata as select i as id,"
" md5(i::text) as text, md5(i::text)::tsvector as tsvector"
" from generate_series(0,100) i"
)
" from generate_series(0,100) i")
node.safe_psql(
"postgres",
"delete from t_heap"
)
"delete from t_heap")
node.safe_psql(
"postgres",
"vacuum t_heap"
)
"vacuum t_heap")
# PAGE BACKUP
self.backup_node(
@ -485,8 +481,7 @@ class PageTest(ProbackupTest, unittest.TestCase):
# RESTORE
node_restored = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node_restored')
)
base_dir=os.path.join(module_name, fname, 'node_restored'))
node_restored.cleanup()
self.restore_node(
@ -824,7 +819,7 @@ class PageTest(ProbackupTest, unittest.TestCase):
self.backup_node(backup_dir, 'node', node)
# make some wals
node.pgbench_init(scale=4)
node.pgbench_init(scale=10)
# delete last wal segment
wals_dir = os.path.join(backup_dir, 'wal', 'node')
@ -879,7 +874,6 @@ class PageTest(ProbackupTest, unittest.TestCase):
'INFO: Wait for WAL segment' in e.message and
'to be archived' in e.message and
'Could not read WAL record at' in e.message and
'incorrect resource manager data checksum in record at' in e.message and
'Possible WAL corruption. Error has occured during reading WAL segment' in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
@ -904,7 +898,6 @@ class PageTest(ProbackupTest, unittest.TestCase):
'INFO: Wait for WAL segment' in e.message and
'to be archived' in e.message and
'Could not read WAL record at' in e.message and
'incorrect resource manager data checksum in record at' in e.message and
'Possible WAL corruption. Error has occured during reading WAL segment "{0}"'.format(
file) in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
@ -947,8 +940,10 @@ class PageTest(ProbackupTest, unittest.TestCase):
self.set_archiving(backup_dir, 'alien_node', alien_node)
alien_node.slow_start()
self.backup_node(backup_dir, 'node', node)
self.backup_node(backup_dir, 'alien_node', alien_node)
self.backup_node(
backup_dir, 'node', node, options=['--stream'])
self.backup_node(
backup_dir, 'alien_node', alien_node, options=['--stream'])
# make some wals
node.safe_psql(
@ -1001,8 +996,6 @@ class PageTest(ProbackupTest, unittest.TestCase):
'INFO: Wait for WAL segment' in e.message and
'to be archived' in e.message and
'Could not read WAL record at' in e.message and
'WAL file is from different database system: WAL file database system identifier is' in e.message and
'pg_control database system identifier is' in e.message and
'Possible WAL corruption. Error has occured during reading WAL segment' in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
@ -1122,8 +1115,7 @@ class PageTest(ProbackupTest, unittest.TestCase):
# RESTORE
node_restored = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node_restored')
)
base_dir=os.path.join(module_name, fname, 'node_restored'))
node_restored.cleanup()
self.restore_node(
@ -1187,6 +1179,85 @@ class PageTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
# @unittest.expectedFailure
def test_multi_timeline_page(self):
"""
Check that backup in PAGE mode choose
parent backup correctly:
t12 /---P-->
...
t3 /---->
t2 /---->
t1 -F-----D->
P must have F as parent
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={'autovacuum': 'off'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
node.pgbench_init(scale=50)
full_id = self.backup_node(backup_dir, 'node', node)
pgbench = node.pgbench(options=['-T', '20', '-c', '1', '--no-vacuum'])
pgbench.wait()
self.backup_node(backup_dir, 'node', node, backup_type='delta')
node.cleanup()
self.restore_node(
backup_dir, 'node', node, backup_id=full_id,
options=[
'--recovery-target=immediate',
'--recovery-target-action=promote'])
node.slow_start()
pgbench = node.pgbench(options=['-T', '20', '-c', '1', '--no-vacuum'])
pgbench.wait()
# create timelines
for i in range(2, 12):
node.cleanup()
self.restore_node(
backup_dir, 'node', node, backup_id=full_id,
options=['--recovery-target-timeline={0}'.format(i)])
node.slow_start()
pgbench = node.pgbench(options=['-T', '3', '-c', '1', '--no-vacuum'])
pgbench.wait()
page_id = self.backup_node(
backup_dir, 'node', node, backup_type='page',
options=['--log-level-file=VERBOSE'])
pgdata = self.pgdata_content(node.data_dir)
node.cleanup()
self.restore_node(backup_dir, 'node', node)
pgdata_restored = self.pgdata_content(node.data_dir)
self.compare_pgdata(pgdata, pgdata_restored)
show = self.show_archive(backup_dir)
timelines = show[0]['timelines']
# self.assertEqual()
self.assertEqual(
self.show_pb(backup_dir, 'node', page_id)['parent-backup-id'],
full_id)
# Clean after yourself
self.del_test_dir(module_name, fname)
@unittest.skip("skip")
# @unittest.expectedFailure
def test_page_pg_resetxlog(self):

View File

@ -43,7 +43,7 @@ class CheckSystemID(ProbackupTest, unittest.TestCase):
"Output: {0} \n CMD: {1}".format(repr(self.output), self.cmd))
except ProbackupException as e:
self.assertTrue(
'ERROR: could not open file' in e.message and
'ERROR: Could not open file' in e.message and
'pg_control' in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))

View File

@ -3,10 +3,10 @@ import unittest
from .helpers.ptrack_helpers import ProbackupTest, ProbackupException, idx_ptrack
from datetime import datetime, timedelta
import subprocess
from testgres import QueryException
from testgres import QueryException, StartNodeException
import shutil
import sys
import time
from time import sleep
from threading import Thread
@ -210,46 +210,36 @@ class PtrackTest(ProbackupTest, unittest.TestCase):
"GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup;"
)
if self.ptrack:
fnames = []
if node.major_version < 12:
fnames += [
'pg_catalog.oideq(oid, oid)',
'pg_catalog.ptrack_version()',
'pg_catalog.pg_ptrack_clear()',
'pg_catalog.pg_ptrack_control_lsn()',
'pg_catalog.pg_ptrack_get_and_clear_db(oid, oid)',
'pg_catalog.pg_ptrack_get_and_clear(oid, oid)',
'pg_catalog.pg_ptrack_get_block_2(oid, oid, oid, bigint)'
]
else:
# TODO why backup works without these grants ?
# fnames += [
# 'pg_ptrack_get_pagemapset(pg_lsn)',
# 'pg_ptrack_control_lsn()',
# 'pg_ptrack_get_block(oid, oid, oid, bigint)'
# ]
node.safe_psql(
"backupdb",
"CREATE SCHEMA ptrack")
node.safe_psql(
"backupdb",
"CREATE EXTENSION ptrack WITH SCHEMA ptrack")
node.safe_psql(
"backupdb",
"GRANT USAGE ON SCHEMA ptrack TO backup")
if node.major_version < 12:
fnames = [
'pg_catalog.oideq(oid, oid)',
'pg_catalog.ptrack_version()',
'pg_catalog.pg_ptrack_clear()',
'pg_catalog.pg_ptrack_control_lsn()',
'pg_catalog.pg_ptrack_get_and_clear_db(oid, oid)',
'pg_catalog.pg_ptrack_get_and_clear(oid, oid)',
'pg_catalog.pg_ptrack_get_block_2(oid, oid, oid, bigint)'
]
for fname in fnames:
node.safe_psql(
"backupdb",
"GRANT EXECUTE ON FUNCTION {0} TO backup".format(fname))
else:
node.safe_psql(
"backupdb",
"GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup")
"CREATE SCHEMA ptrack")
node.safe_psql(
"backupdb",
"CREATE EXTENSION ptrack WITH SCHEMA ptrack")
node.safe_psql(
"backupdb",
"GRANT USAGE ON SCHEMA ptrack TO backup")
node.safe_psql(
"backupdb",
"GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup")
if ProbackupTest.enterprise:
node.safe_psql(
@ -3848,7 +3838,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase):
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
# @unittest.expectedFailure
@unittest.expectedFailure
def test_ptrack_pg_resetxlog(self):
fname = self.id().split('.')[3]
node = self.make_simple_node(
@ -4016,14 +4006,17 @@ class PtrackTest(ProbackupTest, unittest.TestCase):
node.stop(['-m', 'immediate', '-D', node.data_dir])
ptrack_map = os.path.join(node.data_dir, 'global', 'ptrack.map')
ptrack_map_mmap = os.path.join(node.data_dir, 'global', 'ptrack.map.mmap')
# Let's corrupt the ptrack map files: ptrack.map and ptrack.map.mmap
with open(os.path.join(node.data_dir, 'global', 'ptrack.map'), "rb+", 0) as f:
with open(ptrack_map, "rb+", 0) as f:
f.seek(42)
f.write(b"blablahblahs")
f.flush()
f.close()
with open(os.path.join(node.data_dir, 'global', 'ptrack.map.mmap'), "rb+", 0) as f:
with open(ptrack_map_mmap, "rb+", 0) as f:
f.seek(42)
f.write(b"blablahblahs")
f.flush()
@ -4031,13 +4024,97 @@ class PtrackTest(ProbackupTest, unittest.TestCase):
# os.remove(os.path.join(node.logs_dir, node.pg_log_name))
try:
node.slow_start()
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because ptrack.map is corrupted"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except StartNodeException as e:
self.assertIn(
'Cannot start node',
e.message,
'\n Unexpected Error Message: {0}\n'
' CMD: {1}'.format(repr(e.message), self.cmd))
log_file = os.path.join(node.logs_dir, 'postgresql.log')
with open(log_file, 'r') as f:
log_content = f.read()
self.assertIn(
'FATAL: incorrect checksum of file "{0}"'.format(ptrack_map),
log_content)
self.set_auto_conf(node, {'ptrack_map_size': '0'})
node.slow_start()
try:
self.backup_node(
backup_dir, 'node', node,
backup_type='ptrack', options=['--stream'])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because instance ptrack is disabled"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: Ptrack is disabled',
e.message,
'\n Unexpected Error Message: {0}\n'
' CMD: {1}'.format(repr(e.message), self.cmd))
node.safe_psql(
'postgres',
"update t_heap set id = nextval('t_seq'), text = md5(text), "
"tsvector = md5(repeat(tsvector::text, 10))::tsvector")
node.stop(['-m', 'immediate', '-D', node.data_dir])
self.set_auto_conf(node, {'ptrack_map_size': '32'})
node.slow_start()
sleep(1)
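# the ptrack map was recreated after being disabled, so it cannot cover the
# changes made in between; pg_probackup is expected to reject an incremental
# ptrack backup against such a "map from the future"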
try:
self.backup_node(
backup_dir, 'node', node,
backup_type='ptrack', options=['--stream'])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because ptrack map is from future"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: LSN from ptrack_control',
e.message,
'\n Unexpected Error Message: {0}\n'
' CMD: {1}'.format(repr(e.message), self.cmd))
sleep(1)
self.backup_node(
backup_dir, 'node', node,
backup_type='delta', options=['--stream'])
node.safe_psql(
'postgres',
"update t_heap set id = nextval('t_seq'), text = md5(text), "
"tsvector = md5(repeat(tsvector::text, 10))::tsvector")
self.backup_node(
backup_dir, 'node', node,
backup_type='ptrack', options=['--stream'])
pgdata = self.pgdata_content(node.data_dir)
node.cleanup()
self.restore_node(backup_dir, 'node', node)

View File

@ -24,22 +24,27 @@ class RemoteTest(ProbackupTest, unittest.TestCase):
self.add_instance(backup_dir, 'node', node)
node.slow_start()
try:
self.backup_node(
backup_dir, 'node',
node, options=['--remote-proto=ssh', '--stream'], no_remote=True)
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because remote-host option is missing."
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
"Insert correct error",
e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
output = self.backup_node(
backup_dir, 'node', node,
options=['--stream'], no_remote=True, return_id=False)
self.assertIn('remote: false', output)
# try:
# self.backup_node(
# backup_dir, 'node',
# node, options=['--remote-proto=ssh', '--stream'], no_remote=True)
# # we should die here because exception is what we expect to happen
# self.assertEqual(
# 1, 0,
# "Expecting Error because remote-host option is missing."
# "\n Output: {0} \n CMD: {1}".format(
# repr(self.output), self.cmd))
# except ProbackupException as e:
# self.assertIn(
# "Insert correct error",
# e.message,
# "\n Unexpected Error Message: {0}\n CMD: {1}".format(
# repr(e.message), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)

View File

@ -571,30 +571,25 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
'Skipped because backup from replica is not supported in PG 9.5')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
self.add_instance(backup_dir, 'node', master)
self.set_archiving(backup_dir, 'node', master)
master.slow_start()
# freeze bgwriter to get rid of RUNNING XACTS records
bgwriter_pid = master.auxiliary_pids[ProcessType.BackgroundWriter][0]
gdb_checkpointer = self.gdb_attach(bgwriter_pid)
self.backup_node(backup_dir, 'master', master)
self.backup_node(backup_dir, 'node', master)
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
self.restore_node(backup_dir, 'node', replica)
# Settings for Replica
self.add_instance(backup_dir, 'replica', replica)
self.set_replica(master, replica, synchronous=True)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
copy_tree(
os.path.join(backup_dir, 'wal', 'master'),
os.path.join(backup_dir, 'wal', 'replica'))
self.set_archiving(backup_dir, 'node', replica, replica=True)
replica.slow_start(replica=True)
@ -602,7 +597,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
self.switch_wal_segment(master)
output = self.backup_node(
backup_dir, 'replica', replica,
backup_dir, 'node', replica, replica.data_dir,
options=[
'--archive-timeout=30',
'--log-level-console=LOG',
@ -611,24 +606,24 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
return_id=False)
self.assertIn(
'LOG: Null offset in stop_backup_lsn value 0/3000000',
'LOG: Null offset in stop_backup_lsn value 0/4000000',
output)
self.assertIn(
'WARNING: WAL segment 000000010000000000000003 could not be streamed in 30 seconds',
'WARNING: WAL segment 000000010000000000000004 could not be streamed in 30 seconds',
output)
self.assertIn(
'WARNING: Failed to get next WAL record after 0/3000000, looking for previous WAL record',
'WARNING: Failed to get next WAL record after 0/4000000, looking for previous WAL record',
output)
self.assertIn(
'LOG: Looking for LSN 0/3000000 in segment: 000000010000000000000002',
'LOG: Looking for LSN 0/4000000 in segment: 000000010000000000000003',
output)
self.assertIn(
'has endpoint 0/3000000 which is '
'equal or greater than requested LSN 0/3000000',
'has endpoint 0/4000000 which is '
'equal or greater than requested LSN 0/4000000',
output)
self.assertIn(
@ -719,19 +714,19 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
log_content = f.read()
self.assertIn(
'LOG: Null offset in stop_backup_lsn value 0/3000000',
'LOG: Null offset in stop_backup_lsn value 0/4000000',
log_content)
self.assertIn(
'LOG: Looking for segment: 000000010000000000000003',
'LOG: Looking for segment: 000000010000000000000004',
log_content)
self.assertIn(
'LOG: First record in WAL segment "000000010000000000000003": 0/3000028',
'LOG: First record in WAL segment "000000010000000000000004": 0/4000028',
log_content)
self.assertIn(
'LOG: current.stop_lsn: 0/3000028',
'LOG: current.stop_lsn: 0/4000028',
log_content)
# Clean after yourself
@ -757,31 +752,26 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
'Skipped because backup from replica is not supported in PG 9.5')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
self.add_instance(backup_dir, 'node', master)
self.set_archiving(backup_dir, 'node', master)
master.slow_start()
self.backup_node(backup_dir, 'master', master)
self.backup_node(backup_dir, 'node', master)
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
self.restore_node(backup_dir, 'node', replica)
# Settings for Replica
self.add_instance(backup_dir, 'replica', replica)
self.set_replica(master, replica, synchronous=True)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
self.set_archiving(backup_dir, 'node', replica, replica=True)
# freeze bgwriter to get rid of RUNNING XACTS records
bgwriter_pid = master.auxiliary_pids[ProcessType.BackgroundWriter][0]
gdb_checkpointer = self.gdb_attach(bgwriter_pid)
copy_tree(
os.path.join(backup_dir, 'wal', 'master'),
os.path.join(backup_dir, 'wal', 'replica'))
replica.slow_start(replica=True)
self.switch_wal_segment(master)
@ -789,7 +779,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
# take backup from replica
output = self.backup_node(
backup_dir, 'replica', replica,
backup_dir, 'node', replica, replica.data_dir,
options=[
'--archive-timeout=30',
'--log-level-console=LOG',
@ -797,24 +787,24 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
return_id=False)
self.assertIn(
'LOG: Null offset in stop_backup_lsn value 0/3000000',
'LOG: Null offset in stop_backup_lsn value 0/4000000',
output)
self.assertIn(
'WARNING: WAL segment 000000010000000000000003 could not be archived in 30 seconds',
'WARNING: WAL segment 000000010000000000000004 could not be archived in 30 seconds',
output)
self.assertIn(
'WARNING: Failed to get next WAL record after 0/3000000, looking for previous WAL record',
'WARNING: Failed to get next WAL record after 0/4000000, looking for previous WAL record',
output)
self.assertIn(
'LOG: Looking for LSN 0/3000000 in segment: 000000010000000000000002',
'LOG: Looking for LSN 0/4000000 in segment: 000000010000000000000003',
output)
self.assertIn(
'has endpoint 0/3000000 which is '
'equal or greater than requested LSN 0/3000000',
'has endpoint 0/4000000 which is '
'equal or greater than requested LSN 0/4000000',
output)
self.assertIn(
@ -846,44 +836,39 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
'Skipped because backup from replica is not supported in PG 9.5')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
self.add_instance(backup_dir, 'node', master)
self.set_archiving(backup_dir, 'node', master)
master.slow_start()
self.backup_node(backup_dir, 'master', master)
self.backup_node(backup_dir, 'node', master)
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
self.restore_node(backup_dir, 'node', replica)
# Settings for Replica
self.add_instance(backup_dir, 'replica', replica)
self.set_replica(master, replica, synchronous=True)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
copy_tree(
os.path.join(backup_dir, 'wal', 'master'),
os.path.join(backup_dir, 'wal', 'replica'))
self.set_archiving(backup_dir, 'node', replica, replica=True)
replica.slow_start(replica=True)
# take backup from replica
self.backup_node(
backup_dir, 'replica', replica,
backup_dir, 'node', replica, replica.data_dir,
options=[
'--archive-timeout=30',
'--log-level-console=verbose',
'--log-level-console=LOG',
'--no-validate'],
return_id=False)
try:
self.backup_node(
backup_dir, 'replica', replica,
backup_dir, 'node', replica, replica.data_dir,
options=[
'--archive-timeout=30',
'--log-level-console=verbose',
'--log-level-console=LOG',
'--no-validate'])
# we should die here because exception is what we expect to happen
self.assertEqual(
@ -893,19 +878,19 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'LOG: Looking for LSN 0/3000060 in segment: 000000010000000000000003',
'LOG: Looking for LSN 0/4000060 in segment: 000000010000000000000004',
e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
self.assertIn(
'INFO: Wait for LSN 0/3000060 in archived WAL segment',
'INFO: Wait for LSN 0/4000060 in archived WAL segment',
e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
self.assertIn(
'ERROR: WAL segment 000000010000000000000003 could not be archived in 30 seconds',
'ERROR: WAL segment 000000010000000000000004 could not be archived in 30 seconds',
e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
@ -1016,7 +1001,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
@unittest.skip("skip")
def test_replica_promote_1(self):
"""
"""
@ -1037,7 +1022,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
# set replica True, so archive_mode 'always' is used.
self.set_archiving(backup_dir, 'master', master, replica=True)
master.slow_start()
@ -1091,6 +1076,528 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_replica_promote_2(self):
"""
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
# set replica True, so archive_mode 'always' is used.
self.set_archiving(
backup_dir, 'master', master, replica=True)
master.slow_start()
self.backup_node(backup_dir, 'master', master)
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
# Settings for Replica
self.set_replica(master, replica)
self.set_auto_conf(replica, {'port': replica.port})
replica.slow_start(replica=True)
master.safe_psql(
'postgres',
'CREATE TABLE t1 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,1) i')
self.wait_until_replica_catch_with_master(master, replica)
replica.promote()
replica.safe_psql(
'postgres',
'CHECKPOINT')
# replica.safe_psql(
# 'postgres',
# 'create table t2()')
#
# replica.safe_psql(
# 'postgres',
# 'CHECKPOINT')
self.backup_node(
backup_dir, 'master', replica, data_dir=replica.data_dir,
backup_type='page')
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_replica_promote_3(self):
"""
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
master.slow_start()
self.backup_node(backup_dir, 'master', master, options=['--stream'])
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
# Settings for Replica
self.set_replica(master, replica)
self.set_auto_conf(replica, {'port': replica.port})
replica.slow_start(replica=True)
master.safe_psql(
'postgres',
'CREATE TABLE t1 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(master, replica)
self.add_instance(backup_dir, 'replica', replica)
replica.safe_psql(
'postgres',
'CHECKPOINT')
full_id = self.backup_node(
backup_dir, 'replica',
replica, options=['--stream'])
master.safe_psql(
'postgres',
'CREATE TABLE t2 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(master, replica)
replica.safe_psql(
'postgres',
'CHECKPOINT')
self.backup_node(
backup_dir, 'replica', replica,
backup_type='delta', options=['--stream'])
replica.promote()
replica.safe_psql(
'postgres',
'CHECKPOINT')
# failing, because without archiving it is impossible to
# take a multi-timeline backup.
try:
self.backup_node(
backup_dir, 'replica', replica,
backup_type='delta', options=['--stream'])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of timeline switch "
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertTrue(
'WARNING: Cannot find valid backup on previous timelines, '
'WAL archive is not available' in e.message and
'ERROR: Create new full backup before an incremental one' in e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_replica_promote_archive_delta(self):
"""
t3 /---D3-->
t2 /------->
t1 --F---D1--D2--
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node1 = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node1'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '30s',
'archive_timeout': '30s',
'autovacuum': 'off'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node1)
self.set_config(
backup_dir, 'node', options=['--archive-timeout=60s'])
self.set_archiving(backup_dir, 'node', node1)
node1.slow_start()
self.backup_node(backup_dir, 'node', node1, options=['--stream'])
# Create replica
node2 = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node2'))
node2.cleanup()
self.restore_node(backup_dir, 'node', node2, node2.data_dir)
# Settings for Replica
self.set_replica(node1, node2)
self.set_auto_conf(node2, {'port': node2.port})
self.set_archiving(backup_dir, 'node', node2, replica=True)
node2.slow_start(replica=True)
node1.safe_psql(
'postgres',
'CREATE TABLE t1 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(node1, node2)
node1.safe_psql(
'postgres',
'CREATE TABLE t2 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(node1, node2)
# delta backup on replica on timeline 1
delta1_id = self.backup_node(
backup_dir, 'node', node2, node2.data_dir,
'delta', options=['--stream'])
# delta backup on replica on timeline 1
delta2_id = self.backup_node(
backup_dir, 'node', node2, node2.data_dir, 'delta')
self.change_backup_status(
backup_dir, 'node', delta2_id, 'ERROR')
# node2 is now master
node2.promote()
node2.safe_psql('postgres', 'CHECKPOINT')
node2.safe_psql(
'postgres',
'CREATE TABLE t3 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
# node1 is now replica
node1.cleanup()
# kludge "backup_id=delta1_id"
self.restore_node(
backup_dir, 'node', node1, node1.data_dir,
backup_id=delta1_id,
options=[
'--recovery-target-timeline=2',
'--recovery-target=latest'])
# Settings for Replica
self.set_replica(node2, node1)
self.set_auto_conf(node1, {'port': node1.port})
self.set_archiving(backup_dir, 'node', node1, replica=True)
node1.slow_start(replica=True)
node2.safe_psql(
'postgres',
'CREATE TABLE t4 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,30) i')
self.wait_until_replica_catch_with_master(node2, node1)
# node1 becomes the master again
node1.promote()
node1.safe_psql('postgres', 'CHECKPOINT')
# delta backup on timeline 3
self.backup_node(
backup_dir, 'node', node1, node1.data_dir, 'delta',
options=['--archive-timeout=60'])
pgdata = self.pgdata_content(node1.data_dir)
node1.cleanup()
self.restore_node(backup_dir, 'node', node1, node1.data_dir)
pgdata_restored = self.pgdata_content(node1.data_dir)
self.compare_pgdata(pgdata, pgdata_restored)
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_replica_promote_archive_page(self):
"""
t3 /---P3-->
t2 /------->
t1 --F---P1--P2--
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node1 = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node1'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '30s',
'archive_timeout': '30s',
'autovacuum': 'off'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node1)
self.set_archiving(backup_dir, 'node', node1)
self.set_config(
backup_dir, 'node', options=['--archive-timeout=60s'])
node1.slow_start()
self.backup_node(backup_dir, 'node', node1, options=['--stream'])
# Create replica
node2 = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node2'))
node2.cleanup()
self.restore_node(backup_dir, 'node', node2, node2.data_dir)
# Settings for Replica
self.set_replica(node1, node2)
self.set_auto_conf(node2, {'port': node2.port})
self.set_archiving(backup_dir, 'node', node2, replica=True)
node2.slow_start(replica=True)
node1.safe_psql(
'postgres',
'CREATE TABLE t1 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(node1, node2)
node1.safe_psql(
'postgres',
'CREATE TABLE t2 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(node1, node2)
# page backup on replica on timeline 1
page1_id = self.backup_node(
backup_dir, 'node', node2, node2.data_dir,
'page', options=['--stream'])
# page backup on replica on timeline 1
page2_id = self.backup_node(
backup_dir, 'node', node2, node2.data_dir, 'page')
self.change_backup_status(
backup_dir, 'node', page2_id, 'ERROR')
# node2 is now master
node2.promote()
node2.safe_psql('postgres', 'CHECKPOINT')
node2.safe_psql(
'postgres',
'CREATE TABLE t3 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
# node1 is now replica
node1.cleanup()
# kludge "backup_id=page1_id"
self.restore_node(
backup_dir, 'node', node1, node1.data_dir,
backup_id=page1_id,
options=[
'--recovery-target-timeline=2',
'--recovery-target=latest'])
# Settings for Replica
self.set_replica(node2, node1)
self.set_auto_conf(node1, {'port': node1.port})
self.set_archiving(backup_dir, 'node', node1, replica=True)
node1.slow_start(replica=True)
node2.safe_psql(
'postgres',
'CREATE TABLE t4 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,30) i')
self.wait_until_replica_catch_with_master(node2, node1)
# node1 becomes the master again
node1.promote()
node1.safe_psql('postgres', 'CHECKPOINT')
# delta3_id = self.backup_node(
# backup_dir, 'node', node2, node2.data_dir, 'delta')
# page backup on timeline 3
page3_id = self.backup_node(
backup_dir, 'node', node1, node1.data_dir, 'page',
options=['--archive-timeout=60'])
pgdata = self.pgdata_content(node1.data_dir)
node1.cleanup()
self.restore_node(backup_dir, 'node', node1, node1.data_dir)
pgdata_restored = self.pgdata_content(node1.data_dir)
self.compare_pgdata(pgdata, pgdata_restored)
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_parent_choosing(self):
"""
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
master.slow_start()
self.backup_node(backup_dir, 'master', master, options=['--stream'])
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
# Settings for Replica
self.set_replica(master, replica)
self.set_auto_conf(replica, {'port': replica.port})
replica.slow_start(replica=True)
master.safe_psql(
'postgres',
'CREATE TABLE t1 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(master, replica)
self.add_instance(backup_dir, 'replica', replica)
full_id = self.backup_node(
backup_dir, 'replica',
replica, options=['--stream'])
master.safe_psql(
'postgres',
'CREATE TABLE t2 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(master, replica)
self.backup_node(
backup_dir, 'replica', replica,
backup_type='delta', options=['--stream'])
replica.promote()
replica.safe_psql('postgres', 'CHECKPOINT')
# failing, because without archiving it is impossible to
# take a multi-timeline backup.
try:
self.backup_node(
backup_dir, 'replica', replica,
backup_type='delta', options=['--stream'])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of timeline switch "
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertTrue(
'WARNING: Cannot find valid backup on previous timelines, '
'WAL archive is not available' in e.message and
'ERROR: Create new full backup before an incremental one' in e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_instance_from_the_past(self):
"""
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
node.slow_start()
full_id = self.backup_node(backup_dir, 'node', node, options=['--stream'])
node.pgbench_init(scale=10)
self.backup_node(backup_dir, 'node', node, options=['--stream'])
node.cleanup()
self.restore_node(backup_dir, 'node', node, backup_id=full_id)
node.slow_start()
try:
self.backup_node(
backup_dir, 'node', node,
backup_type='delta', options=['--stream'])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because instance is from the past "
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertTrue(
'ERROR: Current START LSN' in e.message and
'is lower than START LSN' in e.message and
'It may indicate that we are trying to backup '
'PostgreSQL instance from the past' in e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)
# TODO:
# null offset STOP LSN and the latest record in the previous segment is a contrecord (manual only)

View File

@ -1915,8 +1915,7 @@ class RestoreTest(ProbackupTest, unittest.TestCase):
node.slow_start()
# Take FULL
self.backup_node(
backup_dir, 'node', node)
self.backup_node(backup_dir, 'node', node)
if self.get_version(node) >= self.version_to_num('12.0'):
recovery_conf = os.path.join(node.data_dir, 'probackup_recovery.conf')
@ -1925,27 +1924,28 @@ class RestoreTest(ProbackupTest, unittest.TestCase):
# restore
node.cleanup()
self.restore_node(
backup_dir, 'node', node)
self.restore_node(backup_dir, 'node', node)
# with open(recovery_conf, 'r') as f:
# print(f.read())
# hash_1 = hashlib.md5(
# open(recovery_conf, 'rb').read()).hexdigest()
hash_1 = hashlib.md5(
open(recovery_conf, 'rb').read()).hexdigest()
with open(recovery_conf, 'r') as f:
content_1 = f.read()
# restore
node.cleanup()
self.restore_node(
backup_dir, 'node', node, options=['--recovery-target=latest'])
# with open(recovery_conf, 'r') as f:
# print(f.read())
self.restore_node(backup_dir, 'node', node, options=['--recovery-target=latest'])
hash_2 = hashlib.md5(
open(recovery_conf, 'rb').read()).hexdigest()
# hash_2 = hashlib.md5(
# open(recovery_conf, 'rb').read()).hexdigest()
self.assertEqual(hash_1, hash_2)
with open(recovery_conf, 'r') as f:
content_2 = f.read()
self.assertEqual(content_1, content_2)
# self.assertEqual(hash_1, hash_2)
# Clean after yourself
self.del_test_dir(module_name, fname)
@ -2231,55 +2231,6 @@ class RestoreTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_pg_10_waldir(self):
"""
test restore of a node initialized with a separate WAL directory (initdb --waldir), PG >= 10
"""
if self.pg_config_version < self.version_to_num('10.0'):
return unittest.skip('You need PostgreSQL >= 10 for this test')
fname = self.id().split('.')[3]
wal_dir = os.path.join(
os.path.join(self.tmp_path, module_name, fname), 'wal_dir')
shutil.rmtree(wal_dir, ignore_errors=True)
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=[
'--data-checksums',
'--waldir={0}'.format(wal_dir)])
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
node.slow_start()
# take FULL backup
self.backup_node(
backup_dir, 'node', node, options=['--stream'])
pgdata = self.pgdata_content(node.data_dir)
# restore backup
node_restored = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node_restored'))
node_restored.cleanup()
self.restore_node(
backup_dir, 'node', node_restored)
# compare pgdata permissions
pgdata_restored = self.pgdata_content(node_restored.data_dir)
self.compare_pgdata(pgdata, pgdata_restored)
self.assertTrue(
os.path.islink(os.path.join(node_restored.data_dir, 'pg_wal')),
'pg_wal should be symlink')
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_restore_concurrent_drop_table(self):
""""""
@ -3427,3 +3378,48 @@ class RestoreTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_restore_primary_slot_info(self):
"""
"""
fname = self.id().split('.')[3]
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'])
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
node.slow_start()
# Take FULL
self.backup_node(backup_dir, 'node', node, options=['--stream'])
node.pgbench_init(scale=1)
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
node.safe_psql(
"SELECT pg_create_physical_replication_slot('master_slot')")
self.restore_node(
backup_dir, 'node', replica,
options=['-R', '--primary-slot-name=master_slot'])
self.set_auto_conf(replica, {'port': replica.port})
self.set_auto_conf(replica, {'hot_standby': 'on'})
if self.get_version(node) >= self.version_to_num('12.0'):
standby_signal = os.path.join(replica.data_dir, 'standby.signal')
self.assertTrue(
os.path.isfile(standby_signal),
"File '{0}' do not exists".format(standby_signal))
replica.slow_start(replica=True)
# Clean after yourself
self.del_test_dir(module_name, fname)

View File

@ -1712,10 +1712,9 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
"without valid full backup.\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
"ERROR: Valid backup on current timeline 1 is not found. "
"Create new FULL backup before an incremental one.",
e.message,
self.assertTrue(
"WARNING: Valid backup on current timeline 1 is not found" in e.message and
"ERROR: Create new full backup before an incremental one" in e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
@ -2675,7 +2674,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
self.assertIn(
'LOG: Archive backup {0} to stay consistent protect from '
'purge WAL interval between 000000010000000000000004 '
'and 000000010000000000000004 on timeline 1'.format(B1), output)
'and 000000010000000000000005 on timeline 1'.format(B1), output)
start_lsn_B4 = self.show_pb(backup_dir, 'node', B4)['start-lsn']
self.assertIn(
@ -2684,13 +2683,13 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
self.assertIn(
'LOG: Timeline 3 to stay reachable from timeline 1 protect '
'from purge WAL interval between 000000020000000000000005 and '
'000000020000000000000008 on timeline 2', output)
'from purge WAL interval between 000000020000000000000006 and '
'000000020000000000000009 on timeline 2', output)
self.assertIn(
'LOG: Timeline 3 to stay reachable from timeline 1 protect '
'from purge WAL interval between 000000010000000000000004 and '
'000000010000000000000005 on timeline 1', output)
'000000010000000000000006 on timeline 1', output)
show_tli1_before = self.show_archive(backup_dir, 'node', tli=1)
show_tli2_before = self.show_archive(backup_dir, 'node', tli=2)
@ -2745,19 +2744,19 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
self.assertEqual(
show_tli1_after['lost-segments'][0]['begin-segno'],
'000000010000000000000006')
'000000010000000000000007')
self.assertEqual(
show_tli1_after['lost-segments'][0]['end-segno'],
'000000010000000000000009')
'00000001000000000000000A')
self.assertEqual(
show_tli2_after['lost-segments'][0]['begin-segno'],
'000000020000000000000009')
'00000002000000000000000A')
self.assertEqual(
show_tli2_after['lost-segments'][0]['end-segno'],
'000000020000000000000009')
'00000002000000000000000A')
self.validate_pb(backup_dir, 'node')

View File

@ -13,36 +13,6 @@ module_name = 'validate'
class ValidateTest(ProbackupTest, unittest.TestCase):
# @unittest.skip("skip")
# @unittest.expectedFailure
def test_validate_all_empty_catalog(self):
"""
"""
fname = self.id().split('.')[3]
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
initdb_params=['--data-checksums'])
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
self.init_pb(backup_dir)
try:
self.validate_pb(backup_dir)
self.assertEqual(
1, 0,
"Expecting Error because backup_dir is empty.\n "
"Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: This backup catalog contains no backup instances',
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
# @unittest.expectedFailure
def test_basic_validate_nullified_heap_page_backup(self):
@ -1816,7 +1786,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
self.assertTrue(
'LOG: archive command failed with exit code 1' in log_content and
'DETAIL: The failed archive command was:' in log_content and
'INFO: pg_probackup archive-push from' in log_content,
'WAL file already exists in archive with different checksum' in log_content,
'Expecting error messages about failed archive_command'
)
self.assertFalse(
@ -3538,130 +3508,6 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.expectedFailure
# @unittest.skip("skip")
def test_recovery_target_time_backup_victim(self):
"""
Check that for validation to recovery target
probackup chooses valid backup
https://github.com/postgrespro/pg_probackup/issues/104
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
# FULL backup
self.backup_node(backup_dir, 'node', node)
node.safe_psql(
"postgres",
"create table t_heap as select 1 as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,10000) i")
target_time = node.safe_psql(
"postgres",
"select now()").rstrip()
node.safe_psql(
"postgres",
"create table t_heap1 as select 1 as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,100) i")
gdb = self.backup_node(backup_dir, 'node', node, gdb=True)
gdb.set_breakpoint('pg_stop_backup')
gdb.run_until_break()
gdb.remove_all_breakpoints()
gdb._execute('signal SIGINT')
gdb.continue_execution_until_error()
backup_id = self.show_pb(backup_dir, 'node')[1]['id']
self.assertEqual(
'ERROR',
self.show_pb(backup_dir, 'node', backup_id)['status'],
'Backup STATUS should be "ERROR"')
self.validate_pb(
backup_dir, 'node',
options=['--recovery-target-time={0}'.format(target_time)])
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.expectedFailure
# @unittest.skip("skip")
def test_recovery_target_lsn_backup_victim(self):
"""
Check that for validation to recovery target
probackup chooses valid backup
https://github.com/postgrespro/pg_probackup/issues/104
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
# FULL backup
self.backup_node(backup_dir, 'node', node)
node.safe_psql(
"postgres",
"create table t_heap as select 1 as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,10000) i")
node.safe_psql(
"postgres",
"create table t_heap1 as select 1 as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,100) i")
gdb = self.backup_node(
backup_dir, 'node', node,
options=['--log-level-console=LOG'], gdb=True)
gdb.set_breakpoint('pg_stop_backup')
gdb.run_until_break()
gdb.remove_all_breakpoints()
gdb._execute('signal SIGINT')
gdb.continue_execution_until_error()
backup_id = self.show_pb(backup_dir, 'node')[1]['id']
self.assertEqual(
'ERROR',
self.show_pb(backup_dir, 'node', backup_id)['status'],
'Backup STATUS should be "ERROR"')
self.switch_wal_segment(node)
target_lsn = self.show_pb(backup_dir, 'node', backup_id)['start-lsn']
self.validate_pb(
backup_dir, 'node',
options=['--recovery-target-lsn={0}'.format(target_lsn)])
# Clean after yourself
self.del_test_dir(module_name, fname)
@unittest.skip("skip")
def test_partial_validate_empty_and_mangled_database_map(self):
"""

24
travis/Dockerfile.in Normal file
View File

@ -0,0 +1,24 @@
FROM ololobus/postgres-dev:stretch
USER root
RUN apt-get update
RUN apt-get -yq install python python-pip python-virtualenv
# Environment
ENV PG_MAJOR=${PG_VERSION} PG_BRANCH=${PG_BRANCH}
ENV LANG=C.UTF-8 PGHOME=/pg/testdir/pgbin
# Make directories
RUN mkdir -p /pg/testdir
COPY run_tests.sh /run.sh
RUN chmod 755 /run.sh
COPY . /pg/testdir
WORKDIR /pg/testdir
# Grant privileges
RUN chown -R postgres:postgres /pg/testdir
USER postgres
ENTRYPOINT MODE=${MODE} /run.sh

View File

@ -0,0 +1,2 @@
tests:
build: .

25
travis/make_dockerfile.sh Executable file
View File

@ -0,0 +1,25 @@
#!/usr/bin/env sh
if [ -z ${PG_VERSION+x} ]; then
echo PG_VERSION is not set!
exit 1
fi
if [ -z ${PG_BRANCH+x} ]; then
echo PG_BRANCH is not set!
exit 1
fi
if [ -z ${MODE+x} ]; then
MODE=basic
fi
echo PG_VERSION=${PG_VERSION}
echo PG_BRANCH=${PG_BRANCH}
echo MODE=${MODE}
sed \
-e 's/${PG_VERSION}/'${PG_VERSION}/g \
-e 's/${PG_BRANCH}/'${PG_BRANCH}/g \
-e 's/${MODE}/'${MODE}/g \
Dockerfile.in > Dockerfile
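For local experimentation, the template can be rendered the same way: a minimal sketch, assuming the travis/ files have been copied into the working directory (the version, branch and mode below are example values only):
PG_VERSION=11 PG_BRANCH=REL_11_STABLE MODE=replica ./make_dockerfile.sh   # example values
# The rendered Dockerfile then contains the substituted lines, e.g.:
#   ENV PG_MAJOR=11 PG_BRANCH=REL_11_STABLE
#   ENTRYPOINT MODE=replica /run.sh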

80
travis/run_tests.sh Executable file
View File

@ -0,0 +1,80 @@
#!/usr/bin/env bash
#
# Copyright (c) 2019-2020, Postgres Professional
#
PG_SRC=$PWD/postgres
# # Here PG_VERSION is provided by postgres:X-alpine docker image
# curl "https://ftp.postgresql.org/pub/source/v$PG_VERSION/postgresql-$PG_VERSION.tar.bz2" -o postgresql.tar.bz2
# echo "$PG_SHA256 *postgresql.tar.bz2" | sha256sum -c -
# mkdir $PG_SRC
# tar \
# --extract \
# --file postgresql.tar.bz2 \
# --directory $PG_SRC \
# --strip-components 1
# Clone Postgres
echo "############### Getting Postgres sources:"
git clone https://github.com/postgres/postgres.git -b $PG_BRANCH --depth=1
# Compile and install Postgres
echo "############### Compiling Postgres:"
cd postgres # Go to postgres dir
./configure --prefix=$PGHOME --enable-debug --enable-cassert --enable-depend --enable-tap-tests
make -s -j$(nproc) install
make -s -j$(nproc) -C contrib/ install
# Override default Postgres instance
export PATH=$PGHOME/bin:$PATH
export LD_LIBRARY_PATH=$PGHOME/lib
export PG_CONFIG=$(which pg_config)
# Get amcheck if missing
if [ ! -d "contrib/amcheck" ]; then
echo "############### Getting missing amcheck:"
git clone https://github.com/petergeoghegan/amcheck.git --depth=1 contrib/amcheck
make USE_PGXS=1 -C contrib/amcheck install
fi
# Get back to testdir
cd ..
# Show pg_config path (just in case)
echo "############### pg_config path:"
which pg_config
# Show pg_config just in case
echo "############### pg_config:"
pg_config
# Build and install pg_probackup (using PG_CPPFLAGS and SHLIB_LINK for gcov)
echo "############### Compiling and installing pg_probackup:"
# make USE_PGXS=1 PG_CPPFLAGS="-coverage" SHLIB_LINK="-coverage" top_srcdir=$CUSTOM_PG_SRC install
make USE_PGXS=1 top_srcdir=$PG_SRC install
# Setup python environment
echo "############### Setting up python env:"
virtualenv pyenv
source pyenv/bin/activate
pip install testgres==1.8.2
echo "############### Testing:"
if [ "$MODE" = "basic" ]; then
export PG_PROBACKUP_TEST_BASIC=ON
python -m unittest -v tests
python -m unittest -v tests.init
else
python -m unittest -v tests.$MODE
fi
# Generate *.gcov files
# gcov src/*.c src/*.h
# Send coverage stats to Codecov
# bash <(curl -s https://codecov.io/bash)
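The script can also be driven by hand outside the container: a minimal sketch, assuming an install prefix in PGHOME and the Python tooling installed by the Dockerfile (branch and mode are example values only):
export PGHOME=/pg/testdir/pgbin                       # example prefix, matching the Dockerfile
PG_BRANCH=REL_11_STABLE MODE=replica ./run_tests.sh   # ends up running: python -m unittest -v tests.replica
PG_BRANCH=REL_11_STABLE MODE=basic ./run_tests.sh     # basic suite, with PG_PROBACKUP_TEST_BASIC=ON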