mirror of https://github.com/postgrespro/pg_probackup.git synced 2024-11-28 09:33:54 +02:00

Merge branch 'master' into issue_120

Grigory Smolkin 2020-04-17 14:10:57 +03:00
commit 3429ef149e
32 changed files with 4464 additions and 880 deletions

.gitignore
View File

@ -47,3 +47,14 @@
# Doc files
/doc/*html
# Docker files
/docker-compose.yml
/Dockerfile
/Dockerfile.in
/run_tests.sh
/make_dockerfile.sh
/backup_restore.sh
# Misc
.python-version

View File

@ -1,7 +1,47 @@
sudo: required
os: linux
dist: bionic
language: c
services:
- docker
- docker
before_install:
- cp travis/* .
install:
- ./make_dockerfile.sh
- docker-compose build
script:
- docker run -v $(pwd):/tests --rm centos:7 /tests/travis/backup_restore.sh
- docker-compose run tests
# - docker-compose run $(bash <(curl -s https://codecov.io/env)) tests
# - docker run -v $(pwd):/tests --rm centos:7 /tests/travis/backup_restore.sh
notifications:
email:
on_success: change
on_failure: always
# Default MODE is basic, i.e. all tests with PG_PROBACKUP_TEST_BASIC=ON
env:
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=archive
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=backup
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=compression
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=delta
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=locking
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=merge
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=page
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=replica
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=retention
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=restore
- PG_VERSION=11 PG_BRANCH=REL_11_STABLE
- PG_VERSION=10 PG_BRANCH=REL_10_STABLE
- PG_VERSION=9.6 PG_BRANCH=REL9_6_STABLE
- PG_VERSION=9.5 PG_BRANCH=REL9_5_STABLE
jobs:
allow_failures:
- if: env(MODE) IN (archive, backup, delta, locking, merge, replica, retention, restore)

View File

@ -15,9 +15,9 @@ OBJS += src/pg_crc.o src/datapagemap.o src/receivelog.o src/streamutil.o \
EXTRA_CLEAN = src/pg_crc.c src/datapagemap.c src/datapagemap.h \
src/receivelog.c src/receivelog.h src/streamutil.c src/streamutil.h \
src/xlogreader.c
src/xlogreader.c src/instr_time.h
INCLUDES = src/datapagemap.h src/streamutil.h src/receivelog.h
INCLUDES = src/datapagemap.h src/streamutil.h src/receivelog.h src/instr_time.h
ifdef USE_PGXS
PG_CONFIG = pg_config
@ -60,6 +60,8 @@ all: checksrcdir $(INCLUDES);
$(PROGRAM): $(OBJS)
src/instr_time.h: $(top_srcdir)/src/include/portability/instr_time.h
rm -f $@ && $(LN_S) $(srchome)/src/include/portability/instr_time.h $@
src/datapagemap.c: $(top_srcdir)/src/bin/pg_rewind/datapagemap.c
rm -f $@ && $(LN_S) $(srchome)/src/bin/pg_rewind/datapagemap.c $@
src/datapagemap.h: $(top_srcdir)/src/bin/pg_rewind/datapagemap.h

View File

@ -1,3 +1,5 @@
[![Build Status](https://travis-ci.com/postgrespro/pg_probackup.svg?branch=master)](https://travis-ci.com/postgrespro/pg_probackup)
# pg_probackup
`pg_probackup` is a utility to manage backup and recovery of PostgreSQL database clusters. It is designed to perform periodic backups of the PostgreSQL instance that enable you to restore the server in case of a failure.
@ -38,8 +40,9 @@ Regardless of the chosen backup type, all backups taken with `pg_probackup` supp
`PTRACK` backup support provided via following options:
* vanilla PostgreSQL compiled with ptrack patch. Currently there are patches for [PostgreSQL 9.6](https://gist.githubusercontent.com/gsmol/5b615c971dfd461c76ef41a118ff4d97/raw/e471251983f14e980041f43bea7709b8246f4178/ptrack_9.6.6_v1.5.patch) and [PostgreSQL 10](https://gist.githubusercontent.com/gsmol/be8ee2a132b88463821021fd910d960e/raw/de24f9499f4f314a4a3e5fae5ed4edb945964df8/ptrack_10.1_v1.5.patch)
* Postgres Pro Standard 9.6, 10, 11
* Postgres Pro Enterprise 9.6, 10, 11
* vanilla PostgreSQL 12 with [ptrack extension](https://github.com/postgrespro/ptrack)
* Postgres Pro Standard 9.6, 10, 11, 12
* Postgres Pro Enterprise 9.6, 10, 11, 12
## Limitations

View File

@ -131,7 +131,6 @@ doc/src/sgml/pgprobackup.sgml
<arg choice="plain"><option>archive-push</option></arg>
<arg choice="plain"><option>-B</option> <replaceable>backup_dir</replaceable></arg>
<arg choice="plain"><option>--instance</option> <replaceable>instance_name</replaceable></arg>
<arg choice="plain"><option>--wal-file-path</option> <replaceable>wal_file_path</replaceable></arg>
<arg choice="plain"><option>--wal-file-name</option> <replaceable>wal_file_name</replaceable></arg>
<arg rep="repeat"><replaceable>option</replaceable></arg>
</cmdsynopsis>
@ -427,14 +426,6 @@ doc/src/sgml/pgprobackup.sgml
or <application>libc</application>/<application>libicu</application> versions.
</para>
</listitem>
<listitem>
<para>
All backups in the incremental chain must belong to the same
timeline. For example, if you have taken incremental backups on a
standby server that gets promoted, you have to take another FULL
backup.
</para>
</listitem>
</itemizedlist>
</para>
</refsect2>
@ -754,9 +745,10 @@ ALTER ROLE backup WITH REPLICATION;
<title>Setting up Continuous WAL Archiving</title>
<para>
Making backups in PAGE backup mode, performing
<link linkend="pbk-performing-point-in-time-pitr-recovery">PITR</link>
and making backups with
<link linkend="pbk-archive-mode">ARCHIVE</link> WAL delivery mode
<link linkend="pbk-performing-point-in-time-pitr-recovery">PITR</link>,
making backups with
<link linkend="pbk-archive-mode">ARCHIVE</link> WAL delivery mode and
running an incremental backup after a timeline switch
require
<ulink url="https://postgrespro.com/docs/postgresql/current/continuous-archiving.html">continuous
WAL archiving</ulink> to be enabled. To set up continuous
@ -786,7 +778,7 @@ ALTER ROLE backup WITH REPLICATION;
parameter, as follows:
</para>
<programlisting>
archive_command = '<replaceable>install_dir</replaceable>/pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-path=%p --wal-file-name=%f [<replaceable>remote_options</replaceable>]'
archive_command = '<replaceable>install_dir</replaceable>/pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-name=%f [<replaceable>remote_options</replaceable>]'
</programlisting>
</listitem>
</itemizedlist>
@ -1483,7 +1475,7 @@ pg_probackup checkdb [-B <replaceable>backup_dir</replaceable> [--instance <repl
enough to specify the backup instance of this cluster for
<application>pg_probackup</application> to determine the required
connection options. However, if <literal>-B</literal> and
<literal>--instance</literal> options are ommitted, you have to provide
<literal>--instance</literal> options are omitted, you have to provide
<link linkend="pbk-connection-opts">connection options</link> and
<replaceable>data_dir</replaceable> via environment
variables or command-line options.
@ -2247,7 +2239,7 @@ BACKUP INSTANCE 'node'
<para>
<literal>MERGED</literal> — the backup data files were
successfully merged, but its metadata is in the process
of been updated. Only full backup can have this status.
of being updated. Only full backups can have this status.
</para>
</listitem>
<listitem>
@ -2372,7 +2364,8 @@ primary_conninfo = 'user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmod
<listitem>
<para>
<literal>expire-time</literal> — the point in time
when a pinned backup can be removed by retention purge.
when a pinned backup can be removed in accordance with retention
policy. This attribute is only available for pinned backups.
</para>
</listitem>
<listitem>
@ -2816,17 +2809,19 @@ pg_probackup show -B <replaceable>backup_dir</replaceable> [--instance <replacea
<refsect2 id="pbk-configuring-retention-policy">
<title>Configuring Retention Policy</title>
<para>
With <application>pg_probackup</application>, you can set retention policies for backups
and WAL archive. All policies can be combined together in any
way.
With <application>pg_probackup</application>, you can configure
retention policy to remove redundant backups, clean up unneeded
WAL files, as well as pin specific backups to ensure they are
kept for the specified time, as explained in the sections below.
All these actions can be combined together in any way.
</para>
<refsect3 id="pbk-retention-policy">
<title>Backup Retention Policy</title>
<title>Removing Redundant Backups</title>
<para>
By default, all backup copies created with <application>pg_probackup</application> are
stored in the specified backup catalog. To save disk space,
you can configure retention policy and periodically clean up
redundant backup copies accordingly.
you can configure retention policy to remove redundant backup copies.
</para>
<para>
To configure retention policy, set one or more of the
@ -2849,56 +2844,51 @@ pg_probackup show -B <replaceable>backup_dir</replaceable> [--instance <replacea
<emphasis role="strong">the number of days</emphasis> from the
current moment. For example, if
<literal>retention-window=7</literal>, <application>pg_probackup</application> must
delete all backup copies that are older than seven days, with
all the corresponding WAL files.
keep at least one backup copy that is older than seven days, with
all the corresponding WAL files, and all the backups that follow.
</para>
<para>
If both <option>--retention-redundancy</option> and
<option>--retention-window</option> options are set,
<application>pg_probackup</application> keeps backup copies that satisfy at least one
condition. For example, if you set
<literal>--retention-redundancy=2</literal> and
<literal>--retention-window=7</literal>, <application>pg_probackup</application> purges
the backup catalog to keep only two full backup copies and all
backups that are newer than seven days:
<option>--retention-window</option> options are set, both these
conditions have to be taken into account when purging the backup
catalog. For example, if you set <literal>--retention-redundancy=2</literal>
and <literal>--retention-window=7</literal>,
<application>pg_probackup</application> has to keep two full backup
copies, as well as all the backups required to ensure recoverability
for the last seven days:
</para>
<programlisting>
pg_probackup set-config -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --retention-redundancy=2 --retention-window=7
</programlisting>
<para>
To clean up the backup catalog in accordance with retention
policy, run:
To clean up the backup catalog in accordance with retention policy,
you have to run the <xref linkend="pbk-delete"/> command with
<link linkend="pbk-retention-opts">retention flags</link>, as shown
below, or use the <xref linkend="pbk-backup"/> command with
these flags to process the outdated backup copies right when the new
backup is created.
</para>
<para>
For example, to remove all backup copies that no longer satisfy the
defined retention policy, run the following command with the
<literal>--delete-expired</literal> flag:
</para>
<programlisting>
pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --delete-expired
</programlisting>
<para>
<application>pg_probackup</application> deletes all backup copies that do not conform to
the defined retention policy.
</para>
<para>
If you would like to also remove the WAL files that are no
longer required for any of the backups, add the
longer required for any of the backups, you should also specify the
<option>--delete-wal</option> flag:
</para>
<programlisting>
pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --delete-expired --delete-wal
</programlisting>
<note>
<para>
Alternatively, you can use the
<option>--delete-expired</option>,
<option>--merge-expired</option>,
<option>--delete-wal</option> flags and the
<option>--retention-window</option> and
<option>--retention-redundancy</option> options together
with the <xref linkend="pbk-backup"/> command to
remove and merge the outdated backup copies once the new
backup is created.
</para>
</note>
<para>
You can set or override the current retention policy by
You can also set or override the current retention policy by
specifying <option>--retention-redundancy</option> and
<option>--retention-window</option> options directly when
running <command>delete</command> or <command>backup</command>
@ -2919,6 +2909,7 @@ pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replace
<xref linkend="pbk-backup"/> or
<xref linkend="pbk-delete"/> commands.
</para>
<para>
Suppose you have backed up the <replaceable>node</replaceable>
instance in the <replaceable>backup_dir</replaceable> directory,
@ -2971,9 +2962,10 @@ BACKUP INSTANCE 'node'
The <literal>Time</literal> field for the merged backup displays the time
required for the merge.
</para>
</refsect3>
<refsect3 id="pbk-backup-pinning">
<title>Backup Pinning</title>
<title>Pinning Backups</title>
<para>
If you need to keep certain backups longer than the
established retention policy allows, you can pin them
@ -3012,8 +3004,8 @@ pg_probackup show -B <replaceable>backup_dir</replaceable> --instance <replaceab
</programlisting>
</para>
<para>
If the backup is pinned, the <literal>expire-time</literal>
attribute displays its expiration time:
If the backup is pinned, it has the <literal>expire-time</literal>
attribute that displays its expiration time:
<programlisting>
...
recovery-time = '2017-05-16 12:57:31'
@ -3023,34 +3015,65 @@ data-bytes = 22288792
</programlisting>
</para>
<para>
Only pinned backups have the <literal>expire-time</literal>
attribute in the backup metadata.
</para>
<note>
<para>
A pinned incremental backup implicitly pins all
its parent backups.
</para>
</note>
<para>
You can unpin the backup by setting the
<option>--ttl</option> option to zero using the
<xref linkend="pbk-set-backup"/> command. For example:
You can unpin the backup by setting the <option>--ttl</option> option to zero:
</para>
<programlisting>
pg_probackup set-backup -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> -i <replaceable>backup_id</replaceable> --ttl=0
</programlisting>
<note>
<para>
A pinned incremental backup implicitly pins all
its parent backups. If you unpin such a backup later,
its implicitly pinned parents will also be automatically unpinned.
</para>
</note>
</refsect3>
<refsect3 id="pbk-wal-archive-retention-policy">
<title>WAL Archive Retention Policy</title>
<title>Configuring WAL Archive Retention Policy</title>
<para>
By default, <application>pg_probackup</application> purges
only redundant WAL segments that cannot be applied to any of the
backups in the backup catalog. To save disk space,
you can configure WAL archive retention policy, which allows to
keep WAL of limited depth measured in backups per timeline.
When <link linkend="pbk-setting-up-continuous-wal-archiving">continuous
WAL archiving</link> is enabled, archived WAL segments can take a lot
of disk space. Even if you delete old backup copies from time to time,
the <literal>--delete-wal</literal> flag can
purge only those WAL segments that do not apply to any of the
remaining backups in the backup catalog. However, if point-in-time
recovery is critical only for the most recent backups, you can
configure WAL archive retention policy to keep WAL archive of
limited depth and win back some more disk space.
</para>
<para>
To configure WAL archive retention policy, you have to run the
<xref linkend="pbk-set-config"/> command with the
<literal>--wal-depth</literal> option that specifies the number
of backups that can be used for PITR.
This setting applies to all the timelines, so you should be able to perform
PITR for the same number of backups on each timeline, if available.
<link linkend="pbk-backup-pinning">Pinned backups</link> are
not included in this count: if one of the latest backups
is pinned, <application>pg_probackup</application> ensures that
PITR is possible for one extra backup.
</para>
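<para>
A minimal sketch, assuming you want PITR to be possible for the two
latest backups on each timeline:
</para>
<programlisting>
pg_probackup set-config -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-depth=2
</programlisting>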
<para>
To remove WAL segments that do not satisfy the defined WAL archive
retention policy, you simply have to run the <xref linkend="pbk-delete"/>
or <xref linkend="pbk-backup"/> command with the <literal>--delete-wal</literal>
flag. For archive backups, WAL segments between <literal>Start LSN</literal>
and <literal>Stop LSN</literal> are always kept intact, so such backups
remain valid regardless of the <literal>--wal-depth</literal> setting
and can still be restored, if required.
</para>
<para>
You can also use the <option>--wal-depth</option> option
with the <xref linkend="pbk-delete"/> and <xref linkend="pbk-backup"/>
commands to override the previously defined WAL archive retention
policy and purge old WAL segments on the fly.
</para>
<para>
Suppose you have backed up the <literal>node</literal>
instance in the <replaceable>backup_dir</replaceable> directory and
@ -3104,8 +3127,8 @@ ARCHIVE INSTANCE 'node'
</programlisting>
<para>
If you would like, for example, to keep only those WAL
segments that can be applied to the last valid backup, use the
<option>--wal-depth</option> option:
segments that can be applied to the latest valid backup, set the
<option>--wal-depth</option> option to 1:
</para>
<programlisting>
pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance node --delete-wal --wal-depth=1
@ -3131,12 +3154,6 @@ ARCHIVE INSTANCE 'node'
===============================================================================================================================
1 0 0/0 000000010000000000000048 000000010000000000000049 1 72kB 228.00 7 OK
</programlisting>
<note>
<para>
<link linkend="pbk-backup-pinning">Pinned backups</link> are
ignored for the purpose of WAL Archive Retention Policy fulfilment.
</para>
</note>
</refsect3>
</refsect2>
<refsect2 id="pbk-merging-backups">
@ -3152,16 +3169,16 @@ ARCHIVE INSTANCE 'node'
pg_probackup merge -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> -i <replaceable>backup_id</replaceable>
</programlisting>
<para>
This command merges the specified incremental backup to its
parent full backup, together with all incremental backups
between them. If the specified backup ID belong to the full backup,
then it will be merged with the closest incremental backup.
Once the merge is complete, the incremental
backups are removed as redundant. Thus, the merge operation is
virtually equivalent to retaking a full backup and removing all
the outdated backups, but it allows to save much time,
especially for large data volumes, as well as I/O and network traffic
if you are using <application>pg_probackup</application> in the
This command merges backups that belong to a common incremental backup
chain. If you specify a full backup, it will be merged with its first
incremental backup. If you specify an incremental backup, it will be
merged to its parent full backup, together with all incremental backups
between them. Once the merge is complete, the full backup takes in all
the merged data, and the incremental backups are removed as redundant.
Thus, the merge operation is virtually equivalent to retaking a full
backup and removing all the outdated backups, but it allows you to save much
time, especially for large data volumes, as well as I/O and network
traffic if you are using <application>pg_probackup</application> in the
<link linkend="pbk-remote-backup">remote</link> mode.
</para>
<para>
@ -3175,8 +3192,10 @@ pg_probackup show -B <replaceable>backup_dir</replaceable> --instance <replaceab
</programlisting>
<para>
If the merge is still in progress, the backup status is
displayed as <literal>MERGING</literal> or, at the final stage,
<literal>MERGED</literal>. The merge is idempotent, so you can
displayed as <literal>MERGING</literal>. For full backups,
it can also be shown as <literal>MERGED</literal> while the
metadata is being updated at the final stage of the merge.
The merge is idempotent, so you can
restart the merge if it was interrupted.
</para>
</refsect2>
@ -3581,9 +3600,11 @@ pg_probackup backup -B <replaceable>backup_dir</replaceable> -b <replaceable>bac
<listitem>
<para>
Do not sync backed up files to disk. You can use this flag to speed
up backup process. Using this flag can result in data
up the backup process. Using this flag can result in data
corruption in case of operating system or hardware crash.
Corruption can be detected by backup validation.
If you use this option, it is recommended to run the
<xref linkend="pbk-validate"/> command once the backup is complete
to detect possible issues.
</para>
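<para>
A possible workflow sketch (the backup mode, catalog path, and instance name
are placeholders): take the backup with this flag and validate it afterwards:
</para>
<programlisting>
pg_probackup backup -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> -b DELTA --no-sync
pg_probackup validate -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable>
</programlisting>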
</listitem>
</varlistentry>
@ -3617,7 +3638,7 @@ pg_probackup restore -B <replaceable>backup_dir</replaceable> --instance <replac
[--force] [--no-sync]
[--restore-command=<replaceable>cmdline</replaceable>]
[--primary-conninfo=<replaceable>primary_conninfo</replaceable>]
[-S | --primary-slot-name=<replaceable>slotname</replaceable>]
[-S | --primary-slot-name=<replaceable>slot_name</replaceable>]
[<replaceable>recovery_target_options</replaceable>] [<replaceable>logging_options</replaceable>] [<replaceable>remote_options</replaceable>]
[<replaceable>partial_restore_options</replaceable>] [<replaceable>remote_wal_archive_options</replaceable>]
</programlisting>
@ -3662,7 +3683,7 @@ pg_probackup restore -B <replaceable>backup_dir</replaceable> --instance <replac
Sets the
<ulink url="https://postgrespro.com/docs/postgresql/current/runtime-config-replication.html#GUC-PRIMARY-CONNINFO">primary_conninfo</ulink>
parameter to the specified value.
This option will be ignored unless the <option>-R</option> flag if specified.
This option will be ignored unless the <option>-R</option> flag is specified.
</para>
<para>
Example: <literal>--primary-conninfo='host=192.168.1.50 port=5432 user=foo password=foopass'</literal>
@ -3676,9 +3697,9 @@ pg_probackup restore -B <replaceable>backup_dir</replaceable> --instance <replac
<listitem>
<para>
Sets the
<ulink url="https://postgrespro.com/docs/postgresql/current/runtime-config-replication#GUC-PRIMARY-SLOT-NAME">primary_slot_name</ulink>
<ulink url="https://postgrespro.com/docs/postgresql/current/runtime-config-replication.html#GUC-PRIMARY-SLOT-NAME">primary_slot_name</ulink>
parameter to the specified value.
This option will be ignored unless the <option>-R</option> flag if specified.
This option will be ignored unless the <option>-R</option> flag is specified.
</para>
</listitem>
</varlistentry>
@ -3775,6 +3796,8 @@ pg_probackup restore -B <replaceable>backup_dir</replaceable> --instance <replac
Do not sync restored files to disk. You can use this flag to speed
up restore process. Using this flag can result in data
corruption in case of operating system or hardware crash.
If this happens, you have to run the <xref linkend="pbk-restore"/>
command again.
</para>
</listitem>
</varlistentry>
@ -3902,12 +3925,12 @@ pg_probackup merge -B <replaceable>backup_dir</replaceable> --instance <replacea
[<replaceable>logging_options</replaceable>]
</programlisting>
<para>
Merges the specified incremental backup to its parent full
backup, together with all incremental backups between them, if
any. If the specified backup ID belong to the full backup,
then it will be merged with the closest incremental backup.
As a result, the full backup takes in all the merged
data, and the incremental backups are removed as redundant.
Merges backups that belong to a common incremental backup
chain. If you specify a full backup, it will be merged with its first
incremental backup. If you specify an incremental backup, it will be
merged to its parent full backup, together with all incremental backups
between them. Once the merge is complete, the full backup takes in all
the merged data, and the incremental backups are removed as redundant.
</para>
<para>
For details, see the section
@ -3941,9 +3964,12 @@ pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replace
<title>archive-push</title>
<programlisting>
pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable>
--wal-file-path=<replaceable>wal_file_path</replaceable> --wal-file-name=<replaceable>wal_file_name</replaceable>
[--help] [--compress] [--compress-algorithm=<replaceable>compression_algorithm</replaceable>]
[--compress-level=<replaceable>compression_level</replaceable>] [--overwrite]
--wal-file-name=<replaceable>wal_file_name</replaceable> [--wal-file-path=<replaceable>wal_file_path</replaceable>]
[--help] [--no-sync] [--compress] [--no-ready-rename] [--overwrite]
[-j <replaceable>num_threads</replaceable>] [--batch-size=<replaceable>batch_size</replaceable>]
[--archive-timeout=<replaceable>timeout</replaceable>]
[--compress-algorithm=<replaceable>compression_algorithm</replaceable>]
[--compress-level=<replaceable>compression_level</replaceable>]
[<replaceable>remote_options</replaceable>] [<replaceable>logging_options</replaceable>]
</programlisting>
<para>
@ -3954,12 +3980,10 @@ pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <r
backup instance and the cluster do not match, this command
fails with the following error message: <literal>Refuse to push WAL
segment segment_name into archive. Instance parameters
mismatch.</literal> For each WAL file moved to the backup catalog, you
will see the following message in the <productname>PostgreSQL</productname> log file:
<literal>pg_probackup archive-push completed successfully</literal>.
mismatch.</literal>
</para>
<para>
If the files to be copied already exist in the backup catalog,
If the files to be copied already exist in the backup catalog,
<application>pg_probackup</application> computes and compares their checksums. If the
checksums match, <command>archive-push</command> skips the corresponding file and
returns a successful execution code. Otherwise, <command>archive-push</command>
@ -3968,13 +3992,25 @@ pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <r
with the <option>--overwrite</option> flag.
</para>
<para>
The files are copied to a temporary file with the
<literal>.part</literal> suffix. After the copy is
done, atomic rename is performed. This algorithm ensures that a
failed <command>archive-push</command> will not stall continuous archiving and
that concurrent archiving from multiple sources into a single
WAL archive have no risk of archive corruption. WAL segments copied to
the archive are synced to disk.
Each file is copied to a temporary file with the
<literal>.part</literal> suffix. If the temporary file already
exists, <application>pg_probackup</application> will wait
<option>archive_timeout</option> seconds before discarding it.
After the copy is done, atomic rename is performed.
This algorithm ensures that a failed <command>archive-push</command>
will not stall continuous archiving and that concurrent archiving from
multiple sources into a single WAL archive has no risk of archive
corruption.
</para>
<para>
To speed up archiving, you can specify the <option>-j</option> option
to run <command>archive-push</command> on multiple threads.
If you provide the <option>--batch-size</option> option, WAL files
will be copied in batches of the specified size.
</para>
<para>
WAL segments copied to the archive are synced to disk unless
the <option>--no-sync</option> flag is used.
</para>
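<para>
For example, a sketch of an <literal>archive_command</literal> that combines
these options; the paths, instance name, and numeric values are placeholders:
</para>
<programlisting>
archive_command = '<replaceable>install_dir</replaceable>/pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-name=%f -j 4 --batch-size=10 --no-sync'
</programlisting>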
<para>
You can use <command>archive-push</command> in the
@ -3994,6 +4030,8 @@ pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <r
<title>archive-get</title>
<programlisting>
pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-path=<replaceable>wal_file_path</replaceable> --wal-file-name=<replaceable>wal_file_name</replaceable>
[-j <replaceable>num_threads</replaceable>] [--batch-size=<replaceable>batch_size</replaceable>]
[--prefetch-dir=<replaceable>prefetch_dir_path</replaceable>] [--no-validate-wal]
[--help] [<replaceable>remote_options</replaceable>] [<replaceable>logging_options</replaceable>]
</programlisting>
<para>
@ -4004,6 +4042,17 @@ pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <re
restoring backups using a WAL archive. You do not need to set
it manually.
</para>
<para>
To speed up recovery, you can specify the <option>-j</option> option
to run <command>archive-get</command> on multiple threads.
If you provide the <option>--batch-size</option> option, WAL segments
will be copied in batches of the specified size.
</para>
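<para>
Although <application>pg_probackup</application> sets up
<literal>restore_command</literal> for you, here is a sketch of what such a
command might look like with these options (all values are placeholders):
</para>
<programlisting>
restore_command = '<replaceable>install_dir</replaceable>/pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-path=%p --wal-file-name=%f -j 2 --batch-size=10'
</programlisting>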
<para>
For details, see the section <link linkend="pbk-archiving-options">Archiving Options</link>.
</para>
</refsect3>
</refsect2>
<refsect2 id="pbk-options">
@ -4080,7 +4129,8 @@ pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <re
<para>
Sets the number of parallel threads for <command>backup</command>,
<command>restore</command>, <command>merge</command>,
<command>validate</command>, and <command>checkdb</command> processes.
<command>validate</command>, <command>checkdb</command>, and
<command>archive-push</command> processes.
</para>
</listitem>
</varlistentry>
@ -4130,7 +4180,7 @@ pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <re
The <literal>immediate</literal> value stops the recovery
after reaching the consistent state of the specified
backup, or the latest available backup if the
<option>-i</option>/<option>--backup_id</option> option is omitted.
<option>-i</option>/<option>--backup-id</option> option is omitted.
This is the default behavior for STREAM backups.
</para>
</listitem>
@ -4739,6 +4789,78 @@ pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <re
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--batch-size=<replaceable>batch_size</replaceable></option></term>
<listitem>
<para>
Sets the maximum number of files that can be copied into the archive
by a single <command>archive-push</command> process, or from
the archive by a single <command>archive-get</command> process.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--archive-timeout=<replaceable>wait_time</replaceable></option></term>
<listitem>
<para>
Sets the timeout for considering existing <literal>.part</literal>
files to be stale. By default, <application>pg_probackup</application>
waits 300 seconds.
This option can be used only with the <xref linkend="pbk-archive-push"/> command.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--no-ready-rename</option></term>
<listitem>
<para>
Do not rename status files in the <literal>archive_status</literal> directory.
This option should be used only if <parameter>archive_command</parameter>
contains multiple commands.
This option can be used only with the <xref linkend="pbk-archive-push"/> command.
</para>
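<para>
For example, a hypothetical <parameter>archive_command</parameter> that pushes
each segment into two catalogs could pass this flag to the first invocation,
leaving status file handling to the last one; treat this as a sketch rather
than a recommended setup:
</para>
<programlisting>
archive_command = '<replaceable>install_dir</replaceable>/pg_probackup archive-push -B <replaceable>backup_dir_1</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-name=%f --no-ready-rename &amp;&amp; <replaceable>install_dir</replaceable>/pg_probackup archive-push -B <replaceable>backup_dir_2</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-name=%f'
</programlisting>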
</listitem>
</varlistentry>
<varlistentry>
<term><option>--no-sync</option></term>
<listitem>
<para>
Do not sync copied WAL files to disk. You can use this flag to speed
up the archiving process. Using this flag can result in WAL archive
corruption in case of operating system or hardware crash.
This option can be used only with the <xref linkend="pbk-archive-push"/> command.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--prefetch-dir=<replaceable>path</replaceable></option></term>
<listitem>
<para>
Directory used to store prefetched WAL segments if the <option>--batch-size</option> option is used.
The directory must be located on the same filesystem and mount point as
<literal>PGDATA/pg_wal</literal>.
By default, files are stored in the <literal>PGDATA/pg_wal/pbk_prefetch</literal> directory.
This option can be used only with the <xref linkend="pbk-archive-get"/> command.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--no-validate-wal</option></term>
<listitem>
<para>
Do not validate the prefetched WAL file before using it.
Use this option if you want to speed up recovery.
This option can be used only with the <xref linkend="pbk-archive-get"/> command.
</para>
</listitem>
</varlistentry>
</variablelist>
</para>
</refsect3>
@ -5176,7 +5298,7 @@ INFO: Backup PZ7YK2 completed
<step id="pbk-lets-take-a-look-at-the-backup-catalog">
<title>Let's take a look at the backup catalog:</title>
<programlisting>
[backupman@backup_host] pg_probackup-11 backup -B /mnt/backups --instance 'pg-11'
[backupman@backup_host] pg_probackup-11 show -B /mnt/backups --instance 'pg-11'
BACKUP INSTANCE 'pg-11'
==================================================================================================================================
@ -5267,7 +5389,7 @@ remote-host = postgres_host
<step id="pbk-lets-take-a-look-at-the-backup-catalog-1">
<title>Let's take a look at the backup catalog:</title>
<programlisting>
[backupman@backup_host] pg_probackup-11 backup -B /mnt/backups --instance 'pg-11'
[backupman@backup_host] pg_probackup-11 show -B /mnt/backups --instance 'pg-11'
====================================================================================================================================
Instance Version ID Recovery Time Mode WAL Mode TLI Time Data WAL Zratio Start LSN Stop LSN Status

File diff suppressed because it is too large

View File

@ -153,6 +153,10 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
PGconn *master_conn = NULL;
PGconn *pg_startbackup_conn = NULL;
/* used for multitimeline incremental backup */
parray *tli_list = NULL;
/* for fancy reporting */
time_t start_time, end_time;
char pretty_time[20];
@ -181,17 +185,43 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
current.backup_mode == BACKUP_MODE_DIFF_PTRACK ||
current.backup_mode == BACKUP_MODE_DIFF_DELTA)
{
char prev_backup_filelist_path[MAXPGPATH];
/* get list of backups already taken */
backup_list = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID);
prev_backup = catalog_get_last_data_backup(backup_list, current.tli, current.start_time);
if (prev_backup == NULL)
elog(ERROR, "Valid backup on current timeline %X is not found. "
"Create new FULL backup before an incremental one.",
{
/* try to setup multi-timeline backup chain */
elog(WARNING, "Valid backup on current timeline %u is not found, "
"try to look up on previous timelines",
current.tli);
tli_list = catalog_get_timelines(&instance_config);
if (parray_num(tli_list) == 0)
elog(WARNING, "Cannot find valid backup on previous timelines, "
"WAL archive is not available");
else
{
prev_backup = get_multi_timeline_parent(backup_list, tli_list, current.tli,
current.start_time, &instance_config);
if (prev_backup == NULL)
elog(WARNING, "Cannot find valid backup on previous timelines");
}
/* failed to find suitable parent, error out */
if (!prev_backup)
elog(ERROR, "Create new full backup before an incremental one");
}
}
if (prev_backup)
{
char prev_backup_filelist_path[MAXPGPATH];
elog(INFO, "Parent backup: %s", base36enc(prev_backup->start_time));
join_path_components(prev_backup_filelist_path, prev_backup->root_dir,
DATABASE_FILE_LIST);
/* Files of previous backup needed by DELTA backup */
@ -378,8 +408,10 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE ||
current.backup_mode == BACKUP_MODE_DIFF_PTRACK)
{
elog(INFO, "Compiling pagemap of changed blocks");
bool pagemap_isok = true;
time(&start_time);
elog(INFO, "Extracting pagemap of changed blocks");
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE)
{
@ -388,8 +420,9 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
* reading WAL segments present in archives up to the point
* where this backup has started.
*/
extractPageMap(arclog_path, current.tli, instance_config.xlog_seg_size,
prev_backup->start_lsn, current.start_lsn);
pagemap_isok = extractPageMap(arclog_path, instance_config.xlog_seg_size,
prev_backup->start_lsn, prev_backup->tli,
current.start_lsn, current.tli, tli_list);
}
else if (current.backup_mode == BACKUP_MODE_DIFF_PTRACK)
{
@ -407,8 +440,14 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
}
time(&end_time);
elog(INFO, "Pagemap compiled, time elapsed %.0f sec",
difftime(end_time, start_time));
/* TODO: add ms precision */
if (pagemap_isok)
elog(INFO, "Pagemap successfully extracted, time elapsed: %.0f sec",
difftime(end_time, start_time));
else
elog(ERROR, "Pagemap extraction failed, time elasped: %.0f sec",
difftime(end_time, start_time));
}
/*
@ -667,6 +706,15 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
elog(INFO, "Backup files are synced, time elapsed: %s", pretty_time);
}
/* be paranoid about the instance being from the past */
if (current.backup_mode != BACKUP_MODE_FULL &&
current.stop_lsn < prev_backup->stop_lsn)
elog(ERROR, "Current backup STOP LSN %X/%X is lower than STOP LSN %X/%X of previous backup %s. "
"It may indicate that we are trying to backup PostgreSQL instance from the past.",
(uint32) (current.stop_lsn >> 32), (uint32) (current.stop_lsn),
(uint32) (prev_backup->stop_lsn >> 32), (uint32) (prev_backup->stop_lsn),
base36enc(prev_backup->stop_lsn));
/* clean external directories list */
if (external_dirs)
free_dir_list(external_dirs);
@ -678,6 +726,12 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
parray_free(backup_list);
}
if (tli_list)
{
parray_walk(tli_list, timelineInfoFree);
parray_free(tli_list);
}
parray_walk(backup_files_list, pgFileFree);
parray_free(backup_files_list);
backup_files_list = NULL;

View File

@ -42,6 +42,24 @@ timelineInfoNew(TimeLineID tli)
return tlinfo;
}
/* free timelineInfo object */
void
timelineInfoFree(void *tliInfo)
{
timelineInfo *tli = (timelineInfo *) tliInfo;
parray_walk(tli->xlog_filelist, pgFileFree);
parray_free(tli->xlog_filelist);
if (tli->backups)
{
parray_walk(tli->backups, pgBackupFree);
parray_free(tli->backups);
}
pfree(tliInfo);
}
/* Iterate over locked backups and delete locks files */
static void
unlink_lock_atexit(void)
@ -597,7 +615,7 @@ catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current
switch (scan_parent_chain(backup, &tmp_backup))
{
/* broken chain */
case 0:
case ChainIsBroken:
invalid_backup_id = base36enc_dup(tmp_backup->parent_backup);
elog(WARNING, "Backup %s has missing parent: %s. Cannot be a parent",
@ -606,7 +624,7 @@ catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current
continue;
/* chain is intact, but at least one parent is invalid */
case 1:
case ChainIsInvalid:
invalid_backup_id = base36enc_dup(tmp_backup->start_time);
elog(WARNING, "Backup %s has invalid parent: %s. Cannot be a parent",
@ -615,17 +633,13 @@ catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current
continue;
/* chain is ok */
case 2:
case ChainIsOk:
/* Yes, we could call is_parent() earlier - after choosing the ancestor,
* but this way we have an opportunity to detect and report all possible
* anomalies.
*/
if (is_parent(full_backup->start_time, backup, true))
{
elog(INFO, "Parent backup: %s",
base36enc(backup->start_time));
return backup;
}
}
}
/* skip yourself */
@ -641,6 +655,150 @@ catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current
return NULL;
}
/*
* For a multi-timeline chain, look up a suitable parent for the incremental backup.
* A multi-timeline chain has a full backup and one or more descendants located
* on different timelines.
*/
pgBackup *
get_multi_timeline_parent(parray *backup_list, parray *tli_list,
TimeLineID current_tli, time_t current_start_time,
InstanceConfig *instance)
{
int i;
timelineInfo *my_tlinfo = NULL;
timelineInfo *tmp_tlinfo = NULL;
pgBackup *ancestor_backup = NULL;
/* there are no timelines in the archive */
if (parray_num(tli_list) == 0)
return NULL;
/* look for current timelineInfo */
for (i = 0; i < parray_num(tli_list); i++)
{
timelineInfo *tlinfo = (timelineInfo *) parray_get(tli_list, i);
if (tlinfo->tli == current_tli)
{
my_tlinfo = tlinfo;
break;
}
}
if (my_tlinfo == NULL)
return NULL;
/* Locate tlinfo of suitable full backup.
* Consider this example:
* t3 s2-------X <-! We are here
* /
* t2 s1----D---*----E--->
* /
* t1--A--B--*---C------->
*
* A, E - full backups
* B, C, D - incremental backups
*
* We must find A.
*/
tmp_tlinfo = my_tlinfo;
while (tmp_tlinfo->parent_link)
{
/* if timeline has backups, iterate over them */
if (tmp_tlinfo->parent_link->backups)
{
for (i = 0; i < parray_num(tmp_tlinfo->parent_link->backups); i++)
{
pgBackup *backup = (pgBackup *) parray_get(tmp_tlinfo->parent_link->backups, i);
if (backup->backup_mode == BACKUP_MODE_FULL &&
(backup->status == BACKUP_STATUS_OK ||
backup->status == BACKUP_STATUS_DONE) &&
backup->stop_lsn <= tmp_tlinfo->switchpoint)
{
ancestor_backup = backup;
break;
}
}
}
if (ancestor_backup)
break;
tmp_tlinfo = tmp_tlinfo->parent_link;
}
/* failed to find valid FULL backup on parent timelines */
if (!ancestor_backup)
return NULL;
else
elog(LOG, "Latest valid full backup: %s, tli: %i",
base36enc(ancestor_backup->start_time), ancestor_backup->tli);
/* At this point we have found a suitable full backup,
* now we must find its latest child, suitable to be
* the parent of the current incremental backup.
* Consider this example:
* t3 s2-------X <-! We are here
* /
* t2 s1----D---*----E--->
* /
* t1--A--B--*---C------->
*
* A, E - full backups
* B, C, D - incremental backups
*
* We found A, now we must find D.
*/
/* Optimistically, look on current timeline for valid incremental backup, child of ancestor */
if (my_tlinfo->backups)
{
/* backups are sorted in descending order and we need latest valid */
for (i = 0; i < parray_num(my_tlinfo->backups); i++)
{
pgBackup *tmp_backup = NULL;
pgBackup *backup = (pgBackup *) parray_get(my_tlinfo->backups, i);
/* found suitable parent */
if (scan_parent_chain(backup, &tmp_backup) == ChainIsOk &&
is_parent(ancestor_backup->start_time, backup, false))
return backup;
}
}
/* Iterate over parent timelines and look for a valid backup, child of ancestor */
tmp_tlinfo = my_tlinfo;
while (tmp_tlinfo->parent_link)
{
/* if timeline has backups, iterate over them */
if (tmp_tlinfo->parent_link->backups)
{
for (i = 0; i < parray_num(tmp_tlinfo->parent_link->backups); i++)
{
pgBackup *tmp_backup = NULL;
pgBackup *backup = (pgBackup *) parray_get(tmp_tlinfo->parent_link->backups, i);
/* We are not interested in backups
* located outside of our timeline history
*/
if (backup->stop_lsn > tmp_tlinfo->switchpoint)
continue;
if (scan_parent_chain(backup, &tmp_backup) == ChainIsOk &&
is_parent(ancestor_backup->start_time, backup, true))
return backup;
}
}
tmp_tlinfo = tmp_tlinfo->parent_link;
}
return NULL;
}
/* create backup directory in $BACKUP_PATH */
int
pgBackupCreateDir(pgBackup *backup)
@ -2225,18 +2383,18 @@ scan_parent_chain(pgBackup *current_backup, pgBackup **result_backup)
{
/* Set oldest child backup in chain */
*result_backup = target_backup;
return 0;
return ChainIsBroken;
}
/* chain is ok, but some backups are invalid */
if (invalid_backup)
{
*result_backup = invalid_backup;
return 1;
return ChainIsInvalid;
}
*result_backup = target_backup;
return 2;
return ChainIsOk;
}
/*

View File

@ -803,7 +803,7 @@ backup_non_data_file(pgFile *file, pgFile *prev_file,
file->mtime <= parent_backup_time)
{
file->crc = fio_get_crc32(from_fullpath, FIO_DB_HOST);
file->crc = fio_get_crc32(from_fullpath, FIO_DB_HOST, false);
/* ...and checksum is the same... */
if (EQ_TRADITIONAL_CRC32(file->crc, prev_file->crc))
@ -1069,7 +1069,7 @@ restore_non_data_file_internal(FILE *in, FILE *out, pgFile *file,
break;
if (read_len < 0)
elog(ERROR, "Cannot read backup mode file \"%s\": %s",
elog(ERROR, "Cannot read backup file \"%s\": %s",
from_fullpath, strerror(errno));
if (fio_fwrite(out, buf, read_len) != read_len)

View File

@ -315,6 +315,72 @@ pgFileGetCRC(const char *file_path, bool use_crc32c, bool missing_ok)
return crc;
}
/*
* Read the local file to compute its CRC.
* We cannot make a decision about file decompression here, because the
* user may ask to back up already compressed files, and we should be
* explicit about it.
*/
pg_crc32
pgFileGetCRCgz(const char *file_path, bool use_crc32c, bool missing_ok)
{
gzFile fp;
pg_crc32 crc = 0;
char buf[STDIO_BUFSIZE];
int len = 0;
int err;
INIT_FILE_CRC32(use_crc32c, crc);
/* open file in binary read mode */
fp = gzopen(file_path, PG_BINARY_R);
if (fp == NULL)
{
if (errno == ENOENT)
{
if (missing_ok)
{
FIN_FILE_CRC32(use_crc32c, crc);
return crc;
}
}
elog(ERROR, "Cannot open file \"%s\": %s",
file_path, strerror(errno));
}
/* calc CRC of file */
for (;;)
{
if (interrupted)
elog(ERROR, "interrupted during CRC calculation");
len = gzread(fp, &buf, sizeof(buf));
if (len <= 0)
{
/* we either run into eof or error */
if (gzeof(fp))
break;
else
{
const char *err_str = NULL;
err_str = gzerror(fp, &err);
elog(ERROR, "Cannot read from compressed file %s", err_str);
}
}
/* update CRC */
COMP_FILE_CRC32(use_crc32c, crc, buf, len);
}
FIN_FILE_CRC32(use_crc32c, crc);
gzclose(fp);
return crc;
}
void
pgFileFree(void *file)
{

View File

@ -214,10 +214,11 @@ help_pg_probackup(void)
printf(_(" [--help]\n"));
printf(_("\n %s archive-push -B backup-path --instance=instance_name\n"), PROGRAM_NAME);
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" [--overwrite]\n"));
printf(_(" [--compress]\n"));
printf(_(" [-j num-threads] [--batch-size=batch_size]\n"));
printf(_(" [--archive-timeout=timeout]\n"));
printf(_(" [--no-ready-rename] [--no-sync]\n"));
printf(_(" [--overwrite] [--compress]\n"));
printf(_(" [--compress-algorithm=compress-algorithm]\n"));
printf(_(" [--compress-level=compress-level]\n"));
printf(_(" [--remote-proto] [--remote-host]\n"));
@ -228,6 +229,8 @@ help_pg_probackup(void)
printf(_("\n %s archive-get -B backup-path --instance=instance_name\n"), PROGRAM_NAME);
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" [-j num-threads] [--batch-size=batch_size]\n"));
printf(_(" [--no-validate-wal]\n"));
printf(_(" [--remote-proto] [--remote-host]\n"));
printf(_(" [--remote-port] [--remote-path] [--remote-user]\n"));
printf(_(" [--ssh-options]\n"));
@ -869,10 +872,11 @@ static void
help_archive_push(void)
{
printf(_("\n%s archive-push -B backup-path --instance=instance_name\n"), PROGRAM_NAME);
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" [--overwrite]\n"));
printf(_(" [--compress]\n"));
printf(_(" [-j num-threads] [--batch-size=batch_size]\n"));
printf(_(" [--archive-timeout=timeout]\n"));
printf(_(" [--no-ready-rename] [--no-sync]\n"));
printf(_(" [--overwrite] [--compress]\n"));
printf(_(" [--compress-algorithm=compress-algorithm]\n"));
printf(_(" [--compress-level=compress-level]\n"));
printf(_(" [--remote-proto] [--remote-host]\n"));
@ -881,10 +885,13 @@ help_archive_push(void)
printf(_(" -B, --backup-path=backup-path location of the backup storage area\n"));
printf(_(" --instance=instance_name name of the instance to delete\n"));
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" relative path name of the WAL file on the server\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" name of the WAL file to retrieve from the server\n"));
printf(_(" name of the file to copy into WAL archive\n"));
printf(_(" -j, --threads=NUM number of parallel threads\n"));
printf(_(" --batch-size=NUM number of files to be copied\n"));
printf(_(" --archive-timeout=timeout wait timeout before discarding stale temp file(default: 5min)\n"));
printf(_(" --no-ready-rename do not rename '.ready' files in 'archive_status' directory\n"));
printf(_(" --no-sync do not sync WAL file to disk\n"));
printf(_(" --overwrite overwrite archived WAL file\n"));
printf(_("\n Compression options:\n"));
@ -912,6 +919,8 @@ help_archive_get(void)
printf(_("\n%s archive-get -B backup-path --instance=instance_name\n"), PROGRAM_NAME);
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" [-j num-threads] [--batch-size=batch_size]\n"));
printf(_(" [--no-validate-wal]\n"));
printf(_(" [--remote-proto] [--remote-host]\n"));
printf(_(" [--remote-port] [--remote-path] [--remote-user]\n"));
printf(_(" [--ssh-options]\n\n"));
@ -922,6 +931,10 @@ help_archive_get(void)
printf(_(" relative destination path name of the WAL file on the server\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" name of the WAL file to retrieve from the archive\n"));
printf(_(" -j, --threads=NUM number of parallel threads\n"));
printf(_(" --batch-size=NUM number of files to be prefetched\n"));
printf(_(" --prefetch-dir=path location of the store area for prefetched WAL files\n"));
printf(_(" --no-validate-wal skip validation of prefetched WAL file before using it\n"));
printf(_("\n Remote options:\n"));
printf(_(" --remote-proto=protocol remote protocol to use\n"));

View File

@ -138,6 +138,9 @@ typedef struct
*/
bool got_target;
/* Should we read the record located at the endpoint position */
bool inclusive_endpoint;
/*
* Return value from the thread.
* 0 means there is no error, 1 - there is an error.
@ -162,7 +165,8 @@ static bool RunXLogThreads(const char *archivedir,
XLogRecPtr startpoint, XLogRecPtr endpoint,
bool consistent_read,
xlog_record_function process_record,
XLogRecTarget *last_rec);
XLogRecTarget *last_rec,
bool inclusive_endpoint);
//static XLogReaderState *InitXLogThreadRead(xlog_thread_arg *arg);
static bool SwitchThreadToNextWal(XLogReaderState *xlogreader,
xlog_thread_arg *arg);
@ -231,18 +235,121 @@ static XLogRecPtr wal_target_lsn = InvalidXLogRecPtr;
* Pagemap extracting is processed using threads. Each thread reads single WAL
* file.
*/
void
extractPageMap(const char *archivedir, TimeLineID tli, uint32 wal_seg_size,
XLogRecPtr startpoint, XLogRecPtr endpoint)
bool
extractPageMap(const char *archivedir, uint32 wal_seg_size,
XLogRecPtr startpoint, TimeLineID start_tli,
XLogRecPtr endpoint, TimeLineID end_tli,
parray *tli_list)
{
bool extract_isok = true;
bool extract_isok = false;
extract_isok = RunXLogThreads(archivedir, 0, InvalidTransactionId,
InvalidXLogRecPtr, tli, wal_seg_size,
startpoint, endpoint, false, extractPageInfo,
NULL);
if (!extract_isok)
elog(ERROR, "Pagemap compiling failed");
if (start_tli == end_tli)
/* easy case */
extract_isok = RunXLogThreads(archivedir, 0, InvalidTransactionId,
InvalidXLogRecPtr, end_tli, wal_seg_size,
startpoint, endpoint, false, extractPageInfo,
NULL, true);
else
{
/* We have to process WAL from several different xlog intervals,
* located on different timelines.
*
* Consider this example:
* t3 C-----X <!- We are here
* /
* t2 B---*-->
* /
* t1 -A----*------->
*
* A - prev backup START_LSN
* B - switchpoint for t2, available as t2->switchpoint
* C - switch for t3, available as t3->switchpoint
* X - current backup START_LSN
*
* Intervals to be parsed:
* - [A,B) on t1
* - [B,C) on t2
* - [C,X] on t3
*/
int i;
parray *interval_list = parray_new();
timelineInfo *end_tlinfo = NULL;
timelineInfo *tmp_tlinfo = NULL;
XLogRecPtr prev_switchpoint = InvalidXLogRecPtr;
lsnInterval *wal_interval = NULL;
/* We must find TLI information about final timeline (t3 in example) */
for (i = 0; i < parray_num(tli_list); i++)
{
tmp_tlinfo = parray_get(tli_list, i);
if (tmp_tlinfo->tli == end_tli)
{
end_tlinfo = tmp_tlinfo;
break;
}
}
/* Iterate over timelines backward,
* starting with end_tli and ending with start_tli.
* For every timeline calculate LSN-interval that must be parsed.
*/
tmp_tlinfo = end_tlinfo;
while (tmp_tlinfo)
{
wal_interval = pgut_malloc(sizeof(lsnInterval));
wal_interval->tli = tmp_tlinfo->tli;
if (tmp_tlinfo->tli == end_tli)
{
wal_interval->begin_lsn = tmp_tlinfo->switchpoint;
wal_interval->end_lsn = endpoint;
}
else if (tmp_tlinfo->tli == start_tli)
{
wal_interval->begin_lsn = startpoint;
wal_interval->end_lsn = prev_switchpoint;
}
else
{
wal_interval->begin_lsn = tmp_tlinfo->switchpoint;
wal_interval->end_lsn = prev_switchpoint;
}
prev_switchpoint = tmp_tlinfo->switchpoint;
tmp_tlinfo = tmp_tlinfo->parent_link;
parray_append(interval_list, wal_interval);
}
for (i = parray_num(interval_list) - 1; i >= 0; i--)
{
bool inclusive_endpoint;
wal_interval = parray_get(interval_list, i);
/* In case of replica promotion, endpoints of intermediate
* timelines can be unreachable.
*/
inclusive_endpoint = false;
/* ... but not the end timeline */
if (wal_interval->tli == end_tli)
inclusive_endpoint = true;
extract_isok = RunXLogThreads(archivedir, 0, InvalidTransactionId,
InvalidXLogRecPtr, wal_interval->tli, wal_seg_size,
wal_interval->begin_lsn, wal_interval->end_lsn,
false, extractPageInfo, NULL, inclusive_endpoint);
if (!extract_isok)
break;
pg_free(wal_interval);
}
pg_free(interval_list);
}
return extract_isok;
}
/*
@ -262,7 +369,7 @@ validate_backup_wal_from_start_to_stop(pgBackup *backup,
got_endpoint = RunXLogThreads(archivedir, 0, InvalidTransactionId,
InvalidXLogRecPtr, tli, xlog_seg_size,
backup->start_lsn, backup->stop_lsn,
false, NULL, NULL);
false, NULL, NULL, true);
if (!got_endpoint)
{
@ -349,7 +456,7 @@ validate_wal(pgBackup *backup, const char *archivedir,
* If recovery target is provided, ensure that archive files exist in
* archive directory.
*/
if (dir_is_empty(archivedir, FIO_BACKUP_HOST))
if (dir_is_empty(archivedir, FIO_LOCAL_HOST))
elog(ERROR, "WAL archive is empty. You cannot restore backup to a recovery target without WAL archive.");
/*
@ -373,7 +480,7 @@ validate_wal(pgBackup *backup, const char *archivedir,
all_wal = all_wal ||
RunXLogThreads(archivedir, target_time, target_xid, target_lsn,
tli, wal_seg_size, backup->stop_lsn,
InvalidXLogRecPtr, true, validateXLogRecord, &last_rec);
InvalidXLogRecPtr, true, validateXLogRecord, &last_rec, true);
if (last_rec.rec_time > 0)
time2iso(last_timestamp, lengthof(last_timestamp),
timestamptz_to_time_t(last_rec.rec_time));
@ -753,20 +860,35 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
if (!reader_data->xlogexists)
{
char xlogfname[MAXFNAMELEN];
char partial_file[MAXPGPATH];
GetXLogFileName(xlogfname, reader_data->tli, reader_data->xlogsegno,
wal_seg_size);
snprintf(reader_data->xlogpath, MAXPGPATH, "%s/%s", wal_archivedir,
xlogfname);
GetXLogFileName(xlogfname, reader_data->tli, reader_data->xlogsegno, wal_seg_size);
if (fileExists(reader_data->xlogpath, FIO_BACKUP_HOST))
snprintf(reader_data->xlogpath, MAXPGPATH, "%s/%s", wal_archivedir, xlogfname);
snprintf(reader_data->gz_xlogpath, MAXPGPATH, "%s.gz", reader_data->xlogpath);
/* We fall back to using the .partial segment in case we are running
* a multi-timeline incremental backup right after standby promotion.
* TODO: it should be explicitly enabled.
*/
snprintf(partial_file, MAXPGPATH, "%s.partial", reader_data->xlogpath);
/* If the segment does not exist, but the same
* segment with the '.partial' suffix does, use it instead */
if (!fileExists(reader_data->xlogpath, FIO_LOCAL_HOST) &&
fileExists(partial_file, FIO_LOCAL_HOST))
{
snprintf(reader_data->xlogpath, MAXPGPATH, "%s", partial_file);
}
if (fileExists(reader_data->xlogpath, FIO_LOCAL_HOST))
{
elog(LOG, "Thread [%d]: Opening WAL segment \"%s\"",
reader_data->thread_num, reader_data->xlogpath);
reader_data->xlogexists = true;
reader_data->xlogfile = fio_open(reader_data->xlogpath,
O_RDONLY | PG_BINARY, FIO_BACKUP_HOST);
O_RDONLY | PG_BINARY, FIO_LOCAL_HOST);
if (reader_data->xlogfile < 0)
{
@ -778,29 +900,23 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
}
#ifdef HAVE_LIBZ
/* Try to open compressed WAL segment */
else
else if (fileExists(reader_data->gz_xlogpath, FIO_LOCAL_HOST))
{
snprintf(reader_data->gz_xlogpath, sizeof(reader_data->gz_xlogpath),
"%s.gz", reader_data->xlogpath);
if (fileExists(reader_data->gz_xlogpath, FIO_BACKUP_HOST))
{
elog(LOG, "Thread [%d]: Opening compressed WAL segment \"%s\"",
reader_data->thread_num, reader_data->gz_xlogpath);
elog(LOG, "Thread [%d]: Opening compressed WAL segment \"%s\"",
reader_data->thread_num, reader_data->gz_xlogpath);
reader_data->xlogexists = true;
reader_data->gz_xlogfile = fio_gzopen(reader_data->gz_xlogpath,
"rb", -1, FIO_BACKUP_HOST);
if (reader_data->gz_xlogfile == NULL)
{
elog(WARNING, "Thread [%d]: Could not open compressed WAL segment \"%s\": %s",
reader_data->thread_num, reader_data->gz_xlogpath,
strerror(errno));
return -1;
}
reader_data->xlogexists = true;
reader_data->gz_xlogfile = fio_gzopen(reader_data->gz_xlogpath,
"rb", -1, FIO_LOCAL_HOST);
if (reader_data->gz_xlogfile == NULL)
{
elog(WARNING, "Thread [%d]: Could not open compressed WAL segment \"%s\": %s",
reader_data->thread_num, reader_data->gz_xlogpath,
strerror(errno));
return -1;
}
}
#endif
/* Exit without error if WAL segment doesn't exist */
if (!reader_data->xlogexists)
return -1;
@ -923,7 +1039,7 @@ RunXLogThreads(const char *archivedir, time_t target_time,
TransactionId target_xid, XLogRecPtr target_lsn, TimeLineID tli,
uint32 segment_size, XLogRecPtr startpoint, XLogRecPtr endpoint,
bool consistent_read, xlog_record_function process_record,
XLogRecTarget *last_rec)
XLogRecTarget *last_rec, bool inclusive_endpoint)
{
pthread_t *threads;
xlog_thread_arg *thread_args;
@ -932,17 +1048,25 @@ RunXLogThreads(const char *archivedir, time_t target_time,
XLogSegNo endSegNo = 0;
bool result = true;
if (!XRecOffIsValid(startpoint))
if (!XRecOffIsValid(startpoint) && !XRecOffIsNull(startpoint))
elog(ERROR, "Invalid startpoint value %X/%X",
(uint32) (startpoint >> 32), (uint32) (startpoint));
if (!XLogRecPtrIsInvalid(endpoint))
{
if (!XRecOffIsValid(endpoint))
// if (XRecOffIsNull(endpoint) && !inclusive_endpoint)
if (XRecOffIsNull(endpoint))
{
GetXLogSegNo(endpoint, endSegNo, segment_size);
endSegNo--;
}
else if (!XRecOffIsValid(endpoint))
{
elog(ERROR, "Invalid endpoint value %X/%X",
(uint32) (endpoint >> 32), (uint32) (endpoint));
GetXLogSegNo(endpoint, endSegNo, segment_size);
}
else
GetXLogSegNo(endpoint, endSegNo, segment_size);
}
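/* Illustration (hedged, 16MB segment size assumed): an endpoint of 0/3000000
 * sits exactly on a segment boundary, so endSegNo is decremented from 3 to 2
 * and the readers stop after segment 0/2000000-0/2FFFFFF instead of requiring
 * segment 3, which may not yet exist in the archive. */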
/* Initialize static variables for workers */
@ -977,6 +1101,7 @@ RunXLogThreads(const char *archivedir, time_t target_time,
arg->startpoint = startpoint;
arg->endpoint = endpoint;
arg->endSegNo = endSegNo;
arg->inclusive_endpoint = inclusive_endpoint;
arg->got_target = false;
/* By default there is some error */
arg->ret = 1;
@ -1192,6 +1317,18 @@ XLogThreadWorker(void *arg)
reader_data->thread_num,
(uint32) (errptr >> 32), (uint32) (errptr));
/* If we failed to read the record located at the endpoint position,
* and the endpoint is not inclusive, do not consider this an error.
*/
if (!thread_arg->inclusive_endpoint &&
errptr == thread_arg->endpoint)
{
elog(LOG, "Thread [%d]: Endpoint %X/%X is not inclusive, switch to the next timeline",
reader_data->thread_num,
(uint32) (thread_arg->endpoint >> 32), (uint32) (thread_arg->endpoint));
break;
}
/*
* If we don't have all WAL files from prev backup start_lsn to current
* start_lsn, we won't be able to build page map and PAGE backup will
@ -1583,3 +1720,28 @@ getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime)
return false;
}
bool validate_wal_segment(TimeLineID tli, XLogSegNo segno, const char *prefetch_dir, uint32 wal_seg_size)
{
XLogRecPtr startpoint;
XLogRecPtr endpoint;
bool rc;
int tmp_num_threads = num_threads;
/* calculate startpoint and endpoint */
GetXLogRecPtr(segno, 0, wal_seg_size, startpoint);
GetXLogRecPtr(segno+1, 0, wal_seg_size, endpoint);
/* disable multi-threading for the duration of this check */
num_threads = 1;
rc = RunXLogThreads(prefetch_dir, 0, InvalidTransactionId,
InvalidXLogRecPtr, tli, wal_seg_size,
startpoint, endpoint, false, NULL, NULL, true);
num_threads = tmp_num_threads;
return rc;
}
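/*
 * A minimal usage sketch (hypothetical caller, not part of this patch),
 * assuming tli, segno, prefetch_dir and wal_seg_size come from the
 * surrounding archive-get code: check a prefetched segment before handing
 * it to the server.
 */
static bool
prefetched_segment_is_usable(TimeLineID tli, XLogSegNo segno,
                             const char *prefetch_dir, uint32 wal_seg_size)
{
    /* validate_wal_segment() walks every record of the segment */
    if (!validate_wal_segment(tli, segno, prefetch_dir, wal_seg_size))
    {
        elog(WARNING, "Prefetched WAL segment in \"%s\" is invalid, cannot use it",
             prefetch_dir);
        return false;
    }
    return true;
}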

View File

@ -125,9 +125,15 @@ bool compress_shortcut = false;
char *instance_name;
/* archive push options */
int batch_size = 1;
static char *wal_file_path;
static char *wal_file_name;
static bool file_overwrite = false;
static bool file_overwrite = false;
static bool no_ready_rename = false;
/* archive get options */
static char *prefetch_dir;
bool no_validate_wal = false;
/* show options */
ShowFormat show_format = SHOW_PLAIN;
@ -172,7 +178,6 @@ static ConfigOption cmd_options[] =
{ 'f', 'b', "backup-mode", opt_backup_mode, SOURCE_CMD_STRICT },
{ 'b', 'C', "smooth-checkpoint", &smooth_checkpoint, SOURCE_CMD_STRICT },
{ 's', 'S', "slot", &replication_slot, SOURCE_CMD_STRICT },
{ 's', 'S', "primary-slot-name",&replication_slot, SOURCE_CMD_STRICT },
{ 'b', 181, "temp-slot", &temp_slot, SOURCE_CMD_STRICT },
{ 'b', 182, "delete-wal", &delete_wal, SOURCE_CMD_STRICT },
{ 'b', 183, "delete-expired", &delete_expired, SOURCE_CMD_STRICT },
@ -189,13 +194,14 @@ static ConfigOption cmd_options[] =
{ 'f', 155, "external-mapping", opt_externaldir_map, SOURCE_CMD_STRICT },
{ 's', 141, "recovery-target-name", &target_name, SOURCE_CMD_STRICT },
{ 's', 142, "recovery-target-action", &target_action, SOURCE_CMD_STRICT },
{ 'b', 'R', "restore-as-replica", &restore_as_replica, SOURCE_CMD_STRICT },
{ 'b', 143, "no-validate", &no_validate, SOURCE_CMD_STRICT },
{ 'b', 154, "skip-block-validation", &skip_block_validation, SOURCE_CMD_STRICT },
{ 'b', 156, "skip-external-dirs", &skip_external_dirs, SOURCE_CMD_STRICT },
{ 'f', 158, "db-include", opt_datname_include_list, SOURCE_CMD_STRICT },
{ 'f', 159, "db-exclude", opt_datname_exclude_list, SOURCE_CMD_STRICT },
{ 'b', 'R', "restore-as-replica", &restore_as_replica, SOURCE_CMD_STRICT },
{ 's', 160, "primary-conninfo", &primary_conninfo, SOURCE_CMD_STRICT },
{ 's', 'S', "primary-slot-name",&replication_slot, SOURCE_CMD_STRICT },
/* checkdb options */
{ 'b', 195, "amcheck", &need_amcheck, SOURCE_CMD_STRICT },
{ 'b', 196, "heapallindexed", &heapallindexed, SOURCE_CMD_STRICT },
@ -218,9 +224,14 @@ static ConfigOption cmd_options[] =
{ 's', 150, "wal-file-path", &wal_file_path, SOURCE_CMD_STRICT },
{ 's', 151, "wal-file-name", &wal_file_name, SOURCE_CMD_STRICT },
{ 'b', 152, "overwrite", &file_overwrite, SOURCE_CMD_STRICT },
{ 'b', 153, "no-ready-rename", &no_ready_rename, SOURCE_CMD_STRICT },
{ 'i', 162, "batch-size", &batch_size, SOURCE_CMD_STRICT },
/* archive-get options */
{ 's', 163, "prefetch-dir", &prefetch_dir, SOURCE_CMD_STRICT },
{ 'b', 164, "no-validate-wal", &no_validate_wal, SOURCE_CMD_STRICT },
/* show options */
{ 'f', 153, "format", opt_show_format, SOURCE_CMD_STRICT },
{ 'b', 161, "archive", &show_archive, SOURCE_CMD_STRICT },
{ 'f', 165, "format", opt_show_format, SOURCE_CMD_STRICT },
{ 'b', 166, "archive", &show_archive, SOURCE_CMD_STRICT },
/* set-backup options */
{ 'I', 170, "ttl", &ttl, SOURCE_CMD_STRICT, SOURCE_DEFAULT, 0, OPTION_UNIT_S, option_get_value},
{ 's', 171, "expire-time", &expire_time_string, SOURCE_CMD_STRICT },
@ -264,9 +275,6 @@ main(int argc, char *argv[])
{
char *command = NULL,
*command_name;
/* Check if backup_path is directory. */
struct stat stat_buf;
int rc;
PROGRAM_NAME_FULL = argv[0];
@ -446,11 +454,6 @@ main(int argc, char *argv[])
/* Ensure that backup_path is an absolute path */
if (!is_absolute_path(backup_path))
elog(ERROR, "-B, --backup-path must be an absolute path");
/* Ensure that backup_path is a path to a directory */
rc = stat(backup_path, &stat_buf);
if (rc != -1 && !S_ISDIR(stat_buf.st_mode))
elog(ERROR, "-B, --backup-path must be a path to directory");
}
/* Ensure that backup_path is an absolute path */
@ -502,12 +505,16 @@ main(int argc, char *argv[])
/*
* Ensure that requested backup instance exists.
* for all commands except init, which doesn't take this parameter
* and add-instance which creates new instance.
* for all commands except init, which doesn't take this parameter,
* add-instance, which creates a new instance,
* and archive-get, which simply does not require it at this point
*/
if (backup_subcmd != INIT_CMD && backup_subcmd != ADD_INSTANCE_CMD)
if (backup_subcmd != INIT_CMD && backup_subcmd != ADD_INSTANCE_CMD &&
backup_subcmd != ARCHIVE_GET_CMD)
{
if (fio_access(backup_instance_path, F_OK, FIO_BACKUP_HOST) != 0)
struct stat st;
if (fio_stat(backup_instance_path, &st, true, FIO_BACKUP_HOST) != 0)
{
elog(WARNING, "Failed to access directory \"%s\": %s",
backup_instance_path, strerror(errno));
@ -516,6 +523,12 @@ main(int argc, char *argv[])
elog(ERROR, "Instance '%s' does not exist in this backup catalog",
instance_name);
}
else
{
/* Ensure that backup_path is a path to a directory */
if (!S_ISDIR(st.st_mode))
elog(ERROR, "-B, --backup-path must be a path to directory");
}
}
}
@ -531,7 +544,8 @@ main(int argc, char *argv[])
config_get_opt_env(instance_options);
/* Read options from configuration file */
if (backup_subcmd != ADD_INSTANCE_CMD)
if (backup_subcmd != ADD_INSTANCE_CMD &&
backup_subcmd != ARCHIVE_GET_CMD)
{
join_path_components(path, backup_instance_path,
BACKUP_CATALOG_CONF_FILE);
@ -748,17 +762,22 @@ main(int argc, char *argv[])
if (num_threads < 1)
num_threads = 1;
if (batch_size < 1)
batch_size = 1;
compress_init();
/* do actual operation */
switch (backup_subcmd)
{
case ARCHIVE_PUSH_CMD:
return do_archive_push(&instance_config, wal_file_path,
wal_file_name, file_overwrite);
do_archive_push(&instance_config, wal_file_path, wal_file_name,
batch_size, file_overwrite, no_sync, no_ready_rename);
break;
case ARCHIVE_GET_CMD:
return do_archive_get(&instance_config,
wal_file_path, wal_file_name);
do_archive_get(&instance_config, prefetch_dir,
wal_file_path, wal_file_name, batch_size, !no_validate_wal);
break;
case ADD_INSTANCE_CMD:
return do_add_instance(&instance_config);
case DELETE_INSTANCE_CMD:

View File

@ -67,7 +67,6 @@ extern const char *PROGRAM_EMAIL;
#define DATABASE_MAP "database_map"
/* Timeout defaults */
#define PARTIAL_WAL_TIMER 60
#define ARCHIVE_TIMEOUT_DEFAULT 300
#define REPLICA_TIMEOUT_DEFAULT 300
@ -475,7 +474,7 @@ struct timelineInfo {
TimeLineID tli; /* this timeline */
TimeLineID parent_tli; /* parent timeline. 0 if none */
timelineInfo *parent_link; /* link to parent timeline */
XLogRecPtr switchpoint; /* if this timeline has a parent
XLogRecPtr switchpoint; /* if this timeline has a parent, then
* switchpoint contains switchpoint LSN,
* otherwise 0 */
XLogSegNo begin_segno; /* first present segment in this timeline */
@ -501,6 +500,13 @@ typedef struct xlogInterval
XLogSegNo end_segno;
} xlogInterval;
typedef struct lsnInterval
{
TimeLineID tli;
XLogRecPtr begin_lsn;
XLogRecPtr end_lsn;
} lsnInterval;
typedef enum xlogFileType
{
SEGMENT,
@ -573,6 +579,9 @@ typedef struct BackupPageHeader
#define GetXLogSegNoFromScrath(logSegNo, log, seg, wal_segsz_bytes) \
logSegNo = (uint64) log * XLogSegmentsPerXLogId(wal_segsz_bytes) + seg
#define GetXLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) \
XLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes)
#else
#define GetXLogSegNo(xlrp, logSegNo, wal_segsz_bytes) \
XLByteToSeg(xlrp, logSegNo)
@ -589,6 +598,9 @@ typedef struct BackupPageHeader
#define GetXLogSegNoFromScrath(logSegNo, log, seg, wal_segsz_bytes) \
logSegNo = (uint64) log * XLogSegmentsPerXLogId + seg
#define GetXLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) \
XLogFromFileName(fname, tli, logSegNo)
#endif
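/*
 * A hedged illustration (not part of this header): how the version-agnostic
 * wrappers above are typically used; wal_seg_size is assumed to be
 * instance_config.xlog_seg_size.
 */
static inline XLogSegNo
segno_of_lsn_example(XLogRecPtr lsn, uint32 wal_seg_size)
{
    XLogSegNo   segno;

    /* map an LSN to its segment number, honouring the configured segment size */
    GetXLogSegNo(lsn, segno, wal_seg_size);
    return segno;
}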
#define IsSshProtocol() (instance_config.remote.host && strcmp(instance_config.remote.proto, "ssh") == 0)
@ -692,10 +704,11 @@ extern int do_init(void);
extern int do_add_instance(InstanceConfig *instance);
/* in archive.c */
extern int do_archive_push(InstanceConfig *instance, char *wal_file_path,
char *wal_file_name, bool overwrite);
extern int do_archive_get(InstanceConfig *instance, char *wal_file_path,
char *wal_file_name);
extern void do_archive_push(InstanceConfig *instance, char *wal_file_path,
char *wal_file_name, int batch_size, bool overwrite,
bool no_sync, bool no_ready_rename);
extern void do_archive_get(InstanceConfig *instance, const char *prefetch_dir_arg, char *wal_file_path,
char *wal_file_name, int batch_size, bool validate_wal);
/* in configure.c */
extern void do_show_config(void);
@ -758,6 +771,10 @@ extern void catalog_lock_backup_list(parray *backup_list, int from_idx,
extern pgBackup *catalog_get_last_data_backup(parray *backup_list,
TimeLineID tli,
time_t current_start_time);
extern pgBackup *get_multi_timeline_parent(parray *backup_list, parray *tli_list,
TimeLineID current_tli, time_t current_start_time,
InstanceConfig *instance);
extern void timelineInfoFree(void *tliInfo);
extern parray *catalog_get_timelines(InstanceConfig *instance);
extern void do_set_backup(const char *instance_name, time_t backup_id,
pgSetBackupParams *set_backup_params);
@ -784,6 +801,11 @@ extern int pgBackupCompareIdEqual(const void *l, const void *r);
extern pgBackup* find_parent_full_backup(pgBackup *current_backup);
extern int scan_parent_chain(pgBackup *current_backup, pgBackup **result_backup);
/* return codes for scan_parent_chain */
#define ChainIsBroken 0
#define ChainIsInvalid 1
#define ChainIsOk 2
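/*
 * A hedged helper sketch (hypothetical, not part of this header): mapping a
 * scan_parent_chain() result to a description, mirroring how restore.c and
 * validate.c react to these codes.
 */
static inline const char *
chain_status_str_example(int chain_status)
{
    switch (chain_status)
    {
        case ChainIsBroken:  return "a parent backup is missing";
        case ChainIsInvalid: return "at least one parent backup is invalid";
        case ChainIsOk:      return "the backup chain is intact";
        default:             return "unknown chain status";
    }
}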
extern bool is_parent(time_t parent_backup_time, pgBackup *child_backup, bool inclusive);
extern bool is_prolific(parray *backup_list, pgBackup *target_backup);
extern bool in_backup_list(parray *backup_list, pgBackup *target_backup);
@ -847,6 +869,7 @@ extern void pgFileDelete(pgFile *file, const char *full_path);
extern void pgFileFree(void *file);
extern pg_crc32 pgFileGetCRC(const char *file_path, bool missing_ok, bool use_crc32c);
extern pg_crc32 pgFileGetCRCgz(const char *file_path, bool missing_ok, bool use_crc32c);
extern int pgFileCompareName(const void *f1, const void *f2);
extern int pgFileComparePath(const void *f1, const void *f2);
@ -892,13 +915,16 @@ extern bool create_empty_file(fio_location from_location, const char *to_root,
extern bool check_file_pages(pgFile *file, XLogRecPtr stop_lsn,
uint32 checksum_version, uint32 backup_version);
/* parsexlog.c */
extern void extractPageMap(const char *archivedir,
TimeLineID tli, uint32 seg_size,
XLogRecPtr startpoint, XLogRecPtr endpoint);
extern bool extractPageMap(const char *archivedir, uint32 wal_seg_size,
XLogRecPtr startpoint, TimeLineID start_tli,
XLogRecPtr endpoint, TimeLineID end_tli,
parray *tli_list);
extern void validate_wal(pgBackup *backup, const char *archivedir,
time_t target_time, TransactionId target_xid,
XLogRecPtr target_lsn, TimeLineID tli,
uint32 seg_size);
extern bool validate_wal_segment(TimeLineID tli, XLogSegNo segno,
const char *prefetch_dir, uint32 wal_seg_size);
extern bool read_recovery_info(const char *archivedir, TimeLineID tli,
uint32 seg_size,
XLogRecPtr start_lsn, XLogRecPtr stop_lsn,
@ -941,7 +967,7 @@ extern int32 do_decompress(void* dst, size_t dst_size, void const* src, size_t
CompressAlg alg, const char **errormsg);
extern void pretty_size(int64 size, char *buf, size_t len);
extern void pretty_time_interval(int64 num_seconds, char *buf, size_t len);
extern void pretty_time_interval(double time, char *buf, size_t len);
extern PGconn *pgdata_basic_setup(ConnectionOptions conn_opt, PGNodeInfo *nodeInfo);
extern void check_system_identifiers(PGconn *conn, char *pgdata);
@ -968,12 +994,23 @@ extern parray * pg_ptrack_get_pagemapset(PGconn *backup_conn, const char *ptrack
extern int fio_send_pages(FILE* in, FILE* out, pgFile *file, XLogRecPtr horizonLsn,
int calg, int clevel, uint32 checksum_version,
datapagemap_t *pagemap, BlockNumber* err_blknum, char **errormsg);
/* return codes for fio_send_pages */
#define WRITE_FAILED (-1)
#define REMOTE_ERROR (-2)
#define PAGE_CORRUPTION (-3)
#define SEND_OK (-4)
#define OUT_BUF_SIZE (1024 * 1024)
extern int fio_send_file_gz(const char *from_fullpath, const char *to_fullpath, FILE* out, int thread_num);
extern int fio_send_file(const char *from_fullpath, const char *to_fullpath, FILE* out, int thread_num);
/* return codes for fio_send_pages() and fio_send_file() */
#define SEND_OK (0)
#define FILE_MISSING (-1)
#define OPEN_FAILED (-2)
#define READ_FAILED (-3)
#define WRITE_FAILED (-4)
#define ZLIB_ERROR (-5)
#define REMOTE_ERROR (-6)
#define PAGE_CORRUPTION (-8)
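/*
 * A hedged usage sketch (hypothetical caller, not part of this header):
 * fetching a remote file with fio_send_file() and reacting to its return
 * code; from_path, to_path and out are assumed to be prepared by the caller.
 */
static inline int
fetch_remote_file_example(const char *from_path, const char *to_path,
                          FILE *out, int thread_num)
{
    int rc = fio_send_file(from_path, to_path, out, thread_num);

    if (rc == FILE_MISSING)
        elog(LOG, "Thread [%d]: file \"%s\" is absent on the remote host",
             thread_num, from_path);
    else if (rc != SEND_OK)
        elog(WARNING, "Thread [%d]: failed to copy \"%s\", error code %i",
             thread_num, from_path, rc);

    return rc;
}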
/* Check if specified location is local for current node */
extern bool fio_is_remote(fio_location location);
extern void get_header_errormsg(Page page, char **errormsg);
extern void get_checksum_errormsg(Page page, char **errormsg,

View File

@ -251,7 +251,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
result = scan_parent_chain(dest_backup, &tmp_backup);
if (result == 0)
if (result == ChainIsBroken)
{
/* chain is broken, determine missing backup ID
* and orphanize all of its descendants
@ -290,7 +290,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
/* No point in going any further */
elog(ERROR, "%s of backup %s failed.", action, base36enc(dest_backup->start_time));
}
else if (result == 1)
else if (result == ChainIsInvalid)
{
/* chain is intact, but at least one parent is invalid */
set_orphan_status(backups, tmp_backup);
@ -403,7 +403,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
*/
validate_wal(dest_backup, arclog_path, rt->target_time,
rt->target_xid, rt->target_lsn,
base_full_backup->tli, instance_config.xlog_seg_size);
dest_backup->tli, instance_config.xlog_seg_size);
}
/* Orphanize every OK descendant of corrupted backup */
else
@ -1326,7 +1326,7 @@ satisfy_timeline(const parray *timelines, const pgBackup *backup)
timeline = (TimeLineHistoryEntry *) parray_get(timelines, i);
if (backup->tli == timeline->tli &&
(XLogRecPtrIsInvalid(timeline->end) ||
backup->stop_lsn < timeline->end))
backup->stop_lsn <= timeline->end))
return true;
}
return false;

View File

@ -191,14 +191,18 @@ pretty_size(int64 size, char *buf, size_t len)
}
void
pretty_time_interval(int64 num_seconds, char *buf, size_t len)
pretty_time_interval(double time, char *buf, size_t len)
{
int seconds = 0;
int minutes = 0;
int hours = 0;
int days = 0;
int num_seconds = 0;
int milliseconds = 0;
int seconds = 0;
int minutes = 0;
int hours = 0;
int days = 0;
if (num_seconds <= 0)
num_seconds = (int) time;
if (time <= 0)
{
strncpy(buf, "0", len);
return;
@ -214,6 +218,7 @@ pretty_time_interval(int64 num_seconds, char *buf, size_t len)
num_seconds %= 60;
seconds = num_seconds;
milliseconds = (int)((time - (int) time) * 1000.0);
if (days > 0)
{
@ -233,7 +238,16 @@ pretty_time_interval(int64 num_seconds, char *buf, size_t len)
return;
}
snprintf(buf, len, "%ds", seconds);
if (seconds > 0)
{
if (milliseconds > 0)
snprintf(buf, len, "%ds:%dms", seconds, milliseconds);
else
snprintf(buf, len, "%ds", seconds);
return;
}
snprintf(buf, len, "%dms", milliseconds);
return;
}
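/*
 * A minimal caller sketch (illustrative, not part of this patch): with
 * time = 0.325 the buffer reads "325ms", with time = 5.5 it reads "5s:500ms",
 * following the branches above.
 */
static void
print_elapsed_example(double elapsed_sec)
{
    char    buf[32];

    pretty_time_interval(elapsed_sec, buf, sizeof(buf));
    elog(INFO, "Elapsed time: %s", buf);
}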

View File

@ -14,6 +14,7 @@
#define PRINTF_BUF_SIZE 1024
#define FILE_PERMISSIONS 0600
#define CHUNK_SIZE 1024 * 128
static __thread unsigned long fio_fdset = 0;
static __thread void* fio_stdin_buffer;
@ -136,7 +137,7 @@ static int remove_file_or_dir(char const* path)
#endif
/* Check if specified location is local for current node */
static bool fio_is_remote(fio_location location)
bool fio_is_remote(fio_location location)
{
bool is_remote = MyLocation != FIO_LOCAL_HOST
&& location != FIO_LOCAL_HOST
@ -340,7 +341,10 @@ int fio_open(char const* path, int mode, fio_location location)
hdr.cop = FIO_OPEN;
hdr.handle = i;
hdr.size = strlen(path) + 1;
hdr.arg = mode & ~O_EXCL;
hdr.arg = mode;
// hdr.arg = mode & ~O_EXCL;
// elog(INFO, "PATH: %s MODE: %i, %i", path, mode, O_EXCL);
// elog(INFO, "MODE: %i", hdr.arg);
fio_fdset |= 1 << i;
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
@ -490,6 +494,7 @@ int fio_close(int fd)
fio_fdset &= ~(1 << hdr.handle);
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
/* Note, that file is closed without waiting for confirmation */
return 0;
}
@ -865,6 +870,8 @@ int fio_rename(char const* old_path, char const* new_path, fio_location location
IO_CHECK(fio_write_all(fio_stdout, old_path, old_path_len), old_path_len);
IO_CHECK(fio_write_all(fio_stdout, new_path, new_path_len), new_path_len);
//TODO: wait for confirmation.
return 0;
}
else
@ -916,7 +923,7 @@ int fio_sync(char const* path, fio_location location)
}
/* Get crc32 of file */
pg_crc32 fio_get_crc32(const char *file_path, fio_location location)
pg_crc32 fio_get_crc32(const char *file_path, fio_location location, bool decompress)
{
if (fio_is_remote(location))
{
@ -926,6 +933,10 @@ pg_crc32 fio_get_crc32(const char *file_path, fio_location location)
hdr.cop = FIO_GET_CRC32;
hdr.handle = -1;
hdr.size = path_len;
hdr.arg = 0;
if (decompress)
hdr.arg = 1;
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
IO_CHECK(fio_write_all(fio_stdout, file_path, path_len), path_len);
@ -934,7 +945,12 @@ pg_crc32 fio_get_crc32(const char *file_path, fio_location location)
return crc;
}
else
return pgFileGetCRC(file_path, true, true);
{
if (decompress)
return pgFileGetCRCgz(file_path, true, true);
else
return pgFileGetCRC(file_path, true, true);
}
}
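/*
 * A hedged usage sketch (hypothetical, not part of this patch): comparing the
 * checksum of a gzipped segment in the archive with the checksum of a local
 * uncompressed copy; with decompress = true the CRC is computed over the
 * decompressed content via pgFileGetCRCgz().
 */
static bool
archived_gz_matches_example(const char *gz_path, const char *plain_path)
{
    pg_crc32 crc_gz    = fio_get_crc32(gz_path, FIO_BACKUP_HOST, true);
    pg_crc32 crc_plain = fio_get_crc32(plain_path, FIO_LOCAL_HOST, false);

    return crc_gz == crc_plain;
}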
/* Remove file */
@ -1011,7 +1027,6 @@ int fio_chmod(char const* path, int mode, fio_location location)
#ifdef HAVE_LIBZ
#define ZLIB_BUFFER_SIZE (64*1024)
#define MAX_WBITS 15 /* 32K LZ77 window */
#define DEF_MEM_LEVEL 8
@ -1027,6 +1042,7 @@ typedef struct fioGZFile
Bytef buf[ZLIB_BUFFER_SIZE];
} fioGZFile;
/* On error returns NULL and errno should be checked */
gzFile
fio_gzopen(char const* path, char const* mode, int level, fio_location location)
{
@ -1037,6 +1053,7 @@ fio_gzopen(char const* path, char const* mode, int level, fio_location location)
memset(&gz->strm, 0, sizeof(gz->strm));
gz->eof = 0;
gz->errnum = Z_OK;
/* check if file opened for writing */
if (strcmp(mode, PG_BINARY_W) == 0) /* compress */
{
gz->strm.next_out = gz->buf;
@ -1049,14 +1066,12 @@ fio_gzopen(char const* path, char const* mode, int level, fio_location location)
if (rc == Z_OK)
{
gz->compress = 1;
if (fio_access(path, F_OK, location) == 0)
gz->fd = fio_open(path, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY, location);
if (gz->fd < 0)
{
elog(LOG, "File %s exists", path);
free(gz);
errno = EEXIST;
return NULL;
}
gz->fd = fio_open(path, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY, location);
}
}
else
@ -1069,21 +1084,27 @@ fio_gzopen(char const* path, char const* mode, int level, fio_location location)
{
gz->compress = 0;
gz->fd = fio_open(path, O_RDONLY | PG_BINARY, location);
if (gz->fd < 0)
{
free(gz);
return NULL;
}
}
}
if (rc != Z_OK)
{
free(gz);
return NULL;
elog(ERROR, "zlib internal error when opening file %s: %s",
path, gz->strm.msg);
}
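/* Descriptive note (inferred from the surrounding code): remote gz handles
 * are returned as tagged pointers, with FIO_GZ_REMOTE_MARKER added to the
 * fioGZFile address so that later fio_gz* calls can tell a remote handle
 * from a plain zlib gzFile. */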
return (gzFile)((size_t)gz + FIO_GZ_REMOTE_MARKER);
}
else
{
gzFile file;
/* check if file opened for writing */
if (strcmp(mode, PG_BINARY_W) == 0)
{
int fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, FILE_PERMISSIONS);
int fd = open(path, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY, FILE_PERMISSIONS);
if (fd < 0)
return NULL;
file = gzdopen(fd, mode);
@ -1143,7 +1164,8 @@ fio_gzread(gzFile f, void *buf, unsigned size)
{
gz->strm.next_in = gz->buf;
}
rc = fio_read(gz->fd, gz->strm.next_in + gz->strm.avail_in, gz->buf + ZLIB_BUFFER_SIZE - gz->strm.next_in - gz->strm.avail_in);
rc = fio_read(gz->fd, gz->strm.next_in + gz->strm.avail_in,
gz->buf + ZLIB_BUFFER_SIZE - gz->strm.next_in - gz->strm.avail_in);
if (rc > 0)
{
gz->strm.avail_in += rc;
@ -1282,8 +1304,10 @@ z_off_t fio_gzseek(gzFile f, z_off_t offset, int whence)
#endif
/* Send file content */
static void fio_send_file(int out, char const* path)
/* Send file content
* Note: it should not be used for large files.
*/
static void fio_load_file(int out, char const* path)
{
int fd = open(path, O_RDONLY);
fio_header hdr;
@ -1440,7 +1464,7 @@ int fio_send_pages(FILE* in, FILE* out, pgFile *file, XLogRecPtr horizonLsn,
file->uncompressed_size += BLCKSZ;
}
else
elog(ERROR, "Remote agent returned message of unknown type");
elog(ERROR, "Remote agent returned message of unexpected type: %i", hdr.cop);
}
return n_blocks_read;
@ -1607,6 +1631,337 @@ cleanup:
return;
}
/* Receive chunks of compressed data, decompress them and write to
* destination file.
* Return codes:
* FILE_MISSING (-1)
* OPEN_FAILED (-2)
* READ_FAILED (-3)
* WRITE_FAILED (-4)
* ZLIB_ERROR (-5)
* REMOTE_ERROR (-6)
*/
int fio_send_file_gz(const char *from_fullpath, const char *to_fullpath, FILE* out, int thread_num)
{
fio_header hdr;
int exit_code = SEND_OK;
char *in_buf = pgut_malloc(CHUNK_SIZE); /* buffer for compressed data */
char *out_buf = pgut_malloc(OUT_BUF_SIZE); /* 1MB buffer for decompressed data */
size_t path_len = strlen(from_fullpath) + 1;
/* decompressor */
z_stream *strm = NULL;
hdr.cop = FIO_SEND_FILE;
hdr.size = path_len;
elog(VERBOSE, "Thread [%d]: Attempting to open remote compressed WAL file '%s'",
thread_num, from_fullpath);
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
IO_CHECK(fio_write_all(fio_stdout, from_fullpath, path_len), path_len);
for (;;)
{
fio_header hdr;
IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
if (hdr.cop == FIO_SEND_FILE_EOF)
{
break;
}
else if (hdr.cop == FIO_ERROR)
{
/* handle error, reported by the agent */
if (hdr.size > 0)
{
IO_CHECK(fio_read_all(fio_stdin, in_buf, hdr.size), hdr.size);
elog(WARNING, "Thread [%d]: %s", thread_num, in_buf);
}
exit_code = hdr.arg;
goto cleanup;
}
else if (hdr.cop == FIO_PAGE)
{
int rc;
Assert(hdr.size <= CHUNK_SIZE);
IO_CHECK(fio_read_all(fio_stdin, in_buf, hdr.size), hdr.size);
/* We have received a chunk of compressed data, let's decompress it */
if (strm == NULL)
{
/* Initialize decompressor */
strm = pgut_malloc(sizeof(z_stream));
memset(strm, 0, sizeof(z_stream));
/* The fields next_in and avail_in must be set before calling inflateInit2() */
strm->next_in = (Bytef *)in_buf;
strm->avail_in = hdr.size;
rc = inflateInit2(strm, 15 + 16);
if (rc != Z_OK)
{
elog(WARNING, "Thread [%d]: Failed to initialize decompression stream for file '%s': %i: %s",
thread_num, from_fullpath, rc, strm->msg);
exit_code = ZLIB_ERROR;
goto cleanup;
}
}
else
{
strm->next_in = (Bytef *)in_buf;
strm->avail_in = hdr.size;
}
strm->next_out = (Bytef *)out_buf; /* output buffer */
strm->avail_out = OUT_BUF_SIZE; /* free space in output buffer */
/*
* From zlib documentation:
* The application must update next_in and avail_in when avail_in
* has dropped to zero. It must update next_out and avail_out when
* avail_out has dropped to zero.
*/
while (strm->avail_in != 0) /* while there is data in input buffer, decompress it */
{
/* decompress until there is no data to decompress,
* or buffer with uncompressed data is full
*/
rc = inflate(strm, Z_NO_FLUSH);
if (rc == Z_STREAM_END)
/* end of stream */
break;
else if (rc != Z_OK)
{
/* got an error */
elog(WARNING, "Thread [%d]: Decompression failed for file '%s': %i: %s",
thread_num, from_fullpath, rc, strm->msg);
exit_code = ZLIB_ERROR;
goto cleanup;
}
if (strm->avail_out == 0)
{
/* Output buffer is full, write it out */
if (fwrite(out_buf, 1, OUT_BUF_SIZE, out) != OUT_BUF_SIZE)
{
elog(WARNING, "Thread [%d]: Cannot write to file '%s': %s",
thread_num, to_fullpath, strerror(errno));
exit_code = WRITE_FAILED;
goto cleanup;
}
strm->next_out = (Bytef *)out_buf; /* output buffer */
strm->avail_out = OUT_BUF_SIZE;
}
}
/* write out leftovers if any */
if (strm->avail_out != OUT_BUF_SIZE)
{
int len = OUT_BUF_SIZE - strm->avail_out;
if (fwrite(out_buf, 1, len, out) != len)
{
elog(WARNING, "Thread [%d]: Cannot write to file: %s",
thread_num, strerror(errno));
exit_code = WRITE_FAILED;
goto cleanup;
}
}
}
else
{
elog(WARNING, "Thread [%d]: Remote agent returned message of unexpected type: %i",
thread_num, hdr.cop);
exit_code = REMOTE_ERROR;
break;
}
}
cleanup:
if (exit_code < OPEN_FAILED)
fio_disconnect(); /* discard possible pending data in pipe */
if (strm)
{
inflateEnd(strm);
pg_free(strm);
}
pg_free(in_buf);
pg_free(out_buf);
return exit_code;
}
/* Receive chunks of data and write them to destination file.
* Return codes:
* SEND_OK (0)
* FILE_MISSING (-1)
* OPEN_FAILED (-2)
* READ_FAILED (-3)
* WRITE_FAILED (-4)
*/
int fio_send_file(const char *from_fullpath, const char *to_fullpath, FILE* out, int thread_num)
{
fio_header hdr;
int exit_code = SEND_OK;
size_t path_len = strlen(from_fullpath) + 1;
char *buf = pgut_malloc(CHUNK_SIZE); /* buffer */
hdr.cop = FIO_SEND_FILE;
hdr.size = path_len;
elog(VERBOSE, "Thread [%d]: Attempting to open remote WAL file '%s'",
thread_num, from_fullpath);
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
IO_CHECK(fio_write_all(fio_stdout, from_fullpath, path_len), path_len);
for (;;)
{
/* receive data */
IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
if (hdr.cop == FIO_SEND_FILE_EOF)
{
break;
}
else if (hdr.cop == FIO_ERROR)
{
/* handle error, reported by the agent */
if (hdr.size > 0)
{
IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size);
elog(WARNING, "Thread [%d]: %s", thread_num, buf);
}
exit_code = hdr.arg;
break;
}
else if (hdr.cop == FIO_PAGE)
{
Assert(hdr.size <= CHUNK_SIZE);
IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size);
/* We have received a chunk of data, let's write it out */
if (fwrite(buf, 1, hdr.size, out) != hdr.size)
{
elog(WARNING, "Thread [%d]: Cannot write to file '%s': %s",
thread_num, to_fullpath, strerror(errno));
exit_code = WRITE_FAILED;
break;
}
}
else
{
/* TODO: fio_disconnect may hit an assertion failure when called after this */
elog(WARNING, "Thread [%d]: Remote agent returned message of unexpected type: %i",
thread_num, hdr.cop);
exit_code = REMOTE_ERROR;
break;
}
}
if (exit_code < OPEN_FAILED)
fio_disconnect(); /* discard possible pending data in pipe */
pg_free(buf);
return exit_code;
}
/* Send file content
* On error we return a FIO_ERROR message with one of the following codes:
* FILE_MISSING (-1)
* OPEN_FAILED (-2)
* READ_FAILED (-3)
*
*/
static void fio_send_file_impl(int out, char const* path)
{
FILE *fp;
fio_header hdr;
char *buf = pgut_malloc(CHUNK_SIZE);
ssize_t read_len = 0;
char *errormsg = NULL;
/* open source file for read */
/* TODO: check that file is regular file */
fp = fopen(path, PG_BINARY_R);
if (!fp)
{
hdr.cop = FIO_ERROR;
/* Do not send the exact wording of the ENOENT error message,
* because it is a very common error in our case, so
* the error code alone is enough.
*/
if (errno == ENOENT)
{
hdr.arg = FILE_MISSING;
hdr.size = 0;
}
else
{
hdr.arg = OPEN_FAILED;
errormsg = pgut_malloc(MAXPGPATH);
/* Construct the error message */
snprintf(errormsg, MAXPGPATH, "Cannot open source file '%s': %s", path, strerror(errno));
hdr.size = strlen(errormsg) + 1;
}
/* send header and message */
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
if (errormsg)
IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size);
goto cleanup;
}
/* copy content */
for (;;)
{
read_len = fread(buf, 1, CHUNK_SIZE, fp);
/* report error */
if (read_len < 0 || (read_len == 0 && !feof(fp)))
{
hdr.cop = FIO_ERROR;
errormsg = pgut_malloc(MAXPGPATH);
hdr.arg = READ_FAILED;
/* Construct the error message */
snprintf(errormsg, MAXPGPATH, "Cannot read source file '%s': %s", path, strerror(errno));
hdr.size = strlen(errormsg) + 1;
/* send header and message */
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size);
goto cleanup;
}
else if (read_len == 0)
break;
else
{
/* send chunk */
hdr.cop = FIO_PAGE;
hdr.size = read_len;
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
IO_CHECK(fio_write_all(out, buf, read_len), read_len);
}
}
/* we are done, send eof */
hdr.cop = FIO_SEND_FILE_EOF;
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
cleanup:
if (fp)
fclose(fp);
pg_free(buf);
pg_free(errormsg);
return;
}
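/*
 * Summary of the transfer protocol implemented by fio_send_file(),
 * fio_send_file_gz() and fio_send_file_impl() above (descriptive, derived
 * from the code shown here):
 *
 *   client -> agent:  FIO_SEND_FILE(path)
 *   agent  -> client: FIO_PAGE(chunk) ... FIO_PAGE(chunk), FIO_SEND_FILE_EOF
 *                     or FIO_ERROR(code [, message]) on failure
 */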
/* Execute commands at remote host */
void fio_communicate(int in, int out)
{
@ -1643,7 +1998,7 @@ void fio_communicate(int in, int out)
}
switch (hdr.cop) {
case FIO_LOAD: /* Send file content */
fio_send_file(out, buf);
fio_load_file(out, buf);
break;
case FIO_OPENDIR: /* Open directory for traversal */
dir[hdr.handle] = opendir(buf);
@ -1754,6 +2109,9 @@ void fio_communicate(int in, int out)
// buf contain fio_send_request header and bitmap.
fio_send_pages_impl(fd[hdr.handle], out, buf, true);
break;
case FIO_SEND_FILE:
fio_send_file_impl(out, buf);
break;
case FIO_SYNC:
/* open file and fsync it */
tmp_fd = open(buf, O_WRONLY | PG_BINARY, FILE_PERMISSIONS);
@ -1772,7 +2130,10 @@ void fio_communicate(int in, int out)
break;
case FIO_GET_CRC32:
/* calculate crc32 for a file */
crc = pgFileGetCRC(buf, true, true);
if (hdr.arg == 1)
crc = pgFileGetCRCgz(buf, true, true);
else
crc = pgFileGetCRC(buf, true, true);
IO_CHECK(fio_write_all(out, &crc, sizeof(crc)), sizeof(crc));
break;
case FIO_DISCONNECT:

View File

@ -40,6 +40,8 @@ typedef enum
FIO_SEND_PAGES,
FIO_SEND_PAGES_PAGEMAP,
FIO_ERROR,
FIO_SEND_FILE,
// FIO_CHUNK,
FIO_SEND_FILE_EOF,
FIO_SEND_FILE_CORRUPTION,
/* messages for closing connection */
@ -100,7 +102,7 @@ extern int fio_truncate(int fd, off_t size);
extern int fio_close(int fd);
extern void fio_disconnect(void);
extern int fio_sync(char const* path, fio_location location);
extern pg_crc32 fio_get_crc32(const char *file_path, fio_location location);
extern pg_crc32 fio_get_crc32(const char *file_path, fio_location location, bool decompress);
extern int fio_rename(char const* old_path, char const* new_path, fio_location location);
extern int fio_symlink(char const* target, char const* link_path, fio_location location);

View File

@ -220,7 +220,7 @@ bool launch_agent(void)
return false;
} else {
#endif
elog(LOG, "Spawn agent %d version %s", child_pid, PROGRAM_VERSION);
elog(LOG, "Start SSH client process, pid %d", child_pid);
SYS_CHECK(close(infd[1])); /* These are being used by the child */
SYS_CHECK(close(outfd[0]));
SYS_CHECK(close(errfd[1]));

View File

@ -479,7 +479,7 @@ do_validate_instance(void)
result = scan_parent_chain(current_backup, &tmp_backup);
/* chain is broken */
if (result == 0)
if (result == ChainIsBroken)
{
char *parent_backup_id;
/* determine missing backup ID */
@ -505,7 +505,7 @@ do_validate_instance(void)
continue;
}
/* chain is whole, but at least one parent is invalid */
else if (result == 1)
else if (result == ChainIsInvalid)
{
/* Oldest corrupt backup has a chance for revalidation */
if (current_backup->start_time != tmp_backup->start_time)
@ -630,7 +630,7 @@ do_validate_instance(void)
*/
result = scan_parent_chain(backup, &tmp_backup);
if (result == 1)
if (result == ChainIsInvalid)
{
/* revalidation make sense only if oldest invalid backup is current_backup
*/

View File

@ -281,7 +281,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
# @unittest.skip("skip")
def test_pgpro434_4(self):
"""
Check pg_stop_backup_timeout, needed backup_timeout
Check pg_stop_backup_timeout, libpq-timeout requested.
Fixed in commit d84d79668b0c139 and assert fixed by ptrack 1.7
"""
fname = self.id().split('.')[3]
@ -398,15 +398,11 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
log_content)
self.assertIn(
'INFO: pg_probackup archive-push from',
'pg_probackup archive-push WAL file',
log_content)
self.assertIn(
'ERROR: WAL segment ',
log_content)
self.assertIn(
'already exists.',
'WAL file already exists in archive with different checksum',
log_content)
self.assertNotIn(
@ -448,8 +444,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '30s'})
pg_options={'checkpoint_timeout': '30s'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
@ -487,9 +482,13 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
self.assertIn(
'DETAIL: The failed archive command was:', log_content)
self.assertIn(
'INFO: pg_probackup archive-push from', log_content)
'pg_probackup archive-push WAL file', log_content)
self.assertNotIn(
'WAL file already exists in archive with '
'different checksum, overwriting', log_content)
self.assertIn(
'{0}" already exists.'.format(filename), log_content)
'WAL file already exists in archive with '
'different checksum', log_content)
self.assertNotIn(
'pg_probackup archive-push completed successfully', log_content)
@ -497,7 +496,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
self.set_archiving(backup_dir, 'node', node, overwrite=True)
node.reload()
self.switch_wal_segment(node)
sleep(2)
sleep(5)
with open(log_file, 'r') as f:
log_content = f.read()
@ -505,6 +504,10 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
'pg_probackup archive-push completed successfully' in log_content,
'Expecting messages about successful execution of archive_command')
self.assertIn(
'WAL file already exists in archive with '
'different checksum, overwriting', log_content)
# Clean after yourself
self.del_test_dir(module_name, fname)
@ -520,7 +523,9 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
self.set_archiving(
backup_dir, 'node', node,
log_level='verbose', archive_timeout=60)
node.slow_start()
@ -579,12 +584,9 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
log_file = os.path.join(node.logs_dir, 'postgresql.log')
with open(log_file, 'r') as f:
log_content = f.read()
self.assertIn(
'Cannot open destination temporary WAL file',
log_content)
self.assertIn(
'Reusing stale destination temporary WAL file',
'Reusing stale temp WAL file',
log_content)
# Clean after yourself
@ -602,7 +604,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node, archive_timeout=60)
node.slow_start()
@ -905,8 +907,8 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '30s',
'archive_timeout': '10s'}
)
'archive_timeout': '10s'})
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
@ -923,6 +925,8 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,10000) i")
master.pgbench_init(scale=5)
# TAKE FULL ARCHIVE BACKUP FROM MASTER
self.backup_node(backup_dir, 'master', master)
# GET LOGICAL CONTENT FROM MASTER
@ -937,11 +941,11 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
pgdata_replica = self.pgdata_content(replica.data_dir)
self.compare_pgdata(pgdata_master, pgdata_replica)
self.set_replica(master, replica, synchronous=True)
self.set_replica(master, replica, synchronous=False)
# ADD INSTANCE REPLICA
# self.add_instance(backup_dir, 'replica', replica)
# SET ARCHIVING FOR REPLICA
# self.set_archiving(backup_dir, 'replica', replica, replica=True)
self.set_archiving(backup_dir, 'master', replica, replica=True)
replica.slow_start(replica=True)
# CHECK LOGICAL CORRECTNESS on REPLICA
@ -973,6 +977,18 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
self.assertEqual(
'OK', self.show_pb(backup_dir, 'master', backup_id)['status'])
master.pgbench_init(scale=50)
sleep(10)
replica.promote()
master.pgbench_init(scale=10)
replica.pgbench_init(scale=10)
exit(1)
# Clean after yourself
self.del_test_dir(module_name, fname)
@ -1718,7 +1734,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node, log_level='verbose')
node.slow_start()
backup_id = self.backup_node(backup_dir, 'node', node)
@ -1734,6 +1750,8 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
node.slow_start()
node.pgbench_init(scale=2)
sleep(5)
show = self.show_archive(backup_dir)
timelines = show[0]['timelines']
@ -1755,12 +1773,571 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
tli13['closest-backup-id'])
self.assertEqual(
'0000000D000000000000001B',
'0000000D000000000000001C',
tli13['max-segno'])
# Clean after yourself
self.del_test_dir(module_name, fname)
@unittest.skip("skip")
# @unittest.expectedFailure
def test_archiving_and_slots(self):
"""
Check that archiving doesn't break the slot
guarantee.
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'autovacuum': 'off',
'checkpoint_timeout': '30s',
'max_wal_size': '64MB'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node, log_level='verbose')
node.slow_start()
if self.get_version(node) < 100000:
pg_receivexlog_path = self.get_bin_path('pg_receivexlog')
else:
pg_receivexlog_path = self.get_bin_path('pg_receivewal')
# "pg_receivewal --create-slot --slot archive_slot --if-not-exists "
# "&& pg_receivewal --synchronous -Z 1 /tmp/wal --slot archive_slot --no-loop"
self.run_binary(
[
pg_receivexlog_path, '-p', str(node.port), '--synchronous',
'--create-slot', '--slot', 'archive_slot', '--if-not-exists'
])
node.pgbench_init(scale=10)
pg_receivexlog = self.run_binary(
[
pg_receivexlog_path, '-p', str(node.port), '--synchronous',
'-D', os.path.join(backup_dir, 'wal', 'node'),
'--no-loop', '--slot', 'archive_slot',
'-Z', '1'
], asynchronous=True)
if pg_receivexlog.returncode:
self.assertFalse(
True,
'Failed to start pg_receivexlog: {0}'.format(
pg_receivexlog.communicate()[1]))
sleep(2)
pg_receivexlog.kill()
backup_id = self.backup_node(backup_dir, 'node', node)
node.pgbench_init(scale=20)
exit(1)
# Clean after yourself
self.del_test_dir(module_name, fname)
def test_archive_push_sanity(self):
""""""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'archive_mode': 'on',
'archive_command': 'exit 1'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
node.slow_start()
node.pgbench_init(scale=50)
node.stop()
self.set_archiving(backup_dir, 'node', node)
os.remove(os.path.join(node.logs_dir, 'postgresql.log'))
node.slow_start()
self.backup_node(backup_dir, 'node', node)
with open(os.path.join(node.logs_dir, 'postgresql.log'), 'r') as f:
postgres_log_content = f.read()
# print(postgres_log_content)
# make sure that .backup file is not compressed
self.assertNotIn('.backup.gz', postgres_log_content)
self.assertNotIn('WARNING', postgres_log_content)
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(
backup_dir, 'node', replica,
data_dir=replica.data_dir, options=['-R'])
#self.set_archiving(backup_dir, 'replica', replica, replica=True)
self.set_auto_conf(replica, {'port': replica.port})
self.set_auto_conf(replica, {'archive_mode': 'always'})
self.set_auto_conf(replica, {'hot_standby': 'on'})
replica.slow_start(replica=True)
self.wait_until_replica_catch_with_master(node, replica)
node.pgbench_init(scale=5)
replica.promote()
replica.pgbench_init(scale=10)
with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f:
replica_log_content = f.read()
# make sure that .partial file is not compressed
self.assertNotIn('.partial.gz', replica_log_content)
# make sure that .history file is not compressed
self.assertNotIn('.history.gz', replica_log_content)
self.assertNotIn('WARNING', replica_log_content)
output = self.show_archive(
backup_dir, 'node', as_json=False, as_text=True,
options=['--log-level-console=VERBOSE'])
self.assertNotIn('WARNING', output)
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.expectedFailure
# @unittest.skip("skip")
def test_archive_pg_receivexlog_partial_handling(self):
"""check that archive-get delivers .partial and .gz.partial files"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
node.slow_start()
self.backup_node(backup_dir, 'node', node, options=['--stream'])
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(
backup_dir, 'node', replica, replica.data_dir, options=['-R'])
self.set_auto_conf(replica, {'port': replica.port})
self.set_replica(node, replica)
self.add_instance(backup_dir, 'replica', replica)
# self.set_archiving(backup_dir, 'replica', replica, replica=True)
replica.slow_start(replica=True)
node.safe_psql('postgres', 'CHECKPOINT')
if self.get_version(replica) < 100000:
pg_receivexlog_path = self.get_bin_path('pg_receivexlog')
else:
pg_receivexlog_path = self.get_bin_path('pg_receivewal')
cmdline = [
pg_receivexlog_path, '-p', str(replica.port), '--synchronous',
'-D', os.path.join(backup_dir, 'wal', 'replica')]
if self.archive_compress and node.major_version >= 10:
cmdline += ['-Z', '1']
pg_receivexlog = self.run_binary(cmdline, asynchronous=True)
if pg_receivexlog.returncode:
self.assertFalse(
True,
'Failed to start pg_receivexlog: {0}'.format(
pg_receivexlog.communicate()[1]))
node.safe_psql(
"postgres",
"create table t_heap as select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,1000000) i")
# FULL
self.backup_node(backup_dir, 'replica', replica, options=['--stream'])
node.safe_psql(
"postgres",
"insert into t_heap select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(1000000,2000000) i")
node_restored = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node_restored'))
node_restored.cleanup()
self.restore_node(
backup_dir, 'replica', node_restored,
node_restored.data_dir, options=['--recovery-target=latest', '--recovery-target-action=promote'])
self.set_auto_conf(node_restored, {'port': node_restored.port})
self.set_auto_conf(node_restored, {'hot_standby': 'off'})
# it will set node_restored as warm standby.
# with open(os.path.join(node_restored.data_dir, "standby.signal"), 'w') as f:
# f.flush()
# f.close()
node_restored.slow_start()
result = node.safe_psql(
"postgres",
"select sum(id) from t_heap")
result_new = node_restored.safe_psql(
"postgres",
"select sum(id) from t_heap")
self.assertEqual(result, result_new)
# Clean after yourself
pg_receivexlog.kill()
self.del_test_dir(module_name, fname)
def test_multi_timeline_recovery_prefetching(self):
""""""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={'autovacuum': 'off'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
self.backup_node(backup_dir, 'node', node)
node.pgbench_init(scale=50)
target_xid = node.safe_psql(
'postgres',
'select txid_current()').rstrip()
node.pgbench_init(scale=20)
node.stop()
node.cleanup()
self.restore_node(
backup_dir, 'node', node,
options=[
'--recovery-target-xid={0}'.format(target_xid),
'--recovery-target-action=promote'])
node.slow_start()
node.pgbench_init(scale=20)
target_xid = node.safe_psql(
'postgres',
'select txid_current()').rstrip()
node.stop(['-m', 'immediate', '-D', node.data_dir])
node.cleanup()
self.restore_node(
backup_dir, 'node', node,
options=[
# '--recovery-target-xid={0}'.format(target_xid),
'--recovery-target-timeline=2',
# '--recovery-target-action=promote',
'--no-validate'])
node.slow_start()
node.pgbench_init(scale=20)
result = node.safe_psql(
'postgres',
'select * from pgbench_accounts')
node.stop()
node.cleanup()
self.restore_node(
backup_dir, 'node', node,
options=[
# '--recovery-target-xid=100500',
'--recovery-target-timeline=3',
# '--recovery-target-action=promote',
'--no-validate'])
os.remove(os.path.join(node.logs_dir, 'postgresql.log'))
restore_command = self.get_restore_command(backup_dir, 'node', node)
restore_command += ' -j 2 --batch-size=10 --log-level-console=VERBOSE'
if node.major_version >= 12:
node.append_conf(
'probackup_recovery.conf', "restore_command = '{0}'".format(restore_command))
else:
node.append_conf(
'recovery.conf', "restore_command = '{0}'".format(restore_command))
node.slow_start()
result_new = node.safe_psql(
'postgres',
'select * from pgbench_accounts')
self.assertEqual(result, result_new)
with open(os.path.join(node.logs_dir, 'postgresql.log'), 'r') as f:
postgres_log_content = f.read()
# check that requesting a non-existing segment does not
# throw away the prefetched files
self.assertIn(
'pg_probackup archive-get failed to '
'deliver WAL file: 000000030000000000000006',
postgres_log_content)
self.assertIn(
'pg_probackup archive-get failed to '
'deliver WAL file: 000000020000000000000006',
postgres_log_content)
self.assertIn(
'pg_probackup archive-get used prefetched '
'WAL segment 000000010000000000000006, prefetch state: 5/10',
postgres_log_content)
# Clean after yourself
self.del_test_dir(module_name, fname)
def test_archive_get_batching_sanity(self):
"""
Make sure that batching works.
The .gz file is corrupted and the uncompressed one is not; check that
the corruption is detected and the uncompressed file is used.
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={'autovacuum': 'off'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
self.backup_node(backup_dir, 'node', node, options=['--stream'])
node.pgbench_init(scale=50)
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(
backup_dir, 'node', replica, replica.data_dir)
self.set_replica(node, replica, log_shipping=True)
if node.major_version >= 12:
self.set_auto_conf(replica, {'restore_command': 'exit 1'})
else:
replica.append_conf('recovery.conf', "restore_command = 'exit 1'")
replica.slow_start(replica=True)
# at this point replica is consistent
restore_command = self.get_restore_command(backup_dir, 'node', replica)
restore_command += ' -j 2 --batch-size=10'
print(restore_command)
if node.major_version >= 12:
self.set_auto_conf(replica, {'restore_command': restore_command})
else:
replica.append_conf(
'recovery.conf', "restore_command = '{0}'".format(restore_command))
replica.restart()
sleep(5)
with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f:
postgres_log_content = f.read()
self.assertIn(
'pg_probackup archive-get completed successfully, fetched: 10/10',
postgres_log_content)
self.assertIn('used prefetched WAL segment', postgres_log_content)
self.assertIn('prefetch state: 9/10', postgres_log_content)
self.assertIn('prefetch state: 8/10', postgres_log_content)
# Clean after yourself
self.del_test_dir(module_name, fname)
def test_archive_get_prefetch_corruption(self):
"""
Make sure that WAL corruption is detected and --prefetch-dir is honored.
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={'autovacuum': 'off', 'wal_keep_segments': '200'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
self.backup_node(backup_dir, 'node', node, options=['--stream'])
node.pgbench_init(scale=50)
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(
backup_dir, 'node', replica, replica.data_dir)
self.set_replica(node, replica, log_shipping=True)
if node.major_version >= 12:
self.set_auto_conf(replica, {'restore_command': 'exit 1'})
else:
replica.append_conf('recovery.conf', "restore_command = 'exit 1'")
replica.slow_start(replica=True)
# at this point replica is consistent
restore_command = self.get_restore_command(backup_dir, 'node', replica)
restore_command += ' -j 2 --batch-size=10 --log-level-console=VERBOSE'
#restore_command += ' --batch-size=2 --log-level-console=VERBOSE'
if node.major_version >= 12:
self.set_auto_conf(replica, {'restore_command': restore_command})
else:
replica.append_conf(
'recovery.conf', "restore_command = '{0}'".format(restore_command))
replica.restart()
sleep(5)
with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f:
postgres_log_content = f.read()
self.assertIn(
'pg_probackup archive-get completed successfully, fetched: 10/10',
postgres_log_content)
self.assertIn('used prefetched WAL segment', postgres_log_content)
self.assertIn('prefetch state: 9/10', postgres_log_content)
self.assertIn('prefetch state: 8/10', postgres_log_content)
replica.stop()
# generate WAL, copy it into prefetch directory, then corrupt
# some segment
node.pgbench_init(scale=20)
sleep(10)
# now copy WAL files into prefetch directory and corrupt some of them
archive_dir = os.path.join(backup_dir, 'wal', 'node')
files = os.listdir(archive_dir)
files.sort()
for filename in [files[-4], files[-3], files[-2], files[-1]]:
src_file = os.path.join(archive_dir, filename)
if node.major_version >= 10:
wal_dir = 'pg_wal'
else:
wal_dir = 'pg_xlog'
if filename.endswith('.gz'):
dst_file = os.path.join(replica.data_dir, wal_dir, 'pbk_prefetch', filename[:-3])
with gzip.open(src_file, 'rb') as f_in, open(dst_file, 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
else:
dst_file = os.path.join(replica.data_dir, wal_dir, 'pbk_prefetch', filename)
shutil.copyfile(src_file, dst_file)
print(dst_file)
# corrupt file
if files[-2].endswith('.gz'):
filename = files[-2][:-3]
else:
filename = files[-2]
prefetched_file = os.path.join(replica.data_dir, wal_dir, 'pbk_prefetch', filename)
with open(prefetched_file, "rb+", 0) as f:
f.seek(8192*2)
f.write(b"SURIKEN")
f.flush()
f.close()
# enable restore_command
restore_command = self.get_restore_command(backup_dir, 'node', replica)
restore_command += ' --batch-size=2 --log-level-console=VERBOSE'
if node.major_version >= 12:
self.set_auto_conf(replica, {'restore_command': restore_command})
else:
replica.append_conf(
'recovery.conf', "restore_command = '{0}'".format(restore_command))
os.remove(os.path.join(replica.logs_dir, 'postgresql.log'))
replica.slow_start(replica=True)
sleep(10)
with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f:
postgres_log_content = f.read()
self.assertIn(
'Prefetched WAL segment {0} is invalid, cannot use it'.format(filename),
postgres_log_content)
self.assertIn(
'LOG: restored log file "{0}" from archive'.format(filename),
postgres_log_content)
# Clean after yourself
self.del_test_dir(module_name, fname)
# TODO test with multiple not archived segments.
# TODO corrupted file in archive.
# important - switchpoint may be NullOffset LSN and not actually existing in archive to boot.
# so write WAL validation code accordingly

View File

@ -228,10 +228,9 @@ class BackupTest(ProbackupTest, unittest.TestCase):
"without valid full backup.\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
"ERROR: Valid backup on current timeline 1 is not found. "
"Create new FULL backup before an incremental one.",
e.message,
self.assertTrue(
"WARNING: Valid backup on current timeline 1 is not found" in e.message and
"ERROR: Create new full backup before an incremental one" in e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
@ -2294,10 +2293,9 @@ class BackupTest(ProbackupTest, unittest.TestCase):
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: Valid backup on current timeline 1 is not found. '
'Create new FULL backup before an incremental one.',
e.message,
self.assertTrue(
'WARNING: Valid backup on current timeline 1 is not found' in e.message and
'ERROR: Create new full backup before an incremental one' in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
@ -2324,10 +2322,13 @@ class BackupTest(ProbackupTest, unittest.TestCase):
initdb_params=['--data-checksums'],
pg_options={
'archive_timeout': '30s',
'checkpoint_timeout': '1h'})
'archive_mode': 'always',
'checkpoint_timeout': '60s',
'wal_level': 'logical'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_config(backup_dir, 'node', options=['--archive-timeout=60s'])
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
@ -2447,12 +2448,15 @@ class BackupTest(ProbackupTest, unittest.TestCase):
self.restore_node(backup_dir, 'node', replica)
self.set_replica(node, replica)
self.add_instance(backup_dir, 'replica', replica)
self.set_config(
backup_dir, 'replica',
options=['--archive-timeout=120s', '--log-level-console=LOG'])
self.set_archiving(backup_dir, 'replica', replica, replica=True)
self.set_auto_conf(replica, {'hot_standby': 'on'})
# freeze bgwriter to get rid of RUNNING XACTS records
bgwriter_pid = node.auxiliary_pids[ProcessType.BackgroundWriter][0]
gdb_checkpointer = self.gdb_attach(bgwriter_pid)
# bgwriter_pid = node.auxiliary_pids[ProcessType.BackgroundWriter][0]
# gdb_checkpointer = self.gdb_attach(bgwriter_pid)
copy_tree(
os.path.join(backup_dir, 'wal', 'node'),
@ -2460,21 +2464,22 @@ class BackupTest(ProbackupTest, unittest.TestCase):
replica.slow_start(replica=True)
self.switch_wal_segment(node)
self.switch_wal_segment(node)
# self.switch_wal_segment(node)
# self.switch_wal_segment(node)
# FULL backup from replica
self.backup_node(
backup_dir, 'replica', replica,
datname='backupdb', options=['--stream', '-U', 'backup', '--archive-timeout=30s'])
datname='backupdb', options=['-U', 'backup'])
# stream full backup from replica
self.backup_node(
backup_dir, 'replica', replica,
datname='backupdb', options=['--stream', '-U', 'backup'])
# self.switch_wal_segment(node)
self.backup_node(
backup_dir, 'replica', replica, datname='backupdb',
options=['-U', 'backup', '--archive-timeout=300s'])
# PAGE backup from replica
self.switch_wal_segment(node)
self.backup_node(
backup_dir, 'replica', replica, backup_type='page',
datname='backupdb', options=['-U', 'backup', '--archive-timeout=30s'])
@ -2484,20 +2489,22 @@ class BackupTest(ProbackupTest, unittest.TestCase):
datname='backupdb', options=['--stream', '-U', 'backup'])
# DELTA backup from replica
self.switch_wal_segment(node)
self.backup_node(
backup_dir, 'replica', replica, backup_type='delta',
datname='backupdb', options=['-U', 'backup', '--archive-timeout=30s'])
datname='backupdb', options=['-U', 'backup'])
self.backup_node(
backup_dir, 'replica', replica, backup_type='delta',
datname='backupdb', options=['--stream', '-U', 'backup'])
# PTRACK backup from replica
if self.ptrack:
self.switch_wal_segment(node)
self.backup_node(
backup_dir, 'replica', replica, backup_type='delta',
datname='backupdb', options=['-U', 'backup', '--archive-timeout=30s'])
backup_dir, 'replica', replica, backup_type='ptrack',
datname='backupdb', options=['-U', 'backup'])
self.backup_node(
backup_dir, 'replica', replica, backup_type='delta',
backup_dir, 'replica', replica, backup_type='ptrack',
datname='backupdb', options=['--stream', '-U', 'backup'])
# Clean after yourself

View File

@ -339,7 +339,7 @@ class ProbackupTest(object):
options['wal_level'] = 'logical'
options['hot_standby'] = 'off'
options['log_line_prefix'] = '"%t [%p]: [%l-1] "'
options['log_line_prefix'] = '%t [%p]: [%l-1] '
options['log_statement'] = 'none'
options['log_duration'] = 'on'
options['log_min_duration_statement'] = 0
@ -1131,7 +1131,8 @@ class ProbackupTest(object):
def set_archiving(
self, backup_dir, instance, node, replica=False,
overwrite=False, compress=False, old_binary=False):
overwrite=False, compress=False, old_binary=False,
log_level=False, archive_timeout=False):
# parse postgresql.auto.conf
options = {}
@ -1161,12 +1162,26 @@ class ProbackupTest(object):
if overwrite:
options['archive_command'] += '--overwrite '
options['archive_command'] += '--log-level-console=verbose '
options['archive_command'] += '-j 5 '
options['archive_command'] += '--batch-size 10 '
options['archive_command'] += '--no-sync '
if archive_timeout:
options['archive_command'] += '--archive-timeout={0} '.format(
archive_timeout)
if os.name == 'posix':
options['archive_command'] += '--wal-file-path=%p --wal-file-name=%f'
elif os.name == 'nt':
options['archive_command'] += '--wal-file-path="%p" --wal-file-name="%f"'
if log_level:
options['archive_command'] += ' --log-level-console={0}'.format(log_level)
options['archive_command'] += ' --log-level-file={0} '.format(log_level)
self.set_auto_conf(node, options)
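# Illustration only (not part of the helper): with log_level='VERBOSE' and
# archive_timeout='60s', the options assembled above produce an archive_command
# roughly of the form below; the backup path and instance name are hypothetical
# and the leading "archive-push ..." part is built earlier in this helper:
#   ... archive-push -B /path/to/backup_dir --instance node
#       -j 5 --batch-size 10 --no-sync --archive-timeout=60s
#       --wal-file-path=%p --wal-file-name=%f
#       --log-level-console=VERBOSE --log-level-file=VERBOSE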
def get_restore_command(self, backup_dir, instance, node):
@ -1244,7 +1259,8 @@ class ProbackupTest(object):
def set_replica(
self, master, replica,
replica_name='replica',
synchronous=False
synchronous=False,
log_shipping=False
):
self.set_auto_conf(
@ -1264,19 +1280,22 @@ class ProbackupTest(object):
if os.stat(probackup_recovery_path).st_size > 0:
config = 'probackup_recovery.conf'
self.set_auto_conf(
replica,
{'primary_conninfo': 'user={0} port={1} application_name={2} '
' sslmode=prefer sslcompression=1'.format(
self.user, master.port, replica_name)},
config)
if not log_shipping:
self.set_auto_conf(
replica,
{'primary_conninfo': 'user={0} port={1} application_name={2} '
' sslmode=prefer sslcompression=1'.format(
self.user, master.port, replica_name)},
config)
else:
replica.append_conf('recovery.conf', 'standby_mode = on')
replica.append_conf(
'recovery.conf',
"primary_conninfo = 'user={0} port={1} application_name={2}"
" sslmode=prefer sslcompression=1'".format(
self.user, master.port, replica_name))
if not log_shipping:
replica.append_conf(
'recovery.conf',
"primary_conninfo = 'user={0} port={1} application_name={2}"
" sslmode=prefer sslcompression=1'".format(
self.user, master.port, replica_name))
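# Illustration only: with log_shipping=True the replica's recovery.conf gets just
# 'standby_mode = on' here and WAL is expected to arrive via restore_command,
# while with log_shipping=False it additionally gets a primary_conninfo line
# such as (hypothetical values):
#   primary_conninfo = 'user=postgres port=5432 application_name=replica sslmode=prefer sslcompression=1'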
if synchronous:
self.set_auto_conf(

View File

@ -819,7 +819,7 @@ class PageTest(ProbackupTest, unittest.TestCase):
self.backup_node(backup_dir, 'node', node)
# make some wals
node.pgbench_init(scale=4)
node.pgbench_init(scale=10)
# delete last wal segment
wals_dir = os.path.join(backup_dir, 'wal', 'node')
@ -874,7 +874,6 @@ class PageTest(ProbackupTest, unittest.TestCase):
'INFO: Wait for WAL segment' in e.message and
'to be archived' in e.message and
'Could not read WAL record at' in e.message and
'incorrect resource manager data checksum in record at' in e.message and
'Possible WAL corruption. Error has occured during reading WAL segment' in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
@ -899,7 +898,6 @@ class PageTest(ProbackupTest, unittest.TestCase):
'INFO: Wait for WAL segment' in e.message and
'to be archived' in e.message and
'Could not read WAL record at' in e.message and
'incorrect resource manager data checksum in record at' in e.message and
'Possible WAL corruption. Error has occured during reading WAL segment "{0}"'.format(
file) in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
@ -942,8 +940,10 @@ class PageTest(ProbackupTest, unittest.TestCase):
self.set_archiving(backup_dir, 'alien_node', alien_node)
alien_node.slow_start()
self.backup_node(backup_dir, 'node', node)
self.backup_node(backup_dir, 'alien_node', alien_node)
self.backup_node(
backup_dir, 'node', node, options=['--stream'])
self.backup_node(
backup_dir, 'alien_node', alien_node, options=['--stream'])
# make some wals
node.safe_psql(
@ -996,8 +996,6 @@ class PageTest(ProbackupTest, unittest.TestCase):
'INFO: Wait for WAL segment' in e.message and
'to be archived' in e.message and
'Could not read WAL record at' in e.message and
'WAL file is from different database system: WAL file database system identifier is' in e.message and
'pg_control database system identifier is' in e.message and
'Possible WAL corruption. Error has occured during reading WAL segment' in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
@ -1181,6 +1179,85 @@ class PageTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
# @unittest.expectedFailure
def test_multi_timeline_page(self):
"""
Check that a backup in PAGE mode chooses
its parent backup correctly:
t12 /---P-->
...
t3 /---->
t2 /---->
t1 -F-----D->
P must have F as parent
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={'autovacuum': 'off'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
node.pgbench_init(scale=50)
full_id = self.backup_node(backup_dir, 'node', node)
pgbench = node.pgbench(options=['-T', '20', '-c', '1', '--no-vacuum'])
pgbench.wait()
self.backup_node(backup_dir, 'node', node, backup_type='delta')
node.cleanup()
self.restore_node(
backup_dir, 'node', node, backup_id=full_id,
options=[
'--recovery-target=immediate',
'--recovery-target-action=promote'])
node.slow_start()
pgbench = node.pgbench(options=['-T', '20', '-c', '1', '--no-vacuum'])
pgbench.wait()
# create timelines
for i in range(2, 12):
node.cleanup()
self.restore_node(
backup_dir, 'node', node, backup_id=full_id,
options=['--recovery-target-timeline={0}'.format(i)])
node.slow_start()
pgbench = node.pgbench(options=['-T', '3', '-c', '1', '--no-vacuum'])
pgbench.wait()
page_id = self.backup_node(
backup_dir, 'node', node, backup_type='page',
options=['--log-level-file=VERBOSE'])
pgdata = self.pgdata_content(node.data_dir)
node.cleanup()
self.restore_node(backup_dir, 'node', node)
pgdata_restored = self.pgdata_content(node.data_dir)
self.compare_pgdata(pgdata, pgdata_restored)
show = self.show_archive(backup_dir)
timelines = show[0]['timelines']
# self.assertEqual()
self.assertEqual(
self.show_pb(backup_dir, 'node', page_id)['parent-backup-id'],
full_id)
# Clean after yourself
self.del_test_dir(module_name, fname)
@unittest.skip("skip")
# @unittest.expectedFailure
def test_page_pg_resetxlog(self):

View File

@ -3,10 +3,10 @@ import unittest
from .helpers.ptrack_helpers import ProbackupTest, ProbackupException, idx_ptrack
from datetime import datetime, timedelta
import subprocess
from testgres import QueryException
from testgres import QueryException, StartNodeException
import shutil
import sys
import time
from time import sleep
from threading import Thread
@ -210,46 +210,36 @@ class PtrackTest(ProbackupTest, unittest.TestCase):
"GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup;"
)
if self.ptrack:
fnames = []
if node.major_version < 12:
fnames += [
'pg_catalog.oideq(oid, oid)',
'pg_catalog.ptrack_version()',
'pg_catalog.pg_ptrack_clear()',
'pg_catalog.pg_ptrack_control_lsn()',
'pg_catalog.pg_ptrack_get_and_clear_db(oid, oid)',
'pg_catalog.pg_ptrack_get_and_clear(oid, oid)',
'pg_catalog.pg_ptrack_get_block_2(oid, oid, oid, bigint)'
]
else:
# TODO: why does backup work without these grants?
# fnames += [
# 'pg_ptrack_get_pagemapset(pg_lsn)',
# 'pg_ptrack_control_lsn()',
# 'pg_ptrack_get_block(oid, oid, oid, bigint)'
# ]
node.safe_psql(
"backupdb",
"CREATE SCHEMA ptrack")
node.safe_psql(
"backupdb",
"CREATE EXTENSION ptrack WITH SCHEMA ptrack")
node.safe_psql(
"backupdb",
"GRANT USAGE ON SCHEMA ptrack TO backup")
if node.major_version < 12:
fnames = [
'pg_catalog.oideq(oid, oid)',
'pg_catalog.ptrack_version()',
'pg_catalog.pg_ptrack_clear()',
'pg_catalog.pg_ptrack_control_lsn()',
'pg_catalog.pg_ptrack_get_and_clear_db(oid, oid)',
'pg_catalog.pg_ptrack_get_and_clear(oid, oid)',
'pg_catalog.pg_ptrack_get_block_2(oid, oid, oid, bigint)'
]
for fname in fnames:
node.safe_psql(
"backupdb",
"GRANT EXECUTE ON FUNCTION {0} TO backup".format(fname))
else:
node.safe_psql(
"backupdb",
"GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup")
"CREATE SCHEMA ptrack")
node.safe_psql(
"backupdb",
"CREATE EXTENSION ptrack WITH SCHEMA ptrack")
node.safe_psql(
"backupdb",
"GRANT USAGE ON SCHEMA ptrack TO backup")
node.safe_psql(
"backupdb",
"GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup")
if ProbackupTest.enterprise:
node.safe_psql(
@ -3848,7 +3838,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase):
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
# @unittest.expectedFailure
@unittest.expectedFailure
def test_ptrack_pg_resetxlog(self):
fname = self.id().split('.')[3]
node = self.make_simple_node(
@ -4016,14 +4006,17 @@ class PtrackTest(ProbackupTest, unittest.TestCase):
node.stop(['-m', 'immediate', '-D', node.data_dir])
ptrack_map = os.path.join(node.data_dir, 'global', 'ptrack.map')
ptrack_map_mmap = os.path.join(node.data_dir, 'global', 'ptrack.map.mmap')
# Let's corrupt the ptrack map files: ptrack.map, ptrack.map.mmap
with open(os.path.join(node.data_dir, 'global', 'ptrack.map'), "rb+", 0) as f:
with open(ptrack_map, "rb+", 0) as f:
f.seek(42)
f.write(b"blablahblahs")
f.flush()
f.close
with open(os.path.join(node.data_dir, 'global', 'ptrack.map.mmap'), "rb+", 0) as f:
with open(ptrack_map_mmap, "rb+", 0) as f:
f.seek(42)
f.write(b"blablahblahs")
f.flush()
@ -4031,13 +4024,97 @@ class PtrackTest(ProbackupTest, unittest.TestCase):
# os.remove(os.path.join(node.logs_dir, node.pg_log_name))
try:
node.slow_start()
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because ptrack.map is corrupted"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except StartNodeException as e:
self.assertIn(
'Cannot start node',
e.message,
'\n Unexpected Error Message: {0}\n'
' CMD: {1}'.format(repr(e.message), self.cmd))
log_file = os.path.join(node.logs_dir, 'postgresql.log')
with open(log_file, 'r') as f:
log_content = f.read()
self.assertIn(
'FATAL: incorrect checksum of file "{0}"'.format(ptrack_map),
log_content)
self.set_auto_conf(node, {'ptrack_map_size': '0'})
node.slow_start()
try:
self.backup_node(
backup_dir, 'node', node,
backup_type='ptrack', options=['--stream'])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because instance ptrack is disabled"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: Ptrack is disabled',
e.message,
'\n Unexpected Error Message: {0}\n'
' CMD: {1}'.format(repr(e.message), self.cmd))
node.safe_psql(
'postgres',
"update t_heap set id = nextval('t_seq'), text = md5(text), "
"tsvector = md5(repeat(tsvector::text, 10))::tsvector")
node.stop(['-m', 'immediate', '-D', node.data_dir])
self.set_auto_conf(node, {'ptrack_map_size': '32'})
node.slow_start()
sleep(1)
try:
self.backup_node(
backup_dir, 'node', node,
backup_type='ptrack', options=['--stream'])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because ptrack map is from future"
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'ERROR: LSN from ptrack_control',
e.message,
'\n Unexpected Error Message: {0}\n'
' CMD: {1}'.format(repr(e.message), self.cmd))
sleep(1)
self.backup_node(
backup_dir, 'node', node,
backup_type='delta', options=['--stream'])
node.safe_psql(
'postgres',
"update t_heap set id = nextval('t_seq'), text = md5(text), "
"tsvector = md5(repeat(tsvector::text, 10))::tsvector")
self.backup_node(
backup_dir, 'node', node,
backup_type='ptrack', options=['--stream'])
pgdata = self.pgdata_content(node.data_dir)
node.cleanup()
self.restore_node(backup_dir, 'node', node)

View File

@ -571,30 +571,25 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
'Skipped because backup from replica is not supported in PG 9.5')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
self.add_instance(backup_dir, 'node', master)
self.set_archiving(backup_dir, 'node', master)
master.slow_start()
# freeze bgwriter to get rid of RUNNING XACTS records
bgwriter_pid = master.auxiliary_pids[ProcessType.BackgroundWriter][0]
gdb_checkpointer = self.gdb_attach(bgwriter_pid)
self.backup_node(backup_dir, 'master', master)
self.backup_node(backup_dir, 'node', master)
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
self.restore_node(backup_dir, 'node', replica)
# Settings for Replica
self.add_instance(backup_dir, 'replica', replica)
self.set_replica(master, replica, synchronous=True)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
copy_tree(
os.path.join(backup_dir, 'wal', 'master'),
os.path.join(backup_dir, 'wal', 'replica'))
self.set_archiving(backup_dir, 'node', replica, replica=True)
replica.slow_start(replica=True)
@ -602,7 +597,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
self.switch_wal_segment(master)
output = self.backup_node(
backup_dir, 'replica', replica,
backup_dir, 'node', replica, replica.data_dir,
options=[
'--archive-timeout=30',
'--log-level-console=LOG',
@ -611,24 +606,24 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
return_id=False)
self.assertIn(
'LOG: Null offset in stop_backup_lsn value 0/3000000',
'LOG: Null offset in stop_backup_lsn value 0/4000000',
output)
self.assertIn(
'WARNING: WAL segment 000000010000000000000003 could not be streamed in 30 seconds',
'WARNING: WAL segment 000000010000000000000004 could not be streamed in 30 seconds',
output)
self.assertIn(
'WARNING: Failed to get next WAL record after 0/3000000, looking for previous WAL record',
'WARNING: Failed to get next WAL record after 0/4000000, looking for previous WAL record',
output)
self.assertIn(
'LOG: Looking for LSN 0/3000000 in segment: 000000010000000000000002',
'LOG: Looking for LSN 0/4000000 in segment: 000000010000000000000003',
output)
self.assertIn(
'has endpoint 0/3000000 which is '
'equal or greater than requested LSN 0/3000000',
'has endpoint 0/4000000 which is '
'equal or greater than requested LSN 0/4000000',
output)
self.assertIn(
@ -719,19 +714,19 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
log_content = f.read()
self.assertIn(
'LOG: Null offset in stop_backup_lsn value 0/3000000',
'LOG: Null offset in stop_backup_lsn value 0/4000000',
log_content)
self.assertIn(
'LOG: Looking for segment: 000000010000000000000003',
'LOG: Looking for segment: 000000010000000000000004',
log_content)
self.assertIn(
'LOG: First record in WAL segment "000000010000000000000003": 0/3000028',
'LOG: First record in WAL segment "000000010000000000000004": 0/4000028',
log_content)
self.assertIn(
'LOG: current.stop_lsn: 0/3000028',
'LOG: current.stop_lsn: 0/4000028',
log_content)
# Clean after yourself
@ -757,31 +752,26 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
'Skipped because backup from replica is not supported in PG 9.5')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
self.add_instance(backup_dir, 'node', master)
self.set_archiving(backup_dir, 'node', master)
master.slow_start()
self.backup_node(backup_dir, 'master', master)
self.backup_node(backup_dir, 'node', master)
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
self.restore_node(backup_dir, 'node', replica)
# Settings for Replica
self.add_instance(backup_dir, 'replica', replica)
self.set_replica(master, replica, synchronous=True)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
self.set_archiving(backup_dir, 'node', replica, replica=True)
# freeze bgwriter to get rid of RUNNING XACTS records
bgwriter_pid = master.auxiliary_pids[ProcessType.BackgroundWriter][0]
gdb_checkpointer = self.gdb_attach(bgwriter_pid)
copy_tree(
os.path.join(backup_dir, 'wal', 'master'),
os.path.join(backup_dir, 'wal', 'replica'))
replica.slow_start(replica=True)
self.switch_wal_segment(master)
@ -789,7 +779,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
# take backup from replica
output = self.backup_node(
backup_dir, 'replica', replica,
backup_dir, 'node', replica, replica.data_dir,
options=[
'--archive-timeout=30',
'--log-level-console=LOG',
@ -797,24 +787,24 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
return_id=False)
self.assertIn(
'LOG: Null offset in stop_backup_lsn value 0/3000000',
'LOG: Null offset in stop_backup_lsn value 0/4000000',
output)
self.assertIn(
'WARNING: WAL segment 000000010000000000000003 could not be archived in 30 seconds',
'WARNING: WAL segment 000000010000000000000004 could not be archived in 30 seconds',
output)
self.assertIn(
'WARNING: Failed to get next WAL record after 0/3000000, looking for previous WAL record',
'WARNING: Failed to get next WAL record after 0/4000000, looking for previous WAL record',
output)
self.assertIn(
'LOG: Looking for LSN 0/3000000 in segment: 000000010000000000000002',
'LOG: Looking for LSN 0/4000000 in segment: 000000010000000000000003',
output)
self.assertIn(
'has endpoint 0/3000000 which is '
'equal or greater than requested LSN 0/3000000',
'has endpoint 0/4000000 which is '
'equal or greater than requested LSN 0/4000000',
output)
self.assertIn(
@ -846,44 +836,39 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
'Skipped because backup from replica is not supported in PG 9.5')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
self.add_instance(backup_dir, 'node', master)
self.set_archiving(backup_dir, 'node', master)
master.slow_start()
self.backup_node(backup_dir, 'master', master)
self.backup_node(backup_dir, 'node', master)
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
self.restore_node(backup_dir, 'node', replica)
# Settings for Replica
self.add_instance(backup_dir, 'replica', replica)
self.set_replica(master, replica, synchronous=True)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
copy_tree(
os.path.join(backup_dir, 'wal', 'master'),
os.path.join(backup_dir, 'wal', 'replica'))
self.set_archiving(backup_dir, 'node', replica, replica=True)
replica.slow_start(replica=True)
# take backup from replica
self.backup_node(
backup_dir, 'replica', replica,
backup_dir, 'node', replica, replica.data_dir,
options=[
'--archive-timeout=30',
'--log-level-console=verbose',
'--log-level-console=LOG',
'--no-validate'],
return_id=False)
try:
self.backup_node(
backup_dir, 'replica', replica,
backup_dir, 'node', replica, replica.data_dir,
options=[
'--archive-timeout=30',
'--log-level-console=verbose',
'--log-level-console=LOG',
'--no-validate'])
# we should die here because exception is what we expect to happen
self.assertEqual(
@ -893,19 +878,19 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'LOG: Looking for LSN 0/3000060 in segment: 000000010000000000000003',
'LOG: Looking for LSN 0/4000060 in segment: 000000010000000000000004',
e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
self.assertIn(
'INFO: Wait for LSN 0/3000060 in archived WAL segment',
'INFO: Wait for LSN 0/4000060 in archived WAL segment',
e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
self.assertIn(
'ERROR: WAL segment 000000010000000000000003 could not be archived in 30 seconds',
'ERROR: WAL segment 000000010000000000000004 could not be archived in 30 seconds',
e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
@ -1016,7 +1001,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
@unittest.skip("skip")
def test_replica_promote_1(self):
"""
"""
@ -1037,7 +1022,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
# set replica True, so archive_mode 'always' is used.
# set replica True, so archive_mode 'always' is used.
self.set_archiving(backup_dir, 'master', master, replica=True)
master.slow_start()
@ -1091,6 +1076,528 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_replica_promote_2(self):
"""
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
# set replica True, so archive_mode 'always' is used.
self.set_archiving(
backup_dir, 'master', master, replica=True)
master.slow_start()
self.backup_node(backup_dir, 'master', master)
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
# Settings for Replica
self.set_replica(master, replica)
self.set_auto_conf(replica, {'port': replica.port})
replica.slow_start(replica=True)
master.safe_psql(
'postgres',
'CREATE TABLE t1 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,1) i')
self.wait_until_replica_catch_with_master(master, replica)
replica.promote()
replica.safe_psql(
'postgres',
'CHECKPOINT')
# replica.safe_psql(
# 'postgres',
# 'create table t2()')
#
# replica.safe_psql(
# 'postgres',
# 'CHECKPOINT')
self.backup_node(
backup_dir, 'master', replica, data_dir=replica.data_dir,
backup_type='page')
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_replica_promote_3(self):
"""
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
master.slow_start()
self.backup_node(backup_dir, 'master', master, options=['--stream'])
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
# Settings for Replica
self.set_replica(master, replica)
self.set_auto_conf(replica, {'port': replica.port})
replica.slow_start(replica=True)
master.safe_psql(
'postgres',
'CREATE TABLE t1 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(master, replica)
self.add_instance(backup_dir, 'replica', replica)
replica.safe_psql(
'postgres',
'CHECKPOINT')
full_id = self.backup_node(
backup_dir, 'replica',
replica, options=['--stream'])
master.safe_psql(
'postgres',
'CREATE TABLE t2 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(master, replica)
replica.safe_psql(
'postgres',
'CHECKPOINT')
self.backup_node(
backup_dir, 'replica', replica,
backup_type='delta', options=['--stream'])
replica.promote()
replica.safe_psql(
'postgres',
'CHECKPOINT')
# failing, because without archiving it is impossible to
# take a multi-timeline backup.
try:
self.backup_node(
backup_dir, 'replica', replica,
backup_type='delta', options=['--stream'])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of timeline switch "
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertTrue(
'WARNING: Cannot find valid backup on previous timelines, '
'WAL archive is not available' in e.message and
'ERROR: Create new full backup before an incremental one' in e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_replica_promote_archive_delta(self):
"""
t3 /---D3-->
t2 /------->
t1 --F---D1--D2--
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node1 = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node1'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '30s',
'archive_timeout': '30s',
'autovacuum': 'off'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node1)
self.set_config(
backup_dir, 'node', options=['--archive-timeout=60s'])
self.set_archiving(backup_dir, 'node', node1)
node1.slow_start()
self.backup_node(backup_dir, 'node', node1, options=['--stream'])
# Create replica
node2 = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node2'))
node2.cleanup()
self.restore_node(backup_dir, 'node', node2, node2.data_dir)
# Settings for Replica
self.set_replica(node1, node2)
self.set_auto_conf(node2, {'port': node2.port})
self.set_archiving(backup_dir, 'node', node2, replica=True)
node2.slow_start(replica=True)
node1.safe_psql(
'postgres',
'CREATE TABLE t1 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(node1, node2)
node1.safe_psql(
'postgres',
'CREATE TABLE t2 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(node1, node2)
# delta backup on replica on timeline 1
delta1_id = self.backup_node(
backup_dir, 'node', node2, node2.data_dir,
'delta', options=['--stream'])
# delta backup on replica on timeline 1
delta2_id = self.backup_node(
backup_dir, 'node', node2, node2.data_dir, 'delta')
self.change_backup_status(
backup_dir, 'node', delta2_id, 'ERROR')
# node2 is now master
node2.promote()
node2.safe_psql('postgres', 'CHECKPOINT')
node2.safe_psql(
'postgres',
'CREATE TABLE t3 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
# node1 is now replica
node1.cleanup()
# kludge "backup_id=delta1_id"
self.restore_node(
backup_dir, 'node', node1, node1.data_dir,
backup_id=delta1_id,
options=[
'--recovery-target-timeline=2',
'--recovery-target=latest'])
# Settings for Replica
self.set_replica(node2, node1)
self.set_auto_conf(node1, {'port': node1.port})
self.set_archiving(backup_dir, 'node', node1, replica=True)
node1.slow_start(replica=True)
node2.safe_psql(
'postgres',
'CREATE TABLE t4 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,30) i')
self.wait_until_replica_catch_with_master(node2, node1)
# node1 is back to be a master
node1.promote()
node1.safe_psql('postgres', 'CHECKPOINT')
# delta backup on timeline 3
self.backup_node(
backup_dir, 'node', node1, node1.data_dir, 'delta',
options=['--archive-timeout=60'])
pgdata = self.pgdata_content(node1.data_dir)
node1.cleanup()
self.restore_node(backup_dir, 'node', node1, node1.data_dir)
pgdata_restored = self.pgdata_content(node1.data_dir)
self.compare_pgdata(pgdata, pgdata_restored)
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_replica_promote_archive_page(self):
"""
t3 /---P3-->
t2 /------->
t1 --F---P1--P2--
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node1 = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node1'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '30s',
'archive_timeout': '30s',
'autovacuum': 'off'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node1)
self.set_archiving(backup_dir, 'node', node1)
self.set_config(
backup_dir, 'node', options=['--archive-timeout=60s'])
node1.slow_start()
self.backup_node(backup_dir, 'node', node1, options=['--stream'])
# Create replica
node2 = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node2'))
node2.cleanup()
self.restore_node(backup_dir, 'node', node2, node2.data_dir)
# Settings for Replica
self.set_replica(node1, node2)
self.set_auto_conf(node2, {'port': node2.port})
self.set_archiving(backup_dir, 'node', node2, replica=True)
node2.slow_start(replica=True)
node1.safe_psql(
'postgres',
'CREATE TABLE t1 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(node1, node2)
node1.safe_psql(
'postgres',
'CREATE TABLE t2 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(node1, node2)
# page backup on replica on timeline 1
page1_id = self.backup_node(
backup_dir, 'node', node2, node2.data_dir,
'page', options=['--stream'])
# page backup on replica on timeline 1
page2_id = self.backup_node(
backup_dir, 'node', node2, node2.data_dir, 'page')
self.change_backup_status(
backup_dir, 'node', page2_id, 'ERROR')
# node2 is now master
node2.promote()
node2.safe_psql('postgres', 'CHECKPOINT')
node2.safe_psql(
'postgres',
'CREATE TABLE t3 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
# node1 is now replica
node1.cleanup()
# kludge "backup_id=page1_id"
self.restore_node(
backup_dir, 'node', node1, node1.data_dir,
backup_id=page1_id,
options=[
'--recovery-target-timeline=2',
'--recovery-target=latest'])
# Settings for Replica
self.set_replica(node2, node1)
self.set_auto_conf(node1, {'port': node1.port})
self.set_archiving(backup_dir, 'node', node1, replica=True)
node1.slow_start(replica=True)
node2.safe_psql(
'postgres',
'CREATE TABLE t4 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,30) i')
self.wait_until_replica_catch_with_master(node2, node1)
# node1 is back to be a master
node1.promote()
node1.safe_psql('postgres', 'CHECKPOINT')
# delta3_id = self.backup_node(
# backup_dir, 'node', node2, node2.data_dir, 'delta')
# page backup on timeline 3
page3_id = self.backup_node(
backup_dir, 'node', node1, node1.data_dir, 'page',
options=['--archive-timeout=60'])
pgdata = self.pgdata_content(node1.data_dir)
node1.cleanup()
self.restore_node(backup_dir, 'node', node1, node1.data_dir)
pgdata_restored = self.pgdata_content(node1.data_dir)
self.compare_pgdata(pgdata, pgdata_restored)
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_parent_choosing(self):
"""
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
master.slow_start()
self.backup_node(backup_dir, 'master', master, options=['--stream'])
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
# Settings for Replica
self.set_replica(master, replica)
self.set_auto_conf(replica, {'port': replica.port})
replica.slow_start(replica=True)
master.safe_psql(
'postgres',
'CREATE TABLE t1 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(master, replica)
self.add_instance(backup_dir, 'replica', replica)
full_id = self.backup_node(
backup_dir, 'replica',
replica, options=['--stream'])
master.safe_psql(
'postgres',
'CREATE TABLE t2 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,20) i')
self.wait_until_replica_catch_with_master(master, replica)
self.backup_node(
backup_dir, 'replica', replica,
backup_type='delta', options=['--stream'])
replica.promote()
replica.safe_psql('postgres', 'CHECKPOINT')
# failing, because without archiving it is impossible to
# take a multi-timeline backup.
try:
self.backup_node(
backup_dir, 'replica', replica,
backup_type='delta', options=['--stream'])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of timeline switch "
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertTrue(
'WARNING: Cannot find valid backup on previous timelines, '
'WAL archive is not available' in e.message and
'ERROR: Create new full backup before an incremental one' in e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_instance_from_the_past(self):
"""
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
node.slow_start()
full_id = self.backup_node(backup_dir, 'node', node, options=['--stream'])
node.pgbench_init(scale=10)
self.backup_node(backup_dir, 'node', node, options=['--stream'])
node.cleanup()
self.restore_node(backup_dir, 'node', node, backup_id=full_id)
node.slow_start()
try:
self.backup_node(
backup_dir, 'node', node,
backup_type='delta', options=['--stream'])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because instance is from the past "
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertTrue(
'ERROR: Current START LSN' in e.message and
'is lower than START LSN' in e.message and
'It may indicate that we are trying to backup '
'PostgreSQL instance from the past' in e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)
# TODO:
# null offset STOP LSN and latest record in previous segment is a contrecord (manual only)

View File

@ -1712,10 +1712,9 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
"without valid full backup.\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
"ERROR: Valid backup on current timeline 1 is not found. "
"Create new FULL backup before an incremental one.",
e.message,
self.assertTrue(
"WARNING: Valid backup on current timeline 1 is not found" in e.message and
"ERROR: Create new full backup before an incremental one" in e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
@ -2675,7 +2674,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
self.assertIn(
'LOG: Archive backup {0} to stay consistent protect from '
'purge WAL interval between 000000010000000000000004 '
'and 000000010000000000000004 on timeline 1'.format(B1), output)
'and 000000010000000000000005 on timeline 1'.format(B1), output)
start_lsn_B4 = self.show_pb(backup_dir, 'node', B4)['start-lsn']
self.assertIn(
@ -2684,13 +2683,13 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
self.assertIn(
'LOG: Timeline 3 to stay reachable from timeline 1 protect '
'from purge WAL interval between 000000020000000000000005 and '
'000000020000000000000008 on timeline 2', output)
'from purge WAL interval between 000000020000000000000006 and '
'000000020000000000000009 on timeline 2', output)
self.assertIn(
'LOG: Timeline 3 to stay reachable from timeline 1 protect '
'from purge WAL interval between 000000010000000000000004 and '
'000000010000000000000005 on timeline 1', output)
'000000010000000000000006 on timeline 1', output)
show_tli1_before = self.show_archive(backup_dir, 'node', tli=1)
show_tli2_before = self.show_archive(backup_dir, 'node', tli=2)
@ -2745,19 +2744,19 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
self.assertEqual(
show_tli1_after['lost-segments'][0]['begin-segno'],
'000000010000000000000006')
'000000010000000000000007')
self.assertEqual(
show_tli1_after['lost-segments'][0]['end-segno'],
'000000010000000000000009')
'00000001000000000000000A')
self.assertEqual(
show_tli2_after['lost-segments'][0]['begin-segno'],
'000000020000000000000009')
'00000002000000000000000A')
self.assertEqual(
show_tli2_after['lost-segments'][0]['end-segno'],
'000000020000000000000009')
'00000002000000000000000A')
self.validate_pb(backup_dir, 'node')

View File

@ -1786,7 +1786,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
self.assertTrue(
'LOG: archive command failed with exit code 1' in log_content and
'DETAIL: The failed archive command was:' in log_content and
'INFO: pg_probackup archive-push from' in log_content,
'WAL file already exists in archive with different checksum' in log_content,
'Expecting error messages about failed archive_command'
)
self.assertFalse(

24
travis/Dockerfile.in Normal file
View File

@ -0,0 +1,24 @@
FROM ololobus/postgres-dev:stretch
USER root
RUN apt-get update
RUN apt-get -yq install python python-pip python-virtualenv
# Environment
ENV PG_MAJOR=${PG_VERSION} PG_BRANCH=${PG_BRANCH}
ENV LANG=C.UTF-8 PGHOME=/pg/testdir/pgbin
# Make directories
RUN mkdir -p /pg/testdir
COPY run_tests.sh /run.sh
RUN chmod 755 /run.sh
COPY . /pg/testdir
WORKDIR /pg/testdir
# Grant privileges
RUN chown -R postgres:postgres /pg/testdir
USER postgres
ENTRYPOINT MODE=${MODE} /run.sh

View File

@ -0,0 +1,2 @@
tests:
build: .

25
travis/make_dockerfile.sh Executable file
View File

@ -0,0 +1,25 @@
#!/usr/bin/env sh
if [ -z ${PG_VERSION+x} ]; then
echo PG_VERSION is not set!
exit 1
fi
if [ -z ${PG_BRANCH+x} ]; then
echo PG_BRANCH is not set!
exit 1
fi
if [ -z ${MODE+x} ]; then
MODE=basic
fi
echo PG_VERSION=${PG_VERSION}
echo PG_BRANCH=${PG_BRANCH}
echo MODE=${MODE}
sed \
-e 's/${PG_VERSION}/'${PG_VERSION}/g \
-e 's/${PG_BRANCH}/'${PG_BRANCH}/g \
-e 's/${MODE}/'${MODE}/g \
Dockerfile.in > Dockerfile
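# Worked example (values are illustrative): with PG_VERSION=12,
# PG_BRANCH=REL_12_STABLE and MODE unset, MODE defaults to basic and the sed
# above rewrites Dockerfile.in so that
#   ENV PG_MAJOR=${PG_VERSION} PG_BRANCH=${PG_BRANCH}
# becomes
#   ENV PG_MAJOR=12 PG_BRANCH=REL_12_STABLE
# and
#   ENTRYPOINT MODE=${MODE} /run.sh
# becomes
#   ENTRYPOINT MODE=basic /run.sh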

80
travis/run_tests.sh Executable file
View File

@ -0,0 +1,80 @@
#!/usr/bin/env bash
#
# Copyright (c) 2019-2020, Postgres Professional
#
PG_SRC=$PWD/postgres
# # Here PG_VERSION is provided by postgres:X-alpine docker image
# curl "https://ftp.postgresql.org/pub/source/v$PG_VERSION/postgresql-$PG_VERSION.tar.bz2" -o postgresql.tar.bz2
# echo "$PG_SHA256 *postgresql.tar.bz2" | sha256sum -c -
# mkdir $PG_SRC
# tar \
# --extract \
# --file postgresql.tar.bz2 \
# --directory $PG_SRC \
# --strip-components 1
# Clone Postgres
echo "############### Getting Postgres sources:"
git clone https://github.com/postgres/postgres.git -b $PG_BRANCH --depth=1
# Compile and install Postgres
echo "############### Compiling Postgres:"
cd postgres # Go to postgres dir
./configure --prefix=$PGHOME --enable-debug --enable-cassert --enable-depend --enable-tap-tests
make -s -j$(nproc) install
make -s -j$(nproc) -C contrib/ install
# Override default Postgres instance
export PATH=$PGHOME/bin:$PATH
export LD_LIBRARY_PATH=$PGHOME/lib
export PG_CONFIG=$(which pg_config)
# Get amcheck if missing
if [ ! -d "contrib/amcheck" ]; then
echo "############### Getting missing amcheck:"
git clone https://github.com/petergeoghegan/amcheck.git --depth=1 contrib/amcheck
make USE_PGXS=1 -C contrib/amcheck install
fi
# Get back to testdir
cd ..
# Show pg_config path (just in case)
echo "############### pg_config path:"
which pg_config
# Show pg_config just in case
echo "############### pg_config:"
pg_config
# Build and install pg_probackup (using PG_CPPFLAGS and SHLIB_LINK for gcov)
echo "############### Compiling and installing pg_probackup:"
# make USE_PGXS=1 PG_CPPFLAGS="-coverage" SHLIB_LINK="-coverage" top_srcdir=$CUSTOM_PG_SRC install
make USE_PGXS=1 top_srcdir=$PG_SRC install
# Setup python environment
echo "############### Setting up python env:"
virtualenv pyenv
source pyenv/bin/activate
pip install testgres==1.8.2
echo "############### Testing:"
if [ "$MODE" = "basic" ]; then
export PG_PROBACKUP_TEST_BASIC=ON
python -m unittest -v tests
python -m unittest -v tests.init
else
python -m unittest -v tests.$MODE
fi
# Generate *.gcov files
# gcov src/*.c src/*.h
# Send coverage stats to Codecov
# bash <(curl -s https://codecov.io/bash)
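# Sketch of a local run (assumes Docker and docker-compose are available and
# that these travis/ files have been copied into the build context root;
# the values are examples only):
#   export PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=archive
#   ./make_dockerfile.sh      # render Dockerfile from Dockerfile.in
#   docker-compose build
#   docker-compose run tests  # the container ENTRYPOINT invokes this script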