mirror of https://github.com/postgrespro/pg_probackup.git
synced 2024-11-28 09:33:54 +02:00

commit 3429ef149e: Merge branch 'master' into issue_120

.gitignore (vendored) | 11
@@ -47,3 +47,14 @@

# Doc files
/doc/*html

# Docker files
/docker-compose.yml
/Dockerfile
/Dockerfile.in
/run_tests.sh
/make_dockerfile.sh
/backup_restore.sh

# Misc
.python-version
.travis.yml | 46
@@ -1,7 +1,47 @@

sudo: required
os: linux

dist: bionic

language: c

services:
- docker
- docker

before_install:
- cp travis/* .

install:
- ./make_dockerfile.sh
- docker-compose build

script:
- docker run -v $(pwd):/tests --rm centos:7 /tests/travis/backup_restore.sh
- docker-compose run tests
# - docker-compose run $(bash <(curl -s https://codecov.io/env)) tests
# - docker run -v $(pwd):/tests --rm centos:7 /tests/travis/backup_restore.sh

notifications:
email:
on_success: change
on_failure: always

# Default MODE is basic, i.e. all tests with PG_PROBACKUP_TEST_BASIC=ON
env:
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=archive
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=backup
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=compression
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=delta
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=locking
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=merge
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=page
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=replica
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=retention
- PG_VERSION=12 PG_BRANCH=REL_12_STABLE MODE=restore
- PG_VERSION=11 PG_BRANCH=REL_11_STABLE
- PG_VERSION=10 PG_BRANCH=REL_10_STABLE
- PG_VERSION=9.6 PG_BRANCH=REL9_6_STABLE
- PG_VERSION=9.5 PG_BRANCH=REL9_5_STABLE

jobs:
allow_failures:
- if: env(MODE) IN (archive, backup, delta, locking, merge, replica, retention, restore)
Makefile | 6
@@ -15,9 +15,9 @@ OBJS += src/pg_crc.o src/datapagemap.o src/receivelog.o src/streamutil.o \

EXTRA_CLEAN = src/pg_crc.c src/datapagemap.c src/datapagemap.h \
	src/receivelog.c src/receivelog.h src/streamutil.c src/streamutil.h \
	src/xlogreader.c
	src/xlogreader.c src/instr_time.h

INCLUDES = src/datapagemap.h src/streamutil.h src/receivelog.h
INCLUDES = src/datapagemap.h src/streamutil.h src/receivelog.h src/instr_time.h

ifdef USE_PGXS
PG_CONFIG = pg_config
@@ -60,6 +60,8 @@ all: checksrcdir $(INCLUDES);

$(PROGRAM): $(OBJS)

src/instr_time.h: $(top_srcdir)/src/include/portability/instr_time.h
	rm -f $@ && $(LN_S) $(srchome)/src/include/portability/instr_time.h $@
src/datapagemap.c: $(top_srcdir)/src/bin/pg_rewind/datapagemap.c
	rm -f $@ && $(LN_S) $(srchome)/src/bin/pg_rewind/datapagemap.c $@
src/datapagemap.h: $(top_srcdir)/src/bin/pg_rewind/datapagemap.h
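The new src/instr_time.h symlink pulls PostgreSQL's portability timing macros into the build. As a minimal sketch only (not code from this commit; the helper name and include path are illustrative), timing an operation with that header looks like this:

    #include "instr_time.h"              /* symlinked from src/include/portability/instr_time.h */

    /* Hypothetical helper: time an arbitrary callback, in milliseconds. */
    static double
    elapsed_ms(void (*work)(void))
    {
        instr_time start, end;

        INSTR_TIME_SET_CURRENT(start);   /* take a timestamp before the work */
        work();
        INSTR_TIME_SET_CURRENT(end);     /* and another one after it */

        INSTR_TIME_SUBTRACT(end, start); /* end now holds the elapsed interval */
        return INSTR_TIME_GET_MILLISEC(end);
    }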
README.md
@@ -1,3 +1,5 @@

[![Build Status](https://travis-ci.com/postgrespro/pg_probackup.svg?branch=master)](https://travis-ci.com/postgrespro/pg_probackup)

# pg_probackup

`pg_probackup` is a utility to manage backup and recovery of PostgreSQL database clusters. It is designed to perform periodic backups of the PostgreSQL instance that enable you to restore the server in case of a failure.

@@ -38,8 +40,9 @@ Regardless of the chosen backup type, all backups taken with `pg_probackup` supp

`PTRACK` backup support provided via following options:
* vanilla PostgreSQL compiled with ptrack patch. Currently there are patches for [PostgreSQL 9.6](https://gist.githubusercontent.com/gsmol/5b615c971dfd461c76ef41a118ff4d97/raw/e471251983f14e980041f43bea7709b8246f4178/ptrack_9.6.6_v1.5.patch) and [PostgreSQL 10](https://gist.githubusercontent.com/gsmol/be8ee2a132b88463821021fd910d960e/raw/de24f9499f4f314a4a3e5fae5ed4edb945964df8/ptrack_10.1_v1.5.patch)
* Postgres Pro Standard 9.6, 10, 11
* Postgres Pro Enterprise 9.6, 10, 11
* vanilla PostgreSQL 12 with [ptrack extension](https://github.com/postgrespro/ptrack)
* Postgres Pro Standard 9.6, 10, 11, 12
* Postgres Pro Enterprise 9.6, 10, 11, 12

## Limitations
@ -131,7 +131,6 @@ doc/src/sgml/pgprobackup.sgml
|
||||
<arg choice="plain"><option>archive-push</option></arg>
|
||||
<arg choice="plain"><option>-B</option> <replaceable>backup_dir</replaceable></arg>
|
||||
<arg choice="plain"><option>--instance</option> <replaceable>instance_name</replaceable></arg>
|
||||
<arg choice="plain"><option>--wal-file-path</option> <replaceable>wal_file_path</replaceable></arg>
|
||||
<arg choice="plain"><option>--wal-file-name</option> <replaceable>wal_file_name</replaceable></arg>
|
||||
<arg rep="repeat"><replaceable>option</replaceable></arg>
|
||||
</cmdsynopsis>
|
||||
@ -427,14 +426,6 @@ doc/src/sgml/pgprobackup.sgml
|
||||
or <application>libc</application>/<application>libicu</application> versions.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
All backups in the incremental chain must belong to the same
|
||||
timeline. For example, if you have taken incremental backups on a
|
||||
standby server that gets promoted, you have to take another FULL
|
||||
backup.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</refsect2>
|
||||
@ -754,9 +745,10 @@ ALTER ROLE backup WITH REPLICATION;
|
||||
<title>Setting up Continuous WAL Archiving</title>
|
||||
<para>
|
||||
Making backups in PAGE backup mode, performing
|
||||
<link linkend="pbk-performing-point-in-time-pitr-recovery">PITR</link>
|
||||
and making backups with
|
||||
<link linkend="pbk-archive-mode">ARCHIVE</link> WAL delivery mode
|
||||
<link linkend="pbk-performing-point-in-time-pitr-recovery">PITR</link>,
|
||||
making backups with
|
||||
<link linkend="pbk-archive-mode">ARCHIVE</link> WAL delivery mode and
|
||||
running incremental backup after timeline switch
|
||||
require
|
||||
<ulink url="https://postgrespro.com/docs/postgresql/current/continuous-archiving.html">continuous
|
||||
WAL archiving</ulink> to be enabled. To set up continuous
|
||||
@ -786,7 +778,7 @@ ALTER ROLE backup WITH REPLICATION;
|
||||
parameter, as follows:
|
||||
</para>
|
||||
<programlisting>
|
||||
archive_command = '<replaceable>install_dir</replaceable>/pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-path=%p --wal-file-name=%f [<replaceable>remote_options</replaceable>]'
|
||||
archive_command = '<replaceable>install_dir</replaceable>/pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-name=%f [<replaceable>remote_options</replaceable>]'
|
||||
</programlisting>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
@ -1483,7 +1475,7 @@ pg_probackup checkdb [-B <replaceable>backup_dir</replaceable> [--instance <repl
|
||||
enough to specify the backup instance of this cluster for
|
||||
<application>pg_probackup</application> to determine the required
|
||||
connection options. However, if <literal>-B</literal> and
|
||||
<literal>--instance</literal> options are ommitted, you have to provide
|
||||
<literal>--instance</literal> options are omitted, you have to provide
|
||||
<link linkend="pbk-connection-opts">connection options</link> and
|
||||
<replaceable>data_dir</replaceable> via environment
|
||||
variables or command-line options.
|
||||
@ -2247,7 +2239,7 @@ BACKUP INSTANCE 'node'
|
||||
<para>
|
||||
<literal>MERGED</literal> — the backup data files were
|
||||
successfully merged, but its metadata is in the process
|
||||
of been updated. Only full backup can have this status.
|
||||
of being updated. Only full backups can have this status.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
@ -2372,7 +2364,8 @@ primary_conninfo = 'user=backup passfile=/var/lib/pgsql/.pgpass port=5432 sslmod
|
||||
<listitem>
|
||||
<para>
|
||||
<literal>expire-time</literal> — the point in time
|
||||
when a pinned backup can be removed by retention purge.
|
||||
when a pinned backup can be removed in accordance with retention
|
||||
policy. This attribute is only available for pinned backups.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
@ -2816,17 +2809,19 @@ pg_probackup show -B <replaceable>backup_dir</replaceable> [--instance <replacea
|
||||
<refsect2 id="pbk-configuring-retention-policy">
|
||||
<title>Configuring Retention Policy</title>
|
||||
<para>
|
||||
With <application>pg_probackup</application>, you can set retention policies for backups
|
||||
and WAL archive. All policies can be combined together in any
|
||||
way.
|
||||
With <application>pg_probackup</application>, you can configure
|
||||
retention policy to remove redundant backups, clean up unneeded
|
||||
WAL files, as well as pin specific backups to ensure they are
|
||||
kept for the specified time, as explained in the sections below.
|
||||
All these actions can be combined together in any way.
|
||||
</para>
|
||||
|
||||
<refsect3 id="pbk-retention-policy">
|
||||
<title>Backup Retention Policy</title>
|
||||
<title>Removing Redundant Backups</title>
|
||||
<para>
|
||||
By default, all backup copies created with <application>pg_probackup</application> are
|
||||
stored in the specified backup catalog. To save disk space,
|
||||
you can configure retention policy and periodically clean up
|
||||
redundant backup copies accordingly.
|
||||
you can configure retention policy to remove redundant backup copies.
|
||||
</para>
|
||||
<para>
|
||||
To configure retention policy, set one or more of the
|
||||
@ -2849,56 +2844,51 @@ pg_probackup show -B <replaceable>backup_dir</replaceable> [--instance <replacea
|
||||
<emphasis role="strong">the number of days</emphasis> from the
|
||||
current moment. For example, if
|
||||
<literal>retention-window=7</literal>, <application>pg_probackup</application> must
|
||||
delete all backup copies that are older than seven days, with
|
||||
all the corresponding WAL files.
|
||||
keep at least one backup copy that is older than seven days, with
|
||||
all the corresponding WAL files, and all the backups that follow.
|
||||
</para>
|
||||
<para>
|
||||
If both <option>--retention-redundancy</option> and
|
||||
<option>--retention-window</option> options are set,
|
||||
<application>pg_probackup</application> keeps backup copies that satisfy at least one
|
||||
condition. For example, if you set
|
||||
<literal>--retention-redundancy=2</literal> and
|
||||
<literal>--retention-window=7</literal>, <application>pg_probackup</application> purges
|
||||
the backup catalog to keep only two full backup copies and all
|
||||
backups that are newer than seven days:
|
||||
<option>--retention-window</option> options are set, both these
|
||||
conditions have to be taken into account when purging the backup
|
||||
catalog. For example, if you set <literal>--retention-redundancy=2</literal>
|
||||
and <literal>--retention-window=7</literal>,
|
||||
<application>pg_probackup</application> has to keep two full backup
|
||||
copies, as well as all the backups required to ensure recoverability
|
||||
for the last seven days:
|
||||
</para>
|
||||
<programlisting>
|
||||
pg_probackup set-config -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --retention-redundancy=2 --retention-window=7
|
||||
</programlisting>
|
||||
|
||||
<para>
|
||||
To clean up the backup catalog in accordance with retention
|
||||
policy, run:
|
||||
To clean up the backup catalog in accordance with retention policy,
|
||||
you have to run the <xref linkend="pbk-delete"/> command with
|
||||
<link linkend="pbk-retention-opts">retention flags</link>, as shown
|
||||
below, or use the <xref linkend="pbk-backup"/> command with
|
||||
these flags to process the outdated backup copies right when the new
|
||||
backup is created.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For example, to remove all backup copies that no longer satisfy the
|
||||
defined retention policy, run the following command with the
|
||||
<literal>--delete-expired</literal> flag:
|
||||
</para>
|
||||
<programlisting>
|
||||
pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --delete-expired
|
||||
</programlisting>
|
||||
<para>
|
||||
<application>pg_probackup</application> deletes all backup copies that do not conform to
|
||||
the defined retention policy.
|
||||
</para>
|
||||
<para>
|
||||
If you would like to also remove the WAL files that are no
|
||||
longer required for any of the backups, add the
|
||||
longer required for any of the backups, you should also specify the
|
||||
<option>--delete-wal</option> flag:
|
||||
</para>
|
||||
<programlisting>
|
||||
pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --delete-expired --delete-wal
|
||||
</programlisting>
|
||||
<note>
|
||||
<para>
|
||||
Alternatively, you can use the
|
||||
<option>--delete-expired</option>,
|
||||
<option>--merge-expired</option>,
|
||||
<option>--delete-wal</option> flags and the
|
||||
<option>--retention-window</option> and
|
||||
<option>--retention-redundancy</option> options together
|
||||
with the <xref linkend="pbk-backup"/> command to
|
||||
remove and merge the outdated backup copies once the new
|
||||
backup is created.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
You can set or override the current retention policy by
|
||||
You can also set or override the current retention policy by
|
||||
specifying <option>--retention-redundancy</option> and
|
||||
<option>--retention-window</option> options directly when
|
||||
running <command>delete</command> or <command>backup</command>
|
||||
@ -2919,6 +2909,7 @@ pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replace
|
||||
<xref linkend="pbk-backup"/> or
|
||||
<xref linkend="pbk-delete"/> commands.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Suppose you have backed up the <replaceable>node</replaceable>
|
||||
instance in the <replaceable>backup_dir</replaceable> directory,
|
||||
@ -2971,9 +2962,10 @@ BACKUP INSTANCE 'node'
|
||||
The <literal>Time</literal> field for the merged backup displays the time
|
||||
required for the merge.
|
||||
</para>
|
||||
|
||||
</refsect3>
|
||||
<refsect3 id="pbk-backup-pinning">
|
||||
<title>Backup Pinning</title>
|
||||
<title>Pinning Backups</title>
|
||||
<para>
|
||||
If you need to keep certain backups longer than the
|
||||
established retention policy allows, you can pin them
|
||||
@ -3012,8 +3004,8 @@ pg_probackup show -B <replaceable>backup_dir</replaceable> --instance <replaceab
|
||||
</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
If the backup is pinned, the <literal>expire-time</literal>
|
||||
attribute displays its expiration time:
|
||||
If the backup is pinned, it has the <literal>expire-time</literal>
|
||||
attribute that displays its expiration time:
|
||||
<programlisting>
|
||||
...
|
||||
recovery-time = '2017-05-16 12:57:31'
|
||||
@ -3023,34 +3015,65 @@ data-bytes = 22288792
|
||||
</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Only pinned backups have the <literal>expire-time</literal>
|
||||
attribute in the backup metadata.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
A pinned incremental backup implicitly pins all
|
||||
its parent backups.
|
||||
</para>
|
||||
</note>
|
||||
<para>
|
||||
You can unpin the backup by setting the
|
||||
<option>--ttl</option> option to zero using the
|
||||
<xref linkend="pbk-set-backup"/> command. For example:
|
||||
You can unpin the backup by setting the <option>--ttl</option> option to zero:
|
||||
</para>
|
||||
<programlisting>
|
||||
pg_probackup set-backup -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> -i <replaceable>backup_id</replaceable> --ttl=0
|
||||
</programlisting>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
A pinned incremental backup implicitly pins all
|
||||
its parent backups. If you unpin such a backup later,
|
||||
its implicitly pinned parents will also be automatically unpinned.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</refsect3>
|
||||
<refsect3 id="pbk-wal-archive-retention-policy">
|
||||
<title>WAL Archive Retention Policy</title>
|
||||
<title>Configuring WAL Archive Retention Policy</title>
|
||||
<para>
|
||||
By default, <application>pg_probackup</application> purges
|
||||
only redundant WAL segments that cannot be applied to any of the
|
||||
backups in the backup catalog. To save disk space,
|
||||
you can configure WAL archive retention policy, which allows to
|
||||
keep WAL of limited depth measured in backups per timeline.
|
||||
When <link linkend="pbk-setting-up-continuous-wal-archiving">continuous
|
||||
WAL archiving</link> is enabled, archived WAL segments can take a lot
|
||||
of disk space. Even if you delete old backup copies from time to time,
|
||||
the <literal>--delete-wal</literal> flag can
|
||||
purge only those WAL segments that do not apply to any of the
|
||||
remaining backups in the backup catalog. However, if point-in-time
|
||||
recovery is critical only for the most recent backups, you can
|
||||
configure WAL archive retention policy to keep WAL archive of
|
||||
limited depth and win back some more disk space.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To configure WAL archive retention policy, you have to run the
|
||||
<xref linkend="pbk-set-config"/> command with the
|
||||
<literal>--wal-depth</literal> option that specifies the number
|
||||
of backups that can be used for PITR.
|
||||
This setting applies to all the timelines, so you should be able to perform
|
||||
PITR for the same number of backups on each timeline, if available.
|
||||
<link linkend="pbk-backup-pinning">Pinned backups</link> are
|
||||
not included into this count: if one of the latest backups
|
||||
is pinned, <application>pg_probackup</application> ensures that
|
||||
PITR is possible for one extra backup.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To remove WAL segments that do not satisfy the defined WAL archive
|
||||
retention policy, you simply have to run the <xref linkend="pbk-delete"/>
|
||||
or <xref linkend="pbk-backup"/> command with the <literal>--delete-wal</literal>
|
||||
flag. For archive backups, WAL segments between <literal>Start LSN</literal>
|
||||
and <literal>Stop LSN</literal> are always kept intact, so such backups
|
||||
remain valid regardless of the <literal>--wal-depth</literal> setting
|
||||
and can still be restored, if required.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
You can also use the <option>--wal-depth</option> option
|
||||
with the <xref linkend="pbk-delete"/> and <xref linkend="pbk-backup"/>
|
||||
commands to override the previously defined WAL archive retention
|
||||
policy and purge old WAL segments on the fly.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Suppose you have backed up the <literal>node</literal>
|
||||
instance in the <replaceable>backup_dir</replaceable> directory and
|
||||
@ -3104,8 +3127,8 @@ ARCHIVE INSTANCE 'node'
|
||||
</programlisting>
|
||||
<para>
|
||||
If you would like, for example, to keep only those WAL
|
||||
segments that can be applied to the last valid backup, use the
|
||||
<option>--wal-depth</option> option:
|
||||
segments that can be applied to the latest valid backup, set the
|
||||
<option>--wal-depth</option> option to 1:
|
||||
</para>
|
||||
<programlisting>
|
||||
pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance node --delete-wal --wal-depth=1
|
||||
@ -3131,12 +3154,6 @@ ARCHIVE INSTANCE 'node'
|
||||
===============================================================================================================================
|
||||
1 0 0/0 000000010000000000000048 000000010000000000000049 1 72kB 228.00 7 OK
|
||||
</programlisting>
|
||||
<note>
|
||||
<para>
|
||||
<link linkend="pbk-backup-pinning">Pinned backups</link> are
|
||||
ignored for the purpose of WAL Archive Retention Policy fulfilment.
|
||||
</para>
|
||||
</note>
|
||||
</refsect3>
|
||||
</refsect2>
|
||||
<refsect2 id="pbk-merging-backups">
|
||||
@ -3152,16 +3169,16 @@ ARCHIVE INSTANCE 'node'
|
||||
pg_probackup merge -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> -i <replaceable>backup_id</replaceable>
|
||||
</programlisting>
|
||||
<para>
|
||||
This command merges the specified incremental backup to its
|
||||
parent full backup, together with all incremental backups
|
||||
between them. If the specified backup ID belong to the full backup,
|
||||
then it will be merged with the closest incremental backup.
|
||||
Once the merge is complete, the incremental
|
||||
backups are removed as redundant. Thus, the merge operation is
|
||||
virtually equivalent to retaking a full backup and removing all
|
||||
the outdated backups, but it allows to save much time,
|
||||
especially for large data volumes, as well as I/O and network traffic
|
||||
if you are using <application>pg_probackup</application> in the
|
||||
This command merges backups that belong to a common incremental backup
|
||||
chain. If you specify a full backup, it will be merged with its first
|
||||
incremental backup. If you specify an incremental backup, it will be
|
||||
merged to its parent full backup, together with all incremental backups
|
||||
between them. Once the merge is complete, the full backup takes in all
|
||||
the merged data, and the incremental backups are removed as redundant.
|
||||
Thus, the merge operation is virtually equivalent to retaking a full
|
||||
backup and removing all the outdated backups, but it allows to save much
|
||||
time, especially for large data volumes, as well as I/O and network
|
||||
traffic if you are using <application>pg_probackup</application> in the
|
||||
<link linkend="pbk-remote-backup">remote</link> mode.
|
||||
</para>
|
||||
<para>
|
||||
@ -3175,8 +3192,10 @@ pg_probackup show -B <replaceable>backup_dir</replaceable> --instance <replaceab
|
||||
</programlisting>
|
||||
<para>
|
||||
If the merge is still in progress, the backup status is
|
||||
displayed as <literal>MERGING</literal> or, at the final stage,
|
||||
<literal>MERGED</literal>. The merge is idempotent, so you can
|
||||
displayed as <literal>MERGING</literal>. For full backups,
|
||||
it can also be shown as <literal>MERGED</literal> while the
|
||||
metadata is being updated at the final stage of the merge.
|
||||
The merge is idempotent, so you can
|
||||
restart the merge if it was interrupted.
|
||||
</para>
|
||||
</refsect2>
|
||||
@ -3581,9 +3600,11 @@ pg_probackup backup -B <replaceable>backup_dir</replaceable> -b <replaceable>bac
|
||||
<listitem>
|
||||
<para>
|
||||
Do not sync backed up files to disk. You can use this flag to speed
|
||||
up backup process. Using this flag can result in data
|
||||
up the backup process. Using this flag can result in data
|
||||
corruption in case of operating system or hardware crash.
|
||||
Corruption can be detected by backup validation.
|
||||
If you use this option, it is recommended to run the
|
||||
<xref linkend="pbk-validate"/> command once the backup is complete
|
||||
to detect possible issues.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -3617,7 +3638,7 @@ pg_probackup restore -B <replaceable>backup_dir</replaceable> --instance <replac
|
||||
[--force] [--no-sync]
|
||||
[--restore-command=<replaceable>cmdline</replaceable>]
|
||||
[--primary-conninfo=<replaceable>primary_conninfo</replaceable>]
|
||||
[-S | --primary-slot-name=<replaceable>slotname</replaceable>]
|
||||
[-S | --primary-slot-name=<replaceable>slot_name</replaceable>]
|
||||
[<replaceable>recovery_target_options</replaceable>] [<replaceable>logging_options</replaceable>] [<replaceable>remote_options</replaceable>]
|
||||
[<replaceable>partial_restore_options</replaceable>] [<replaceable>remote_wal_archive_options</replaceable>]
|
||||
</programlisting>
|
||||
@ -3662,7 +3683,7 @@ pg_probackup restore -B <replaceable>backup_dir</replaceable> --instance <replac
|
||||
Sets the
|
||||
<ulink url="https://postgrespro.com/docs/postgresql/current/runtime-config-replication.html#GUC-PRIMARY-CONNINFO">primary_conninfo</ulink>
|
||||
parameter to the specified value.
|
||||
This option will be ignored unless the <option>-R</option> flag if specified.
|
||||
This option will be ignored unless the <option>-R</option> flag is specified.
|
||||
</para>
|
||||
<para>
|
||||
Example: <literal>--primary-conninfo='host=192.168.1.50 port=5432 user=foo password=foopass'</literal>
|
||||
@ -3676,9 +3697,9 @@ pg_probackup restore -B <replaceable>backup_dir</replaceable> --instance <replac
|
||||
<listitem>
|
||||
<para>
|
||||
Sets the
|
||||
<ulink url="https://postgrespro.com/docs/postgresql/current/runtime-config-replication#GUC-PRIMARY-SLOT-NAME">primary_slot_name</ulink>
|
||||
<ulink url="https://postgrespro.com/docs/postgresql/current/runtime-config-replication.html#GUC-PRIMARY-SLOT-NAME">primary_slot_name</ulink>
|
||||
parameter to the specified value.
|
||||
This option will be ignored unless the <option>-R</option> flag if specified.
|
||||
This option will be ignored unless the <option>-R</option> flag is specified.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -3775,6 +3796,8 @@ pg_probackup restore -B <replaceable>backup_dir</replaceable> --instance <replac
|
||||
Do not sync restored files to disk. You can use this flag to speed
|
||||
up restore process. Using this flag can result in data
|
||||
corruption in case of operating system or hardware crash.
|
||||
If it happens, you have to run the <xref linkend="pbk-restore"/>
|
||||
command again.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -3902,12 +3925,12 @@ pg_probackup merge -B <replaceable>backup_dir</replaceable> --instance <replacea
|
||||
[<replaceable>logging_options</replaceable>]
|
||||
</programlisting>
|
||||
<para>
|
||||
Merges the specified incremental backup to its parent full
|
||||
backup, together with all incremental backups between them, if
|
||||
any. If the specified backup ID belong to the full backup,
|
||||
then it will be merged with the closest incremental backup.
|
||||
As a result, the full backup takes in all the merged
|
||||
data, and the incremental backups are removed as redundant.
|
||||
Merges backups that belong to a common incremental backup
|
||||
chain. If you specify a full backup, it will be merged with its first
|
||||
incremental backup. If you specify an incremental backup, it will be
|
||||
merged to its parent full backup, together with all incremental backups
|
||||
between them. Once the merge is complete, the full backup takes in all
|
||||
the merged data, and the incremental backups are removed as redundant.
|
||||
</para>
|
||||
<para>
|
||||
For details, see the section
|
||||
@ -3941,9 +3964,12 @@ pg_probackup delete -B <replaceable>backup_dir</replaceable> --instance <replace
|
||||
<title>archive-push</title>
|
||||
<programlisting>
|
||||
pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable>
|
||||
--wal-file-path=<replaceable>wal_file_path</replaceable> --wal-file-name=<replaceable>wal_file_name</replaceable>
|
||||
[--help] [--compress] [--compress-algorithm=<replaceable>compression_algorithm</replaceable>]
|
||||
[--compress-level=<replaceable>compression_level</replaceable>] [--overwrite]
|
||||
--wal-file-name=<replaceable>wal_file_name</replaceable> [--wal-file-path=<replaceable>wal_file_path</replaceable>]
|
||||
[--help] [--no-sync] [--compress] [--no-ready-rename] [--overwrite]
|
||||
[-j <replaceable>num_threads</replaceable>] [--batch-size=<replaceable>batch_size</replaceable>]
|
||||
[--archive-timeout=<replaceable>timeout</replaceable>]
|
||||
[--compress-algorithm=<replaceable>compression_algorithm</replaceable>]
|
||||
[--compress-level=<replaceable>compression_level</replaceable>]
|
||||
[<replaceable>remote_options</replaceable>] [<replaceable>logging_options</replaceable>]
|
||||
</programlisting>
|
||||
<para>
|
||||
@ -3954,12 +3980,10 @@ pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <r
|
||||
backup instance and the cluster do not match, this command
|
||||
fails with the following error message: <literal>Refuse to push WAL
|
||||
segment segment_name into archive. Instance parameters
|
||||
mismatch.</literal> For each WAL file moved to the backup catalog, you
|
||||
will see the following message in the <productname>PostgreSQL</productname> log file:
|
||||
<literal>pg_probackup archive-push completed successfully</literal>.
|
||||
mismatch.</literal>
|
||||
</para>
|
||||
<para>
|
||||
If the files to be copied already exist in the backup catalog,
|
||||
If the files to be copied already exists in the backup catalog,
|
||||
<application>pg_probackup</application> computes and compares their checksums. If the
|
||||
checksums match, <command>archive-push</command> skips the corresponding file and
|
||||
returns a successful execution code. Otherwise, <command>archive-push</command>
|
||||
@ -3968,13 +3992,25 @@ pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <r
|
||||
with the <option>--overwrite</option> flag.
|
||||
</para>
|
||||
<para>
|
||||
The files are copied to a temporary file with the
|
||||
<literal>.part</literal> suffix. After the copy is
|
||||
done, atomic rename is performed. This algorithm ensures that a
|
||||
failed <command>archive-push</command> will not stall continuous archiving and
|
||||
that concurrent archiving from multiple sources into a single
|
||||
WAL archive have no risk of archive corruption. WAL segments copied to
|
||||
the archive are synced to disk.
|
||||
Each file is copied to a temporary file with the
|
||||
<literal>.part</literal> suffix. If the temporary file already
|
||||
exists, <application>pg_probackup</application> will wait
|
||||
<option>archive_timeout</option> seconds before discarding it.
|
||||
After the copy is done, atomic rename is performed.
|
||||
This algorithm ensures that a failed <command>archive-push</command>
|
||||
will not stall continuous archiving and that concurrent archiving from
|
||||
multiple sources into a single WAL archive has no risk of archive
|
||||
corruption.
|
||||
</para>
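As a rough C sketch of the copy-to-temporary-file-then-rename pattern described above (simplified and hypothetical, not the actual pg_probackup implementation; error handling and directory syncing are omitted):

    #include <stdio.h>

    /* Sketch: publish dst atomically by writing dst.part first, then renaming. */
    static int
    copy_then_rename(const char *src, const char *dst)
    {
        char   tmp[4096];
        char   buf[8192];
        size_t n;
        FILE  *in, *out;

        snprintf(tmp, sizeof(tmp), "%s.part", dst);  /* temporary file with the .part suffix */

        if ((in = fopen(src, "rb")) == NULL)
            return -1;
        if ((out = fopen(tmp, "wb")) == NULL)
            return -1;

        while ((n = fread(buf, 1, sizeof(buf), in)) > 0)
            fwrite(buf, 1, n, out);

        fclose(in);
        fclose(out);

        /* rename() is atomic on POSIX filesystems: readers see either the old
         * file or the complete new one, never a partially written copy. */
        return rename(tmp, dst);
    }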
|
||||
<para>
|
||||
To speed up archiving, you can specify the <option>-j</option> option
|
||||
to run <command>archive-push</command> on multiple threads.
|
||||
If you provide the <option>--batch-size</option> option, WAL files
|
||||
will be copied in batches of the specified size.
|
||||
</para>
|
||||
<para>
|
||||
WAL segments copied to the archive are synced to disk unless
|
||||
the <option>--no-sync</option> flag is used.
|
||||
</para>
|
||||
<para>
|
||||
You can use <command>archive-push</command> in the
|
||||
@ -3994,6 +4030,8 @@ pg_probackup archive-push -B <replaceable>backup_dir</replaceable> --instance <r
|
||||
<title>archive-get</title>
|
||||
<programlisting>
|
||||
pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <replaceable>instance_name</replaceable> --wal-file-path=<replaceable>wal_file_path</replaceable> --wal-file-name=<replaceable>wal_file_name</replaceable>
|
||||
[-j <replaceable>num_threads</replaceable>] [--batch-size=<replaceable>batch_size</replaceable>]
|
||||
[--prefetch-dir=<replaceable>prefetch_dir_path</replaceable>] [--no-validate-wal]
|
||||
[--help] [<replaceable>remote_options</replaceable>] [<replaceable>logging_options</replaceable>]
|
||||
</programlisting>
|
||||
<para>
|
||||
@ -4004,6 +4042,17 @@ pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <re
|
||||
restoring backups using a WAL archive. You do not need to set
|
||||
it manually.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To speed up recovery, you can specify the <option>-j</option> option
|
||||
to run <command>archive-get</command> on multiple threads.
|
||||
If you provide the <option>--batch-size</option> option, WAL segments
|
||||
will be copied in batches of the specified size.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For details, see section <link linkend="pbk-archiving-options">Archiving Options</link>.
|
||||
</para>
|
||||
</refsect3>
|
||||
</refsect2>
|
||||
<refsect2 id="pbk-options">
|
||||
@ -4080,7 +4129,8 @@ pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <re
|
||||
<para>
|
||||
Sets the number of parallel threads for <command>backup</command>,
|
||||
<command>restore</command>, <command>merge</command>,
|
||||
<command>validate</command>, and <command>checkdb</command> processes.
|
||||
<command>validate</command>, <command>checkdb</command>, and
|
||||
<command>archive-push</command> processes.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -4130,7 +4180,7 @@ pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <re
|
||||
The <literal>immediate</literal> value stops the recovery
|
||||
after reaching the consistent state of the specified
|
||||
backup, or the latest available backup if the
|
||||
<option>-i</option>/<option>--backup_id</option> option is omitted.
|
||||
<option>-i</option>/<option>--backup-id</option> option is omitted.
|
||||
This is the default behavior for STREAM backups.
|
||||
</para>
|
||||
</listitem>
|
||||
@ -4739,6 +4789,78 @@ pg_probackup archive-get -B <replaceable>backup_dir</replaceable> --instance <re
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--batch-size=<replaceable>batch_size</replaceable></option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Sets the maximum number of files that can be copied into the archive
|
||||
by a single <command>archive-push</command> process, or from
|
||||
the archive by a single <command>archive-get</command> process.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--archive-timeout=<replaceable>wait_time</replaceable></option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Sets the timeout for considering existing <literal>.part</literal>
|
||||
files to be stale. By default, <application>pg_probackup</application>
|
||||
waits 300 seconds.
|
||||
This option can be used only with <xref linkend="pbk-archive-push"/> command.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--no-ready-rename</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Do not rename status files in the <literal>archive_status</literal> directory.
|
||||
This option should be used only if <parameter>archive_command</parameter>
|
||||
contains multiple commands.
|
||||
This option can be used only with <xref linkend="pbk-archive-push"/> command.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--no-sync</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Do not sync copied WAL files to disk. You can use this flag to speed
|
||||
up archiving process. Using this flag can result in WAL archive
|
||||
corruption in case of operating system or hardware crash.
|
||||
This option can be used only with <xref linkend="pbk-archive-push"/> command.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--prefetch-dir=<replaceable>path</replaceable></option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Directory used to store prefetched WAL segments if <option>--batch-size</option> option is used.
|
||||
Directory must be located on the same filesystem and on the same mountpoint the
|
||||
<literal>PGDATA/pg_wal</literal> is located.
|
||||
By default files are stored in <literal>PGDATA/pg_wal/pbk_prefetch</literal> directory.
|
||||
This option can be used only with <xref linkend="pbk-archive-get"/> command.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--no-validate-wal</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Do not validate prefetched WAL file before using it.
|
||||
Use this option if you want to increase the speed of recovery.
|
||||
This option can be used only with <xref linkend="pbk-archive-get"/> command.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</para>
|
||||
</refsect3>
|
||||
@ -5176,7 +5298,7 @@ INFO: Backup PZ7YK2 completed
|
||||
<step id="pbk-lets-take-a-look-at-the-backup-catalog">
|
||||
<title>Let's take a look at the backup catalog:</title>
|
||||
<programlisting>
|
||||
[backupman@backup_host] pg_probackup-11 backup -B /mnt/backups --instance 'pg-11'
|
||||
[backupman@backup_host] pg_probackup-11 show -B /mnt/backups --instance 'pg-11'
|
||||
|
||||
BACKUP INSTANCE 'pg-11'
|
||||
==================================================================================================================================
|
||||
@ -5267,7 +5389,7 @@ remote-host = postgres_host
|
||||
<step id="pbk-lets-take-a-look-at-the-backup-catalog-1">
|
||||
<title>Let's take a look at the backup catalog:</title>
|
||||
<programlisting>
|
||||
[backupman@backup_host] pg_probackup-11 backup -B /mnt/backups --instance 'pg-11'
|
||||
[backupman@backup_host] pg_probackup-11 show -B /mnt/backups --instance 'pg-11'
|
||||
|
||||
====================================================================================================================================
|
||||
Instance Version ID Recovery Time Mode WAL Mode TLI Time Data WAL Zratio Start LSN Stop LSN Status

src/archive.c | 1986 (file diff suppressed because it is too large)

src/backup.c | 72
@ -153,6 +153,10 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
|
||||
PGconn *master_conn = NULL;
|
||||
PGconn *pg_startbackup_conn = NULL;
|
||||
|
||||
/* used for multitimeline incremental backup */
|
||||
parray *tli_list = NULL;
|
||||
|
||||
|
||||
/* for fancy reporting */
|
||||
time_t start_time, end_time;
|
||||
char pretty_time[20];
|
||||
@ -181,17 +185,43 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
|
||||
current.backup_mode == BACKUP_MODE_DIFF_PTRACK ||
|
||||
current.backup_mode == BACKUP_MODE_DIFF_DELTA)
|
||||
{
|
||||
char prev_backup_filelist_path[MAXPGPATH];
|
||||
|
||||
/* get list of backups already taken */
|
||||
backup_list = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID);
|
||||
|
||||
prev_backup = catalog_get_last_data_backup(backup_list, current.tli, current.start_time);
|
||||
if (prev_backup == NULL)
|
||||
elog(ERROR, "Valid backup on current timeline %X is not found. "
|
||||
"Create new FULL backup before an incremental one.",
|
||||
{
|
||||
/* try to setup multi-timeline backup chain */
|
||||
elog(WARNING, "Valid backup on current timeline %u is not found, "
|
||||
"try to look up on previous timelines",
|
||||
current.tli);
|
||||
|
||||
tli_list = catalog_get_timelines(&instance_config);
|
||||
|
||||
if (parray_num(tli_list) == 0)
|
||||
elog(WARNING, "Cannot find valid backup on previous timelines, "
|
||||
"WAL archive is not available");
|
||||
else
|
||||
{
|
||||
prev_backup = get_multi_timeline_parent(backup_list, tli_list, current.tli,
|
||||
current.start_time, &instance_config);
|
||||
|
||||
if (prev_backup == NULL)
|
||||
elog(WARNING, "Cannot find valid backup on previous timelines");
|
||||
}
|
||||
|
||||
/* failed to find suitable parent, error out */
|
||||
if (!prev_backup)
|
||||
elog(ERROR, "Create new full backup before an incremental one");
|
||||
}
|
||||
}
|
||||
|
||||
if (prev_backup)
|
||||
{
|
||||
char prev_backup_filelist_path[MAXPGPATH];
|
||||
|
||||
elog(INFO, "Parent backup: %s", base36enc(prev_backup->start_time));
|
||||
|
||||
join_path_components(prev_backup_filelist_path, prev_backup->root_dir,
|
||||
DATABASE_FILE_LIST);
|
||||
/* Files of previous backup needed by DELTA backup */
|
||||
@ -378,8 +408,10 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
|
||||
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE ||
|
||||
current.backup_mode == BACKUP_MODE_DIFF_PTRACK)
|
||||
{
|
||||
elog(INFO, "Compiling pagemap of changed blocks");
|
||||
bool pagemap_isok = true;
|
||||
|
||||
time(&start_time);
|
||||
elog(INFO, "Extracting pagemap of changed blocks");
|
||||
|
||||
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE)
|
||||
{
|
||||
@ -388,8 +420,9 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
|
||||
* reading WAL segments present in archives up to the point
|
||||
* where this backup has started.
|
||||
*/
|
||||
extractPageMap(arclog_path, current.tli, instance_config.xlog_seg_size,
|
||||
prev_backup->start_lsn, current.start_lsn);
|
||||
pagemap_isok = extractPageMap(arclog_path, instance_config.xlog_seg_size,
|
||||
prev_backup->start_lsn, prev_backup->tli,
|
||||
current.start_lsn, current.tli, tli_list);
|
||||
}
|
||||
else if (current.backup_mode == BACKUP_MODE_DIFF_PTRACK)
|
||||
{
|
||||
@ -407,8 +440,14 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
|
||||
}
|
||||
|
||||
time(&end_time);
|
||||
elog(INFO, "Pagemap compiled, time elapsed %.0f sec",
|
||||
difftime(end_time, start_time));
|
||||
|
||||
/* TODO: add ms precision */
|
||||
if (pagemap_isok)
|
||||
elog(INFO, "Pagemap successfully extracted, time elapsed: %.0f sec",
|
||||
difftime(end_time, start_time));
|
||||
else
|
||||
elog(ERROR, "Pagemap extraction failed, time elasped: %.0f sec",
|
||||
difftime(end_time, start_time));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -667,6 +706,15 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
|
||||
elog(INFO, "Backup files are synced, time elapsed: %s", pretty_time);
|
||||
}
|
||||
|
||||
/* be paranoid about instance been from the past */
|
||||
if (current.backup_mode != BACKUP_MODE_FULL &&
|
||||
current.stop_lsn < prev_backup->stop_lsn)
|
||||
elog(ERROR, "Current backup STOP LSN %X/%X is lower than STOP LSN %X/%X of previous backup %s. "
|
||||
"It may indicate that we are trying to backup PostgreSQL instance from the past.",
|
||||
(uint32) (current.stop_lsn >> 32), (uint32) (current.stop_lsn),
|
||||
(uint32) (prev_backup->stop_lsn >> 32), (uint32) (prev_backup->stop_lsn),
|
||||
base36enc(prev_backup->stop_lsn));
|
||||
|
||||
/* clean external directories list */
|
||||
if (external_dirs)
|
||||
free_dir_list(external_dirs);
|
||||
@ -678,6 +726,12 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
|
||||
parray_free(backup_list);
|
||||
}
|
||||
|
||||
if (tli_list)
|
||||
{
|
||||
parray_walk(tli_list, timelineInfoFree);
|
||||
parray_free(tli_list);
|
||||
}
|
||||
|
||||
parray_walk(backup_files_list, pgFileFree);
|
||||
parray_free(backup_files_list);
|
||||
backup_files_list = NULL;
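
The parent-lookup logic added above can be condensed, purely for readability, into the following hypothetical helper (it reuses the functions and the global instance_config visible in the diff, drops the warning messages, and is not a drop-in replacement for the real code):

    /* Sketch: find a parent for an incremental backup, first on the current
     * timeline, then on previous timelines reachable through the WAL archive. */
    static pgBackup *
    find_parent_backup(parray *backup_list, TimeLineID tli, time_t start_time,
                       parray **tli_list_out)
    {
        pgBackup *parent = catalog_get_last_data_backup(backup_list, tli, start_time);

        if (parent == NULL)
        {
            *tli_list_out = catalog_get_timelines(&instance_config);

            if (parray_num(*tli_list_out) > 0)
                parent = get_multi_timeline_parent(backup_list, *tli_list_out,
                                                   tli, start_time, &instance_config);
        }

        return parent;   /* caller errors out if this is still NULL */
    }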

src/catalog.c | 178
@ -42,6 +42,24 @@ timelineInfoNew(TimeLineID tli)
|
||||
return tlinfo;
|
||||
}
|
||||
|
||||
/* free timelineInfo object */
|
||||
void
|
||||
timelineInfoFree(void *tliInfo)
|
||||
{
|
||||
timelineInfo *tli = (timelineInfo *) tliInfo;
|
||||
|
||||
parray_walk(tli->xlog_filelist, pgFileFree);
|
||||
parray_free(tli->xlog_filelist);
|
||||
|
||||
if (tli->backups)
|
||||
{
|
||||
parray_walk(tli->backups, pgBackupFree);
|
||||
parray_free(tli->backups);
|
||||
}
|
||||
|
||||
pfree(tliInfo);
|
||||
}
|
||||
|
||||
/* Iterate over locked backups and delete locks files */
|
||||
static void
|
||||
unlink_lock_atexit(void)
|
||||
@ -597,7 +615,7 @@ catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current
|
||||
switch (scan_parent_chain(backup, &tmp_backup))
|
||||
{
|
||||
/* broken chain */
|
||||
case 0:
|
||||
case ChainIsBroken:
|
||||
invalid_backup_id = base36enc_dup(tmp_backup->parent_backup);
|
||||
|
||||
elog(WARNING, "Backup %s has missing parent: %s. Cannot be a parent",
|
||||
@ -606,7 +624,7 @@ catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current
|
||||
continue;
|
||||
|
||||
/* chain is intact, but at least one parent is invalid */
|
||||
case 1:
|
||||
case ChainIsInvalid:
|
||||
invalid_backup_id = base36enc_dup(tmp_backup->start_time);
|
||||
|
||||
elog(WARNING, "Backup %s has invalid parent: %s. Cannot be a parent",
|
||||
@ -615,17 +633,13 @@ catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current
|
||||
continue;
|
||||
|
||||
/* chain is ok */
|
||||
case 2:
|
||||
case ChainIsOk:
|
||||
/* Yes, we could call is_parent() earlier - after choosing the ancestor,
|
||||
* but this way we have an opportunity to detect and report all possible
|
||||
* anomalies.
|
||||
*/
|
||||
if (is_parent(full_backup->start_time, backup, true))
|
||||
{
|
||||
elog(INFO, "Parent backup: %s",
|
||||
base36enc(backup->start_time));
|
||||
return backup;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* skip yourself */
|
||||
@ -641,6 +655,150 @@ catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* For multi-timeline chain, look up suitable parent for incremental backup.
|
||||
* Multi-timeline chain has full backup and one or more descendants located
|
||||
* on different timelines.
|
||||
*/
|
||||
pgBackup *
|
||||
get_multi_timeline_parent(parray *backup_list, parray *tli_list,
|
||||
TimeLineID current_tli, time_t current_start_time,
|
||||
InstanceConfig *instance)
|
||||
{
|
||||
int i;
|
||||
timelineInfo *my_tlinfo = NULL;
|
||||
timelineInfo *tmp_tlinfo = NULL;
|
||||
pgBackup *ancestor_backup = NULL;
|
||||
|
||||
/* there are no timelines in the archive */
|
||||
if (parray_num(tli_list) == 0)
|
||||
return NULL;
|
||||
|
||||
/* look for current timelineInfo */
|
||||
for (i = 0; i < parray_num(tli_list); i++)
|
||||
{
|
||||
timelineInfo *tlinfo = (timelineInfo *) parray_get(tli_list, i);
|
||||
|
||||
if (tlinfo->tli == current_tli)
|
||||
{
|
||||
my_tlinfo = tlinfo;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (my_tlinfo == NULL)
|
||||
return NULL;
|
||||
|
||||
/* Locate tlinfo of suitable full backup.
|
||||
* Consider this example:
|
||||
* t3 s2-------X <-! We are here
|
||||
* /
|
||||
* t2 s1----D---*----E--->
|
||||
* /
|
||||
* t1--A--B--*---C------->
|
||||
*
|
||||
* A, E - full backups
|
||||
* B, C, D - incremental backups
|
||||
*
|
||||
* We must find A.
|
||||
*/
|
||||
tmp_tlinfo = my_tlinfo;
|
||||
while (tmp_tlinfo->parent_link)
|
||||
{
|
||||
/* if timeline has backups, iterate over them */
|
||||
if (tmp_tlinfo->parent_link->backups)
|
||||
{
|
||||
for (i = 0; i < parray_num(tmp_tlinfo->parent_link->backups); i++)
|
||||
{
|
||||
pgBackup *backup = (pgBackup *) parray_get(tmp_tlinfo->parent_link->backups, i);
|
||||
|
||||
if (backup->backup_mode == BACKUP_MODE_FULL &&
|
||||
(backup->status == BACKUP_STATUS_OK ||
|
||||
backup->status == BACKUP_STATUS_DONE) &&
|
||||
backup->stop_lsn <= tmp_tlinfo->switchpoint)
|
||||
{
|
||||
ancestor_backup = backup;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ancestor_backup)
|
||||
break;
|
||||
|
||||
tmp_tlinfo = tmp_tlinfo->parent_link;
|
||||
}
|
||||
|
||||
/* failed to find valid FULL backup on parent timelines */
|
||||
if (!ancestor_backup)
|
||||
return NULL;
|
||||
else
|
||||
elog(LOG, "Latest valid full backup: %s, tli: %i",
|
||||
base36enc(ancestor_backup->start_time), ancestor_backup->tli);
|
||||
|
||||
/* At this point we found suitable full backup,
|
||||
* now we must find his latest child, suitable to be
|
||||
* parent of current incremental backup.
|
||||
* Consider this example:
|
||||
* t3 s2-------X <-! We are here
|
||||
* /
|
||||
* t2 s1----D---*----E--->
|
||||
* /
|
||||
* t1--A--B--*---C------->
|
||||
*
|
||||
* A, E - full backups
|
||||
* B, C, D - incremental backups
|
||||
*
|
||||
* We found A, now we must find D.
|
||||
*/
|
||||
|
||||
/* Optimistically, look on current timeline for valid incremental backup, child of ancestor */
|
||||
if (my_tlinfo->backups)
|
||||
{
|
||||
/* backups are sorted in descending order and we need latest valid */
|
||||
for (i = 0; i < parray_num(my_tlinfo->backups); i++)
|
||||
{
|
||||
pgBackup *tmp_backup = NULL;
|
||||
pgBackup *backup = (pgBackup *) parray_get(my_tlinfo->backups, i);
|
||||
|
||||
/* found suitable parent */
|
||||
if (scan_parent_chain(backup, &tmp_backup) == ChainIsOk &&
|
||||
is_parent(ancestor_backup->start_time, backup, false))
|
||||
return backup;
|
||||
}
|
||||
}
|
||||
|
||||
/* Iterate over parent timelines and look for a valid backup, child of ancestor */
|
||||
tmp_tlinfo = my_tlinfo;
|
||||
while (tmp_tlinfo->parent_link)
|
||||
{
|
||||
|
||||
/* if timeline has backups, iterate over them */
|
||||
if (tmp_tlinfo->parent_link->backups)
|
||||
{
|
||||
for (i = 0; i < parray_num(tmp_tlinfo->parent_link->backups); i++)
|
||||
{
|
||||
pgBackup *tmp_backup = NULL;
|
||||
pgBackup *backup = (pgBackup *) parray_get(tmp_tlinfo->parent_link->backups, i);
|
||||
|
||||
/* We are not interested in backups
|
||||
* located outside of our timeline history
|
||||
*/
|
||||
if (backup->stop_lsn > tmp_tlinfo->switchpoint)
|
||||
continue;
|
||||
|
||||
if (scan_parent_chain(backup, &tmp_backup) == ChainIsOk &&
|
||||
is_parent(ancestor_backup->start_time, backup, true))
|
||||
return backup;
|
||||
}
|
||||
}
|
||||
|
||||
tmp_tlinfo = tmp_tlinfo->parent_link;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* create backup directory in $BACKUP_PATH */
|
||||
int
|
||||
pgBackupCreateDir(pgBackup *backup)
|
||||
@ -2225,18 +2383,18 @@ scan_parent_chain(pgBackup *current_backup, pgBackup **result_backup)
|
||||
{
|
||||
/* Set oldest child backup in chain */
|
||||
*result_backup = target_backup;
|
||||
return 0;
|
||||
return ChainIsBroken;
|
||||
}
|
||||
|
||||
/* chain is ok, but some backups are invalid */
|
||||
if (invalid_backup)
|
||||
{
|
||||
*result_backup = invalid_backup;
|
||||
return 1;
|
||||
return ChainIsInvalid;
|
||||
}
|
||||
|
||||
*result_backup = target_backup;
|
||||
return 2;
|
||||
return ChainIsOk;
|
||||
}
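
The change above replaces the magic return values 0/1/2 of scan_parent_chain() with named statuses. A minimal sketch of how such an enum and a caller could look (the definition shape and the helper are assumptions for illustration; the actual declaration lives elsewhere in the pg_probackup headers and may differ):

    /* Assumed definition: status codes returned by scan_parent_chain(). */
    typedef enum ChainStatus
    {
        ChainIsBroken,   /* a parent backup is missing from the catalog */
        ChainIsInvalid,  /* the chain is complete, but at least one parent is not valid */
        ChainIsOk        /* every backup in the chain is usable */
    } ChainStatus;

    /* Hypothetical caller: decide whether `backup` can anchor a new increment. */
    static bool
    chain_is_usable(pgBackup *backup)
    {
        pgBackup *oldest = NULL;

        switch (scan_parent_chain(backup, &oldest))
        {
            case ChainIsOk:
                return true;
            case ChainIsBroken:      /* missing parent: cannot be used at all */
            case ChainIsInvalid:     /* parent present but not valid */
            default:
                return false;
        }
    }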
|
||||
|
||||
/*

src/data.c
@ -803,7 +803,7 @@ backup_non_data_file(pgFile *file, pgFile *prev_file,
|
||||
file->mtime <= parent_backup_time)
|
||||
{
|
||||
|
||||
file->crc = fio_get_crc32(from_fullpath, FIO_DB_HOST);
|
||||
file->crc = fio_get_crc32(from_fullpath, FIO_DB_HOST, false);
|
||||
|
||||
/* ...and checksum is the same... */
|
||||
if (EQ_TRADITIONAL_CRC32(file->crc, prev_file->crc))
|
||||
@ -1069,7 +1069,7 @@ restore_non_data_file_internal(FILE *in, FILE *out, pgFile *file,
|
||||
break;
|
||||
|
||||
if (read_len < 0)
|
||||
elog(ERROR, "Cannot read backup mode file \"%s\": %s",
|
||||
elog(ERROR, "Cannot read backup file \"%s\": %s",
|
||||
from_fullpath, strerror(errno));
|
||||
|
||||
if (fio_fwrite(out, buf, read_len) != read_len)

src/dir.c | 66
@ -315,6 +315,72 @@ pgFileGetCRC(const char *file_path, bool use_crc32c, bool missing_ok)
|
||||
return crc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the local file to compute its CRC.
|
||||
* We cannot make decision about file decompression because
|
||||
* user may ask to backup already compressed files and we should be
|
||||
* obvious about it.
|
||||
*/
|
||||
pg_crc32
|
||||
pgFileGetCRCgz(const char *file_path, bool use_crc32c, bool missing_ok)
|
||||
{
|
||||
gzFile fp;
|
||||
pg_crc32 crc = 0;
|
||||
char buf[STDIO_BUFSIZE];
|
||||
int len = 0;
|
||||
int err;
|
||||
|
||||
INIT_FILE_CRC32(use_crc32c, crc);
|
||||
|
||||
/* open file in binary read mode */
|
||||
fp = gzopen(file_path, PG_BINARY_R);
|
||||
if (fp == NULL)
|
||||
{
|
||||
if (errno == ENOENT)
|
||||
{
|
||||
if (missing_ok)
|
||||
{
|
||||
FIN_FILE_CRC32(use_crc32c, crc);
|
||||
return crc;
|
||||
}
|
||||
}
|
||||
|
||||
elog(ERROR, "Cannot open file \"%s\": %s",
|
||||
file_path, strerror(errno));
|
||||
}
|
||||
|
||||
/* calc CRC of file */
|
||||
for (;;)
|
||||
{
|
||||
if (interrupted)
|
||||
elog(ERROR, "interrupted during CRC calculation");
|
||||
|
||||
len = gzread(fp, &buf, sizeof(buf));
|
||||
|
||||
if (len <= 0)
|
||||
{
|
||||
/* we either run into eof or error */
|
||||
if (gzeof(fp))
|
||||
break;
|
||||
else
|
||||
{
|
||||
const char *err_str = NULL;
|
||||
|
||||
err_str = gzerror(fp, &err);
|
||||
elog(ERROR, "Cannot read from compressed file %s", err_str);
|
||||
}
|
||||
}
|
||||
|
||||
/* update CRC */
|
||||
COMP_FILE_CRC32(use_crc32c, crc, buf, len);
|
||||
}
|
||||
|
||||
FIN_FILE_CRC32(use_crc32c, crc);
|
||||
gzclose(fp);
|
||||
|
||||
return crc;
|
||||
}
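
Since pgFileGetCRCgz() reads through zlib, the checksum it produces covers the decompressed contents of a gzipped file, while pgFileGetCRC() checksums the raw bytes. A hedged usage sketch (hypothetical helper, simplified suffix check, both functions taken with the signatures shown above):

    #include <string.h>

    /* Hypothetical helper: checksum a WAL segment, compressed or not. */
    static pg_crc32
    wal_segment_crc(const char *path, bool missing_ok)
    {
        size_t len = strlen(path);

        /* Files ending in ".gz" are read through zlib so that the CRC covers
         * the decompressed contents; everything else is read as-is. */
        if (len > 3 && strcmp(path + len - 3, ".gz") == 0)
            return pgFileGetCRCgz(path, true, missing_ok);

        return pgFileGetCRC(path, true, missing_ok);
    }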
|
||||
|
||||
void
|
||||
pgFileFree(void *file)
|
||||
{

src/help.c | 31
@ -214,10 +214,11 @@ help_pg_probackup(void)
printf(_(" [--help]\n"));

printf(_("\n %s archive-push -B backup-path --instance=instance_name\n"), PROGRAM_NAME);
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" [--overwrite]\n"));
printf(_(" [--compress]\n"));
printf(_(" [-j num-threads] [--batch-size=batch_size]\n"));
printf(_(" [--archive-timeout=timeout]\n"));
printf(_(" [--no-ready-rename] [--no-sync]\n"));
printf(_(" [--overwrite] [--compress]\n"));
printf(_(" [--compress-algorithm=compress-algorithm]\n"));
printf(_(" [--compress-level=compress-level]\n"));
printf(_(" [--remote-proto] [--remote-host]\n"));
@ -228,6 +229,8 @@ help_pg_probackup(void)
printf(_("\n %s archive-get -B backup-path --instance=instance_name\n"), PROGRAM_NAME);
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" [-j num-threads] [--batch-size=batch_size]\n"));
printf(_(" [--no-validate-wal]\n"));
printf(_(" [--remote-proto] [--remote-host]\n"));
printf(_(" [--remote-port] [--remote-path] [--remote-user]\n"));
printf(_(" [--ssh-options]\n"));
@ -869,10 +872,11 @@ static void
help_archive_push(void)
{
printf(_("\n%s archive-push -B backup-path --instance=instance_name\n"), PROGRAM_NAME);
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" [--overwrite]\n"));
printf(_(" [--compress]\n"));
printf(_(" [-j num-threads] [--batch-size=batch_size]\n"));
printf(_(" [--archive-timeout=timeout]\n"));
printf(_(" [--no-ready-rename] [--no-sync]\n"));
printf(_(" [--overwrite] [--compress]\n"));
printf(_(" [--compress-algorithm=compress-algorithm]\n"));
printf(_(" [--compress-level=compress-level]\n"));
printf(_(" [--remote-proto] [--remote-host]\n"));
@ -881,10 +885,13 @@ help_archive_push(void)

printf(_(" -B, --backup-path=backup-path location of the backup storage area\n"));
printf(_(" --instance=instance_name name of the instance to delete\n"));
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" relative path name of the WAL file on the server\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" name of the WAL file to retrieve from the server\n"));
printf(_(" name of the file to copy into WAL archive\n"));
printf(_(" -j, --threads=NUM number of parallel threads\n"));
printf(_(" --batch-size=NUM number of files to be copied\n"));
printf(_(" --archive-timeout=timeout wait timeout before discarding stale temp file(default: 5min)\n"));
printf(_(" --no-ready-rename do not rename '.ready' files in 'archive_status' directory\n"));
printf(_(" --no-sync do not sync WAL file to disk\n"));
printf(_(" --overwrite overwrite archived WAL file\n"));

printf(_("\n Compression options:\n"));
@ -912,6 +919,8 @@ help_archive_get(void)
printf(_("\n%s archive-get -B backup-path --instance=instance_name\n"), PROGRAM_NAME);
printf(_(" --wal-file-path=wal-file-path\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" [-j num-threads] [--batch-size=batch_size]\n"));
printf(_(" [--no-validate-wal]\n"));
printf(_(" [--remote-proto] [--remote-host]\n"));
printf(_(" [--remote-port] [--remote-path] [--remote-user]\n"));
printf(_(" [--ssh-options]\n\n"));
@ -922,6 +931,10 @@ help_archive_get(void)
printf(_(" relative destination path name of the WAL file on the server\n"));
printf(_(" --wal-file-name=wal-file-name\n"));
printf(_(" name of the WAL file to retrieve from the archive\n"));
printf(_(" -j, --threads=NUM number of parallel threads\n"));
printf(_(" --batch-size=NUM number of files to be prefetched\n"));
printf(_(" --prefetch-dir=path location of the store area for prefetched WAL files\n"));
printf(_(" --no-validate-wal skip validation of prefetched WAL file before using it\n"));

printf(_("\n Remote options:\n"));
printf(_(" --remote-proto=protocol remote protocol to use\n"));

248
src/parsexlog.c
@ -138,6 +138,9 @@ typedef struct
*/
bool got_target;

/* Should we read record, located at endpoint position */
bool inclusive_endpoint;

/*
* Return value from the thread.
* 0 means there is no error, 1 - there is an error.
@ -162,7 +165,8 @@ static bool RunXLogThreads(const char *archivedir,
XLogRecPtr startpoint, XLogRecPtr endpoint,
bool consistent_read,
xlog_record_function process_record,
XLogRecTarget *last_rec);
XLogRecTarget *last_rec,
bool inclusive_endpoint);
//static XLogReaderState *InitXLogThreadRead(xlog_thread_arg *arg);
static bool SwitchThreadToNextWal(XLogReaderState *xlogreader,
xlog_thread_arg *arg);
@ -231,18 +235,121 @@ static XLogRecPtr wal_target_lsn = InvalidXLogRecPtr;
* Pagemap extracting is processed using threads. Each thread reads single WAL
* file.
*/
void
extractPageMap(const char *archivedir, TimeLineID tli, uint32 wal_seg_size,
XLogRecPtr startpoint, XLogRecPtr endpoint)
bool
extractPageMap(const char *archivedir, uint32 wal_seg_size,
XLogRecPtr startpoint, TimeLineID start_tli,
XLogRecPtr endpoint, TimeLineID end_tli,
parray *tli_list)
{
bool extract_isok = true;
bool extract_isok = false;

extract_isok = RunXLogThreads(archivedir, 0, InvalidTransactionId,
InvalidXLogRecPtr, tli, wal_seg_size,
startpoint, endpoint, false, extractPageInfo,
NULL);
if (!extract_isok)
elog(ERROR, "Pagemap compiling failed");
if (start_tli == end_tli)
/* easy case */
extract_isok = RunXLogThreads(archivedir, 0, InvalidTransactionId,
InvalidXLogRecPtr, end_tli, wal_seg_size,
startpoint, endpoint, false, extractPageInfo,
NULL, true);
else
{
/* We have to process WAL located on several different xlog intervals,
* located on different timelines.
*
* Consider this example:
* t3 C-----X <!- We are here
* /
* t2 B---*-->
* /
* t1 -A----*------->
*
* A - prev backup START_LSN
* B - switchpoint for t2, available as t2->switchpoint
* C - switch for t3, available as t3->switchpoint
* X - current backup START_LSN
*
* Intervals to be parsed:
* - [A,B) on t1
* - [B,C) on t2
* - [C,X] on t3
*/
int i;
parray *interval_list = parray_new();
timelineInfo *end_tlinfo = NULL;
timelineInfo *tmp_tlinfo = NULL;
XLogRecPtr prev_switchpoint = InvalidXLogRecPtr;
lsnInterval *wal_interval = NULL;

/* We must find TLI information about final timeline (t3 in example) */
for (i = 0; i < parray_num(tli_list); i++)
{
tmp_tlinfo = parray_get(tli_list, i);

if (tmp_tlinfo->tli == end_tli)
{
end_tlinfo = tmp_tlinfo;
break;
}
}

/* Iterate over timelines backward,
* starting with end_tli and ending with start_tli.
* For every timeline calculate LSN-interval that must be parsed.
*/

tmp_tlinfo = end_tlinfo;
while (tmp_tlinfo)
{
wal_interval = pgut_malloc(sizeof(lsnInterval));
wal_interval->tli = tmp_tlinfo->tli;

if (tmp_tlinfo->tli == end_tli)
{
wal_interval->begin_lsn = tmp_tlinfo->switchpoint;
wal_interval->end_lsn = endpoint;
}
else if (tmp_tlinfo->tli == start_tli)
{
wal_interval->begin_lsn = startpoint;
wal_interval->end_lsn = prev_switchpoint;
}
else
{
wal_interval->begin_lsn = tmp_tlinfo->switchpoint;
wal_interval->end_lsn = prev_switchpoint;
}

prev_switchpoint = tmp_tlinfo->switchpoint;
tmp_tlinfo = tmp_tlinfo->parent_link;

parray_append(interval_list, wal_interval);
}

for (i = parray_num(interval_list) - 1; i >= 0; i--)
{
bool inclusive_endpoint;
wal_interval = parray_get(interval_list, i);

/* In case of replica promotion, endpoints of intermediate
* timelines can be unreachable.
*/
inclusive_endpoint = false;

/* ... but not the end timeline */
if (wal_interval->tli == end_tli)
inclusive_endpoint = true;

extract_isok = RunXLogThreads(archivedir, 0, InvalidTransactionId,
InvalidXLogRecPtr, wal_interval->tli, wal_seg_size,
wal_interval->begin_lsn, wal_interval->end_lsn,
false, extractPageInfo, NULL, inclusive_endpoint);
if (!extract_isok)
break;

pg_free(wal_interval);
}
pg_free(interval_list);
}

return extract_isok;
}

/*
|
||||
@ -262,7 +369,7 @@ validate_backup_wal_from_start_to_stop(pgBackup *backup,
|
||||
got_endpoint = RunXLogThreads(archivedir, 0, InvalidTransactionId,
|
||||
InvalidXLogRecPtr, tli, xlog_seg_size,
|
||||
backup->start_lsn, backup->stop_lsn,
|
||||
false, NULL, NULL);
|
||||
false, NULL, NULL, true);
|
||||
|
||||
if (!got_endpoint)
|
||||
{
|
||||
@ -349,7 +456,7 @@ validate_wal(pgBackup *backup, const char *archivedir,
|
||||
* If recovery target is provided, ensure that archive files exist in
|
||||
* archive directory.
|
||||
*/
|
||||
if (dir_is_empty(archivedir, FIO_BACKUP_HOST))
|
||||
if (dir_is_empty(archivedir, FIO_LOCAL_HOST))
|
||||
elog(ERROR, "WAL archive is empty. You cannot restore backup to a recovery target without WAL archive.");
|
||||
|
||||
/*
|
||||
@ -373,7 +480,7 @@ validate_wal(pgBackup *backup, const char *archivedir,
|
||||
all_wal = all_wal ||
|
||||
RunXLogThreads(archivedir, target_time, target_xid, target_lsn,
|
||||
tli, wal_seg_size, backup->stop_lsn,
|
||||
InvalidXLogRecPtr, true, validateXLogRecord, &last_rec);
|
||||
InvalidXLogRecPtr, true, validateXLogRecord, &last_rec, true);
|
||||
if (last_rec.rec_time > 0)
|
||||
time2iso(last_timestamp, lengthof(last_timestamp),
|
||||
timestamptz_to_time_t(last_rec.rec_time));
|
||||
@ -753,20 +860,35 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
|
||||
if (!reader_data->xlogexists)
|
||||
{
|
||||
char xlogfname[MAXFNAMELEN];
|
||||
char partial_file[MAXPGPATH];
|
||||
|
||||
GetXLogFileName(xlogfname, reader_data->tli, reader_data->xlogsegno,
|
||||
wal_seg_size);
|
||||
snprintf(reader_data->xlogpath, MAXPGPATH, "%s/%s", wal_archivedir,
|
||||
xlogfname);
|
||||
GetXLogFileName(xlogfname, reader_data->tli, reader_data->xlogsegno, wal_seg_size);
|
||||
|
||||
if (fileExists(reader_data->xlogpath, FIO_BACKUP_HOST))
|
||||
snprintf(reader_data->xlogpath, MAXPGPATH, "%s/%s", wal_archivedir, xlogfname);
|
||||
snprintf(reader_data->gz_xlogpath, MAXPGPATH, "%s.gz", reader_data->xlogpath);
|
||||
|
||||
/* We fall back to using .partial segment in case if we are running
|
||||
* multi-timeline incremental backup right after standby promotion.
|
||||
* TODO: it should be explicitly enabled.
|
||||
*/
|
||||
snprintf(partial_file, MAXPGPATH, "%s.partial", reader_data->xlogpath);
|
||||
|
||||
/* If segment do not exists, but the same
|
||||
* segment with '.partial' suffix does, use it instead */
|
||||
if (!fileExists(reader_data->xlogpath, FIO_LOCAL_HOST) &&
|
||||
fileExists(partial_file, FIO_LOCAL_HOST))
|
||||
{
|
||||
snprintf(reader_data->xlogpath, MAXPGPATH, "%s", partial_file);
|
||||
}
|
||||
|
||||
if (fileExists(reader_data->xlogpath, FIO_LOCAL_HOST))
|
||||
{
|
||||
elog(LOG, "Thread [%d]: Opening WAL segment \"%s\"",
|
||||
reader_data->thread_num, reader_data->xlogpath);
|
||||
|
||||
reader_data->xlogexists = true;
|
||||
reader_data->xlogfile = fio_open(reader_data->xlogpath,
|
||||
O_RDONLY | PG_BINARY, FIO_BACKUP_HOST);
|
||||
O_RDONLY | PG_BINARY, FIO_LOCAL_HOST);
|
||||
|
||||
if (reader_data->xlogfile < 0)
|
||||
{
|
||||
@ -778,29 +900,23 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
|
||||
}
|
||||
#ifdef HAVE_LIBZ
|
||||
/* Try to open compressed WAL segment */
|
||||
else
|
||||
else if (fileExists(reader_data->gz_xlogpath, FIO_LOCAL_HOST))
|
||||
{
|
||||
snprintf(reader_data->gz_xlogpath, sizeof(reader_data->gz_xlogpath),
|
||||
"%s.gz", reader_data->xlogpath);
|
||||
if (fileExists(reader_data->gz_xlogpath, FIO_BACKUP_HOST))
|
||||
{
|
||||
elog(LOG, "Thread [%d]: Opening compressed WAL segment \"%s\"",
|
||||
reader_data->thread_num, reader_data->gz_xlogpath);
|
||||
elog(LOG, "Thread [%d]: Opening compressed WAL segment \"%s\"",
|
||||
reader_data->thread_num, reader_data->gz_xlogpath);
|
||||
|
||||
reader_data->xlogexists = true;
|
||||
reader_data->gz_xlogfile = fio_gzopen(reader_data->gz_xlogpath,
|
||||
"rb", -1, FIO_BACKUP_HOST);
|
||||
if (reader_data->gz_xlogfile == NULL)
|
||||
{
|
||||
elog(WARNING, "Thread [%d]: Could not open compressed WAL segment \"%s\": %s",
|
||||
reader_data->thread_num, reader_data->gz_xlogpath,
|
||||
strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
reader_data->xlogexists = true;
|
||||
reader_data->gz_xlogfile = fio_gzopen(reader_data->gz_xlogpath,
|
||||
"rb", -1, FIO_LOCAL_HOST);
|
||||
if (reader_data->gz_xlogfile == NULL)
|
||||
{
|
||||
elog(WARNING, "Thread [%d]: Could not open compressed WAL segment \"%s\": %s",
|
||||
reader_data->thread_num, reader_data->gz_xlogpath,
|
||||
strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Exit without error if WAL segment doesn't exist */
|
||||
if (!reader_data->xlogexists)
|
||||
return -1;
|
||||
@ -923,7 +1039,7 @@ RunXLogThreads(const char *archivedir, time_t target_time,
|
||||
TransactionId target_xid, XLogRecPtr target_lsn, TimeLineID tli,
|
||||
uint32 segment_size, XLogRecPtr startpoint, XLogRecPtr endpoint,
|
||||
bool consistent_read, xlog_record_function process_record,
|
||||
XLogRecTarget *last_rec)
|
||||
XLogRecTarget *last_rec, bool inclusive_endpoint)
|
||||
{
|
||||
pthread_t *threads;
|
||||
xlog_thread_arg *thread_args;
|
||||
@ -932,17 +1048,25 @@ RunXLogThreads(const char *archivedir, time_t target_time,
|
||||
XLogSegNo endSegNo = 0;
|
||||
bool result = true;
|
||||
|
||||
if (!XRecOffIsValid(startpoint))
|
||||
if (!XRecOffIsValid(startpoint) && !XRecOffIsNull(startpoint))
|
||||
elog(ERROR, "Invalid startpoint value %X/%X",
|
||||
(uint32) (startpoint >> 32), (uint32) (startpoint));
|
||||
|
||||
if (!XLogRecPtrIsInvalid(endpoint))
|
||||
{
|
||||
if (!XRecOffIsValid(endpoint))
|
||||
// if (XRecOffIsNull(endpoint) && !inclusive_endpoint)
|
||||
if (XRecOffIsNull(endpoint))
|
||||
{
|
||||
GetXLogSegNo(endpoint, endSegNo, segment_size);
|
||||
endSegNo--;
|
||||
}
|
||||
else if (!XRecOffIsValid(endpoint))
|
||||
{
|
||||
elog(ERROR, "Invalid endpoint value %X/%X",
|
||||
(uint32) (endpoint >> 32), (uint32) (endpoint));
|
||||
|
||||
GetXLogSegNo(endpoint, endSegNo, segment_size);
|
||||
}
|
||||
else
|
||||
GetXLogSegNo(endpoint, endSegNo, segment_size);
|
||||
}
|
||||
|
||||
/* Initialize static variables for workers */
|
||||
@ -977,6 +1101,7 @@ RunXLogThreads(const char *archivedir, time_t target_time,
|
||||
arg->startpoint = startpoint;
|
||||
arg->endpoint = endpoint;
|
||||
arg->endSegNo = endSegNo;
|
||||
arg->inclusive_endpoint = inclusive_endpoint;
|
||||
arg->got_target = false;
|
||||
/* By default there is some error */
|
||||
arg->ret = 1;
|
||||
@ -1192,6 +1317,18 @@ XLogThreadWorker(void *arg)
|
||||
reader_data->thread_num,
|
||||
(uint32) (errptr >> 32), (uint32) (errptr));
|
||||
|
||||
/* If we failed to read the record located at the endpoint position,
* and the endpoint is not inclusive, do not consider this an error.
|
||||
*/
|
||||
if (!thread_arg->inclusive_endpoint &&
|
||||
errptr == thread_arg->endpoint)
|
||||
{
|
||||
elog(LOG, "Thread [%d]: Endpoint %X/%X is not inclusive, switch to the next timeline",
|
||||
reader_data->thread_num,
|
||||
(uint32) (thread_arg->endpoint >> 32), (uint32) (thread_arg->endpoint));
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we don't have all WAL files from prev backup start_lsn to current
|
||||
* start_lsn, we won't be able to build page map and PAGE backup will
|
||||
@ -1583,3 +1720,28 @@ getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool validate_wal_segment(TimeLineID tli, XLogSegNo segno, const char *prefetch_dir, uint32 wal_seg_size)
|
||||
{
|
||||
XLogRecPtr startpoint;
|
||||
XLogRecPtr endpoint;
|
||||
|
||||
bool rc;
|
||||
int tmp_num_threads = num_threads;
|
||||
num_threads = 1;
|
||||
|
||||
/* calculate startpoint and endpoint */
|
||||
GetXLogRecPtr(segno, 0, wal_seg_size, startpoint);
|
||||
GetXLogRecPtr(segno+1, 0, wal_seg_size, endpoint);
|
||||
|
||||
/* disable multi-threading */
|
||||
num_threads = 1;
|
||||
|
||||
rc = RunXLogThreads(prefetch_dir, 0, InvalidTransactionId,
|
||||
InvalidXLogRecPtr, tli, wal_seg_size,
|
||||
startpoint, endpoint, false, NULL, NULL, true);
|
||||
|
||||
num_threads = tmp_num_threads;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -125,9 +125,15 @@ bool compress_shortcut = false;
|
||||
char *instance_name;
|
||||
|
||||
/* archive push options */
|
||||
int batch_size = 1;
|
||||
static char *wal_file_path;
|
||||
static char *wal_file_name;
|
||||
static bool file_overwrite = false;
|
||||
static bool file_overwrite = false;
|
||||
static bool no_ready_rename = false;
|
||||
|
||||
/* archive get options */
|
||||
static char *prefetch_dir;
|
||||
bool no_validate_wal = false;
|
||||
|
||||
/* show options */
|
||||
ShowFormat show_format = SHOW_PLAIN;
|
||||
@ -172,7 +178,6 @@ static ConfigOption cmd_options[] =
|
||||
{ 'f', 'b', "backup-mode", opt_backup_mode, SOURCE_CMD_STRICT },
|
||||
{ 'b', 'C', "smooth-checkpoint", &smooth_checkpoint, SOURCE_CMD_STRICT },
|
||||
{ 's', 'S', "slot", &replication_slot, SOURCE_CMD_STRICT },
|
||||
{ 's', 'S', "primary-slot-name",&replication_slot, SOURCE_CMD_STRICT },
|
||||
{ 'b', 181, "temp-slot", &temp_slot, SOURCE_CMD_STRICT },
|
||||
{ 'b', 182, "delete-wal", &delete_wal, SOURCE_CMD_STRICT },
|
||||
{ 'b', 183, "delete-expired", &delete_expired, SOURCE_CMD_STRICT },
|
||||
@ -189,13 +194,14 @@ static ConfigOption cmd_options[] =
|
||||
{ 'f', 155, "external-mapping", opt_externaldir_map, SOURCE_CMD_STRICT },
|
||||
{ 's', 141, "recovery-target-name", &target_name, SOURCE_CMD_STRICT },
|
||||
{ 's', 142, "recovery-target-action", &target_action, SOURCE_CMD_STRICT },
|
||||
{ 'b', 'R', "restore-as-replica", &restore_as_replica, SOURCE_CMD_STRICT },
|
||||
{ 'b', 143, "no-validate", &no_validate, SOURCE_CMD_STRICT },
|
||||
{ 'b', 154, "skip-block-validation", &skip_block_validation, SOURCE_CMD_STRICT },
|
||||
{ 'b', 156, "skip-external-dirs", &skip_external_dirs, SOURCE_CMD_STRICT },
|
||||
{ 'f', 158, "db-include", opt_datname_include_list, SOURCE_CMD_STRICT },
|
||||
{ 'f', 159, "db-exclude", opt_datname_exclude_list, SOURCE_CMD_STRICT },
|
||||
{ 'b', 'R', "restore-as-replica", &restore_as_replica, SOURCE_CMD_STRICT },
|
||||
{ 's', 160, "primary-conninfo", &primary_conninfo, SOURCE_CMD_STRICT },
|
||||
{ 's', 'S', "primary-slot-name",&replication_slot, SOURCE_CMD_STRICT },
|
||||
/* checkdb options */
|
||||
{ 'b', 195, "amcheck", &need_amcheck, SOURCE_CMD_STRICT },
|
||||
{ 'b', 196, "heapallindexed", &heapallindexed, SOURCE_CMD_STRICT },
|
||||
@ -218,9 +224,14 @@ static ConfigOption cmd_options[] =
|
||||
{ 's', 150, "wal-file-path", &wal_file_path, SOURCE_CMD_STRICT },
|
||||
{ 's', 151, "wal-file-name", &wal_file_name, SOURCE_CMD_STRICT },
|
||||
{ 'b', 152, "overwrite", &file_overwrite, SOURCE_CMD_STRICT },
|
||||
{ 'b', 153, "no-ready-rename", &no_ready_rename, SOURCE_CMD_STRICT },
|
||||
{ 'i', 162, "batch-size", &batch_size, SOURCE_CMD_STRICT },
|
||||
/* archive-get options */
|
||||
{ 's', 163, "prefetch-dir", &prefetch_dir, SOURCE_CMD_STRICT },
|
||||
{ 'b', 164, "no-validate-wal", &no_validate_wal, SOURCE_CMD_STRICT },
|
||||
/* show options */
|
||||
{ 'f', 153, "format", opt_show_format, SOURCE_CMD_STRICT },
|
||||
{ 'b', 161, "archive", &show_archive, SOURCE_CMD_STRICT },
|
||||
{ 'f', 165, "format", opt_show_format, SOURCE_CMD_STRICT },
|
||||
{ 'b', 166, "archive", &show_archive, SOURCE_CMD_STRICT },
|
||||
/* set-backup options */
|
||||
{ 'I', 170, "ttl", &ttl, SOURCE_CMD_STRICT, SOURCE_DEFAULT, 0, OPTION_UNIT_S, option_get_value},
|
||||
{ 's', 171, "expire-time", &expire_time_string, SOURCE_CMD_STRICT },
|
||||
@ -264,9 +275,6 @@ main(int argc, char *argv[])
|
||||
{
|
||||
char *command = NULL,
|
||||
*command_name;
|
||||
/* Check if backup_path is directory. */
|
||||
struct stat stat_buf;
|
||||
int rc;
|
||||
|
||||
PROGRAM_NAME_FULL = argv[0];
|
||||
|
||||
@ -446,11 +454,6 @@ main(int argc, char *argv[])
|
||||
/* Ensure that backup_path is an absolute path */
|
||||
if (!is_absolute_path(backup_path))
|
||||
elog(ERROR, "-B, --backup-path must be an absolute path");
|
||||
|
||||
/* Ensure that backup_path is a path to a directory */
|
||||
rc = stat(backup_path, &stat_buf);
|
||||
if (rc != -1 && !S_ISDIR(stat_buf.st_mode))
|
||||
elog(ERROR, "-B, --backup-path must be a path to directory");
|
||||
}
|
||||
|
||||
/* Ensure that backup_path is an absolute path */
|
||||
@ -502,12 +505,16 @@ main(int argc, char *argv[])
|
||||
|
||||
/*
|
||||
* Ensure that requested backup instance exists.
|
||||
* for all commands except init, which doesn't take this parameter
|
||||
* and add-instance which creates new instance.
|
||||
* for all commands except init, which doesn't take this parameter,
|
||||
* add-instance which creates new instance
|
||||
* and archive-get, which just do not require it at this point
|
||||
*/
|
||||
if (backup_subcmd != INIT_CMD && backup_subcmd != ADD_INSTANCE_CMD)
|
||||
if (backup_subcmd != INIT_CMD && backup_subcmd != ADD_INSTANCE_CMD &&
|
||||
backup_subcmd != ARCHIVE_GET_CMD)
|
||||
{
|
||||
if (fio_access(backup_instance_path, F_OK, FIO_BACKUP_HOST) != 0)
|
||||
struct stat st;
|
||||
|
||||
if (fio_stat(backup_instance_path, &st, true, FIO_BACKUP_HOST) != 0)
|
||||
{
|
||||
elog(WARNING, "Failed to access directory \"%s\": %s",
|
||||
backup_instance_path, strerror(errno));
|
||||
@ -516,6 +523,12 @@ main(int argc, char *argv[])
|
||||
elog(ERROR, "Instance '%s' does not exist in this backup catalog",
|
||||
instance_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Ensure that backup_path is a path to a directory */
|
||||
if (!S_ISDIR(st.st_mode))
|
||||
elog(ERROR, "-B, --backup-path must be a path to directory");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -531,7 +544,8 @@ main(int argc, char *argv[])
|
||||
config_get_opt_env(instance_options);
|
||||
|
||||
/* Read options from configuration file */
|
||||
if (backup_subcmd != ADD_INSTANCE_CMD)
|
||||
if (backup_subcmd != ADD_INSTANCE_CMD &&
|
||||
backup_subcmd != ARCHIVE_GET_CMD)
|
||||
{
|
||||
join_path_components(path, backup_instance_path,
|
||||
BACKUP_CATALOG_CONF_FILE);
|
||||
@ -748,17 +762,22 @@ main(int argc, char *argv[])
|
||||
if (num_threads < 1)
|
||||
num_threads = 1;
|
||||
|
||||
if (batch_size < 1)
|
||||
batch_size = 1;
|
||||
|
||||
compress_init();
|
||||
|
||||
/* do actual operation */
|
||||
switch (backup_subcmd)
|
||||
{
|
||||
case ARCHIVE_PUSH_CMD:
|
||||
return do_archive_push(&instance_config, wal_file_path,
|
||||
wal_file_name, file_overwrite);
|
||||
do_archive_push(&instance_config, wal_file_path, wal_file_name,
|
||||
batch_size, file_overwrite, no_sync, no_ready_rename);
|
||||
break;
|
||||
case ARCHIVE_GET_CMD:
|
||||
return do_archive_get(&instance_config,
|
||||
wal_file_path, wal_file_name);
|
||||
do_archive_get(&instance_config, prefetch_dir,
|
||||
wal_file_path, wal_file_name, batch_size, !no_validate_wal);
|
||||
break;
|
||||
case ADD_INSTANCE_CMD:
|
||||
return do_add_instance(&instance_config);
|
||||
case DELETE_INSTANCE_CMD:
|
||||
|
@ -67,7 +67,6 @@ extern const char *PROGRAM_EMAIL;
|
||||
#define DATABASE_MAP "database_map"
|
||||
|
||||
/* Timeout defaults */
|
||||
#define PARTIAL_WAL_TIMER 60
|
||||
#define ARCHIVE_TIMEOUT_DEFAULT 300
|
||||
#define REPLICA_TIMEOUT_DEFAULT 300
|
||||
|
||||
@ -475,7 +474,7 @@ struct timelineInfo {
|
||||
TimeLineID tli; /* this timeline */
|
||||
TimeLineID parent_tli; /* parent timeline. 0 if none */
|
||||
timelineInfo *parent_link; /* link to parent timeline */
|
||||
XLogRecPtr switchpoint; /* if this timeline has a parent
|
||||
XLogRecPtr switchpoint; /* if this timeline has a parent, then
|
||||
* switchpoint contains switchpoint LSN,
|
||||
* otherwise 0 */
|
||||
XLogSegNo begin_segno; /* first present segment in this timeline */
|
||||
@ -501,6 +500,13 @@ typedef struct xlogInterval
|
||||
XLogSegNo end_segno;
|
||||
} xlogInterval;
|
||||
|
||||
typedef struct lsnInterval
|
||||
{
|
||||
TimeLineID tli;
|
||||
XLogRecPtr begin_lsn;
|
||||
XLogRecPtr end_lsn;
|
||||
} lsnInterval;
|
||||
|
||||
typedef enum xlogFileType
|
||||
{
|
||||
SEGMENT,
|
||||
@ -573,6 +579,9 @@ typedef struct BackupPageHeader
|
||||
|
||||
#define GetXLogSegNoFromScrath(logSegNo, log, seg, wal_segsz_bytes) \
|
||||
logSegNo = (uint64) log * XLogSegmentsPerXLogId(wal_segsz_bytes) + seg
|
||||
|
||||
#define GetXLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) \
|
||||
XLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes)
|
||||
#else
|
||||
#define GetXLogSegNo(xlrp, logSegNo, wal_segsz_bytes) \
|
||||
XLByteToSeg(xlrp, logSegNo)
|
||||
@ -589,6 +598,9 @@ typedef struct BackupPageHeader
|
||||
|
||||
#define GetXLogSegNoFromScrath(logSegNo, log, seg, wal_segsz_bytes) \
|
||||
logSegNo = (uint64) log * XLogSegmentsPerXLogId + seg
|
||||
|
||||
#define GetXLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) \
|
||||
XLogFromFileName(fname, tli, logSegNo)
|
||||
#endif
|
||||
|
||||
#define IsSshProtocol() (instance_config.remote.host && strcmp(instance_config.remote.proto, "ssh") == 0)
|
||||
@ -692,10 +704,11 @@ extern int do_init(void);
|
||||
extern int do_add_instance(InstanceConfig *instance);
|
||||
|
||||
/* in archive.c */
|
||||
extern int do_archive_push(InstanceConfig *instance, char *wal_file_path,
|
||||
char *wal_file_name, bool overwrite);
|
||||
extern int do_archive_get(InstanceConfig *instance, char *wal_file_path,
|
||||
char *wal_file_name);
|
||||
extern void do_archive_push(InstanceConfig *instance, char *wal_file_path,
|
||||
char *wal_file_name, int batch_size, bool overwrite,
|
||||
bool no_sync, bool no_ready_rename);
|
||||
extern void do_archive_get(InstanceConfig *instance, const char *prefetch_dir_arg, char *wal_file_path,
|
||||
char *wal_file_name, int batch_size, bool validate_wal);
|
||||
|
||||
/* in configure.c */
|
||||
extern void do_show_config(void);
|
||||
@ -758,6 +771,10 @@ extern void catalog_lock_backup_list(parray *backup_list, int from_idx,
|
||||
extern pgBackup *catalog_get_last_data_backup(parray *backup_list,
|
||||
TimeLineID tli,
|
||||
time_t current_start_time);
|
||||
extern pgBackup *get_multi_timeline_parent(parray *backup_list, parray *tli_list,
|
||||
TimeLineID current_tli, time_t current_start_time,
|
||||
InstanceConfig *instance);
|
||||
extern void timelineInfoFree(void *tliInfo);
|
||||
extern parray *catalog_get_timelines(InstanceConfig *instance);
|
||||
extern void do_set_backup(const char *instance_name, time_t backup_id,
|
||||
pgSetBackupParams *set_backup_params);
|
||||
@ -784,6 +801,11 @@ extern int pgBackupCompareIdEqual(const void *l, const void *r);
|
||||
|
||||
extern pgBackup* find_parent_full_backup(pgBackup *current_backup);
|
||||
extern int scan_parent_chain(pgBackup *current_backup, pgBackup **result_backup);
|
||||
/* return codes for scan_parent_chain */
|
||||
#define ChainIsBroken 0
|
||||
#define ChainIsInvalid 1
|
||||
#define ChainIsOk 2
|
||||
|
||||
extern bool is_parent(time_t parent_backup_time, pgBackup *child_backup, bool inclusive);
|
||||
extern bool is_prolific(parray *backup_list, pgBackup *target_backup);
|
||||
extern bool in_backup_list(parray *backup_list, pgBackup *target_backup);
|
||||
@ -847,6 +869,7 @@ extern void pgFileDelete(pgFile *file, const char *full_path);
|
||||
extern void pgFileFree(void *file);
|
||||
|
||||
extern pg_crc32 pgFileGetCRC(const char *file_path, bool missing_ok, bool use_crc32c);
|
||||
extern pg_crc32 pgFileGetCRCgz(const char *file_path, bool missing_ok, bool use_crc32c);
|
||||
|
||||
extern int pgFileCompareName(const void *f1, const void *f2);
|
||||
extern int pgFileComparePath(const void *f1, const void *f2);
|
||||
@ -892,13 +915,16 @@ extern bool create_empty_file(fio_location from_location, const char *to_root,
|
||||
extern bool check_file_pages(pgFile *file, XLogRecPtr stop_lsn,
|
||||
uint32 checksum_version, uint32 backup_version);
|
||||
/* parsexlog.c */
|
||||
extern void extractPageMap(const char *archivedir,
|
||||
TimeLineID tli, uint32 seg_size,
|
||||
XLogRecPtr startpoint, XLogRecPtr endpoint);
|
||||
extern bool extractPageMap(const char *archivedir, uint32 wal_seg_size,
|
||||
XLogRecPtr startpoint, TimeLineID start_tli,
|
||||
XLogRecPtr endpoint, TimeLineID end_tli,
|
||||
parray *tli_list);
|
||||
extern void validate_wal(pgBackup *backup, const char *archivedir,
|
||||
time_t target_time, TransactionId target_xid,
|
||||
XLogRecPtr target_lsn, TimeLineID tli,
|
||||
uint32 seg_size);
|
||||
extern bool validate_wal_segment(TimeLineID tli, XLogSegNo segno,
|
||||
const char *prefetch_dir, uint32 wal_seg_size);
|
||||
extern bool read_recovery_info(const char *archivedir, TimeLineID tli,
|
||||
uint32 seg_size,
|
||||
XLogRecPtr start_lsn, XLogRecPtr stop_lsn,
|
||||
@ -941,7 +967,7 @@ extern int32 do_decompress(void* dst, size_t dst_size, void const* src, size_t
|
||||
CompressAlg alg, const char **errormsg);
|
||||
|
||||
extern void pretty_size(int64 size, char *buf, size_t len);
|
||||
extern void pretty_time_interval(int64 num_seconds, char *buf, size_t len);
|
||||
extern void pretty_time_interval(double time, char *buf, size_t len);
|
||||
|
||||
extern PGconn *pgdata_basic_setup(ConnectionOptions conn_opt, PGNodeInfo *nodeInfo);
|
||||
extern void check_system_identifiers(PGconn *conn, char *pgdata);
|
||||
@ -968,12 +994,23 @@ extern parray * pg_ptrack_get_pagemapset(PGconn *backup_conn, const char *ptrack
|
||||
extern int fio_send_pages(FILE* in, FILE* out, pgFile *file, XLogRecPtr horizonLsn,
|
||||
int calg, int clevel, uint32 checksum_version,
|
||||
datapagemap_t *pagemap, BlockNumber* err_blknum, char **errormsg);
|
||||
|
||||
/* return codes for fio_send_pages */
|
||||
#define WRITE_FAILED (-1)
|
||||
#define REMOTE_ERROR (-2)
|
||||
#define PAGE_CORRUPTION (-3)
|
||||
#define SEND_OK (-4)
|
||||
#define OUT_BUF_SIZE (1024 * 1024)
|
||||
extern int fio_send_file_gz(const char *from_fullpath, const char *to_fullpath, FILE* out, int thread_num);
|
||||
extern int fio_send_file(const char *from_fullpath, const char *to_fullpath, FILE* out, int thread_num);
|
||||
|
||||
/* return codes for fio_send_pages() and fio_send_file() */
|
||||
#define SEND_OK (0)
|
||||
#define FILE_MISSING (-1)
|
||||
#define OPEN_FAILED (-2)
|
||||
#define READ_FAILED (-3)
|
||||
#define WRITE_FAILED (-4)
|
||||
#define ZLIB_ERROR (-5)
|
||||
#define REMOTE_ERROR (-6)
|
||||
#define PAGE_CORRUPTION (-8)
|
||||
|
||||
/* Check if specified location is local for current node */
|
||||
extern bool fio_is_remote(fio_location location);
|
||||
|
||||
extern void get_header_errormsg(Page page, char **errormsg);
|
||||
extern void get_checksum_errormsg(Page page, char **errormsg,
|
||||
|
@ -251,7 +251,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,

result = scan_parent_chain(dest_backup, &tmp_backup);

if (result == 0)
if (result == ChainIsBroken)
{
/* chain is broken, determine missing backup ID
* and orphanize all its descendants
@ -290,7 +290,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
/* No point in going further */
elog(ERROR, "%s of backup %s failed.", action, base36enc(dest_backup->start_time));
}
else if (result == 1)
else if (result == ChainIsInvalid)
{
/* chain is intact, but at least one parent is invalid */
set_orphan_status(backups, tmp_backup);
@ -403,7 +403,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
*/
validate_wal(dest_backup, arclog_path, rt->target_time,
rt->target_xid, rt->target_lsn,
base_full_backup->tli, instance_config.xlog_seg_size);
dest_backup->tli, instance_config.xlog_seg_size);
}
/* Orphanize every OK descendant of corrupted backup */
else
@ -1326,7 +1326,7 @@ satisfy_timeline(const parray *timelines, const pgBackup *backup)
timeline = (TimeLineHistoryEntry *) parray_get(timelines, i);
if (backup->tli == timeline->tli &&
(XLogRecPtrIsInvalid(timeline->end) ||
backup->stop_lsn < timeline->end))
backup->stop_lsn <= timeline->end))
return true;
}
return false;

28
src/show.c
@ -191,14 +191,18 @@ pretty_size(int64 size, char *buf, size_t len)
}

void
pretty_time_interval(int64 num_seconds, char *buf, size_t len)
pretty_time_interval(double time, char *buf, size_t len)
{
int seconds = 0;
int minutes = 0;
int hours = 0;
int days = 0;
int num_seconds = 0;
int milliseconds = 0;
int seconds = 0;
int minutes = 0;
int hours = 0;
int days = 0;

if (num_seconds <= 0)
num_seconds = (int) time;

if (time <= 0)
{
strncpy(buf, "0", len);
return;
@ -214,6 +218,7 @@ pretty_time_interval(int64 num_seconds, char *buf, size_t len)
num_seconds %= 60;

seconds = num_seconds;
milliseconds = (int)((time - (int) time) * 1000.0);

if (days > 0)
{
@ -233,7 +238,16 @@ pretty_time_interval(int64 num_seconds, char *buf, size_t len)
return;
}

snprintf(buf, len, "%ds", seconds);
if (seconds > 0)
{
if (milliseconds > 0)
snprintf(buf, len, "%ds:%dms", seconds, milliseconds);
else
snprintf(buf, len, "%ds", seconds);
return;
}

snprintf(buf, len, "%dms", milliseconds);
return;
}

397
src/utils/file.c
@ -14,6 +14,7 @@
|
||||
|
||||
#define PRINTF_BUF_SIZE 1024
|
||||
#define FILE_PERMISSIONS 0600
|
||||
#define CHUNK_SIZE 1024 * 128
|
||||
|
||||
static __thread unsigned long fio_fdset = 0;
|
||||
static __thread void* fio_stdin_buffer;
|
||||
@ -136,7 +137,7 @@ static int remove_file_or_dir(char const* path)
|
||||
#endif
|
||||
|
||||
/* Check if specified location is local for current node */
|
||||
static bool fio_is_remote(fio_location location)
|
||||
bool fio_is_remote(fio_location location)
|
||||
{
|
||||
bool is_remote = MyLocation != FIO_LOCAL_HOST
|
||||
&& location != FIO_LOCAL_HOST
|
||||
@ -340,7 +341,10 @@ int fio_open(char const* path, int mode, fio_location location)
|
||||
hdr.cop = FIO_OPEN;
|
||||
hdr.handle = i;
|
||||
hdr.size = strlen(path) + 1;
|
||||
hdr.arg = mode & ~O_EXCL;
|
||||
hdr.arg = mode;
|
||||
// hdr.arg = mode & ~O_EXCL;
|
||||
// elog(INFO, "PATH: %s MODE: %i, %i", path, mode, O_EXCL);
|
||||
// elog(INFO, "MODE: %i", hdr.arg);
|
||||
fio_fdset |= 1 << i;
|
||||
|
||||
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
|
||||
@ -490,6 +494,7 @@ int fio_close(int fd)
|
||||
fio_fdset &= ~(1 << hdr.handle);
|
||||
|
||||
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
|
||||
/* Note, that file is closed without waiting for confirmation */
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -865,6 +870,8 @@ int fio_rename(char const* old_path, char const* new_path, fio_location location
|
||||
IO_CHECK(fio_write_all(fio_stdout, old_path, old_path_len), old_path_len);
|
||||
IO_CHECK(fio_write_all(fio_stdout, new_path, new_path_len), new_path_len);
|
||||
|
||||
//TODO: wait for confirmation.
|
||||
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
@ -916,7 +923,7 @@ int fio_sync(char const* path, fio_location location)
|
||||
}
|
||||
|
||||
/* Get crc32 of file */
|
||||
pg_crc32 fio_get_crc32(const char *file_path, fio_location location)
|
||||
pg_crc32 fio_get_crc32(const char *file_path, fio_location location, bool decompress)
|
||||
{
|
||||
if (fio_is_remote(location))
|
||||
{
|
||||
@ -926,6 +933,10 @@ pg_crc32 fio_get_crc32(const char *file_path, fio_location location)
|
||||
hdr.cop = FIO_GET_CRC32;
|
||||
hdr.handle = -1;
|
||||
hdr.size = path_len;
|
||||
hdr.arg = 0;
|
||||
|
||||
if (decompress)
|
||||
hdr.arg = 1;
|
||||
|
||||
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
|
||||
IO_CHECK(fio_write_all(fio_stdout, file_path, path_len), path_len);
|
||||
@ -934,7 +945,12 @@ pg_crc32 fio_get_crc32(const char *file_path, fio_location location)
|
||||
return crc;
|
||||
}
|
||||
else
|
||||
return pgFileGetCRC(file_path, true, true);
|
||||
{
|
||||
if (decompress)
|
||||
return pgFileGetCRCgz(file_path, true, true);
|
||||
else
|
||||
return pgFileGetCRC(file_path, true, true);
|
||||
}
|
||||
}
|
||||
|
||||
/* Remove file */
|
||||
@ -1011,7 +1027,6 @@ int fio_chmod(char const* path, int mode, fio_location location)
|
||||
|
||||
#ifdef HAVE_LIBZ
|
||||
|
||||
|
||||
#define ZLIB_BUFFER_SIZE (64*1024)
|
||||
#define MAX_WBITS 15 /* 32K LZ77 window */
|
||||
#define DEF_MEM_LEVEL 8
|
||||
@ -1027,6 +1042,7 @@ typedef struct fioGZFile
|
||||
Bytef buf[ZLIB_BUFFER_SIZE];
|
||||
} fioGZFile;
|
||||
|
||||
/* On error returns NULL and errno should be checked */
|
||||
gzFile
|
||||
fio_gzopen(char const* path, char const* mode, int level, fio_location location)
|
||||
{
|
||||
@ -1037,6 +1053,7 @@ fio_gzopen(char const* path, char const* mode, int level, fio_location location)
|
||||
memset(&gz->strm, 0, sizeof(gz->strm));
|
||||
gz->eof = 0;
|
||||
gz->errnum = Z_OK;
|
||||
/* check if file opened for writing */
|
||||
if (strcmp(mode, PG_BINARY_W) == 0) /* compress */
|
||||
{
|
||||
gz->strm.next_out = gz->buf;
|
||||
@ -1049,14 +1066,12 @@ fio_gzopen(char const* path, char const* mode, int level, fio_location location)
|
||||
if (rc == Z_OK)
|
||||
{
|
||||
gz->compress = 1;
|
||||
if (fio_access(path, F_OK, location) == 0)
|
||||
gz->fd = fio_open(path, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY, location);
|
||||
if (gz->fd < 0)
|
||||
{
|
||||
elog(LOG, "File %s exists", path);
|
||||
free(gz);
|
||||
errno = EEXIST;
|
||||
return NULL;
|
||||
}
|
||||
gz->fd = fio_open(path, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY, location);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -1069,21 +1084,27 @@ fio_gzopen(char const* path, char const* mode, int level, fio_location location)
|
||||
{
|
||||
gz->compress = 0;
|
||||
gz->fd = fio_open(path, O_RDONLY | PG_BINARY, location);
|
||||
if (gz->fd < 0)
|
||||
{
|
||||
free(gz);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (rc != Z_OK)
|
||||
{
|
||||
free(gz);
|
||||
return NULL;
|
||||
elog(ERROR, "zlib internal error when opening file %s: %s",
|
||||
path, gz->strm.msg);
|
||||
}
|
||||
return (gzFile)((size_t)gz + FIO_GZ_REMOTE_MARKER);
|
||||
}
|
||||
else
|
||||
{
|
||||
gzFile file;
|
||||
/* check if file opened for writing */
|
||||
if (strcmp(mode, PG_BINARY_W) == 0)
|
||||
{
|
||||
int fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, FILE_PERMISSIONS);
|
||||
int fd = open(path, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY, FILE_PERMISSIONS);
|
||||
if (fd < 0)
|
||||
return NULL;
|
||||
file = gzdopen(fd, mode);
|
||||
@ -1143,7 +1164,8 @@ fio_gzread(gzFile f, void *buf, unsigned size)
|
||||
{
|
||||
gz->strm.next_in = gz->buf;
|
||||
}
|
||||
rc = fio_read(gz->fd, gz->strm.next_in + gz->strm.avail_in, gz->buf + ZLIB_BUFFER_SIZE - gz->strm.next_in - gz->strm.avail_in);
|
||||
rc = fio_read(gz->fd, gz->strm.next_in + gz->strm.avail_in,
|
||||
gz->buf + ZLIB_BUFFER_SIZE - gz->strm.next_in - gz->strm.avail_in);
|
||||
if (rc > 0)
|
||||
{
|
||||
gz->strm.avail_in += rc;
|
||||
@ -1282,8 +1304,10 @@ z_off_t fio_gzseek(gzFile f, z_off_t offset, int whence)
|
||||
|
||||
#endif
|
||||
|
||||
/* Send file content */
|
||||
static void fio_send_file(int out, char const* path)
|
||||
/* Send file content
|
||||
* Note: it should not be used for large files.
|
||||
*/
|
||||
static void fio_load_file(int out, char const* path)
|
||||
{
|
||||
int fd = open(path, O_RDONLY);
|
||||
fio_header hdr;
|
||||
@ -1440,7 +1464,7 @@ int fio_send_pages(FILE* in, FILE* out, pgFile *file, XLogRecPtr horizonLsn,
|
||||
file->uncompressed_size += BLCKSZ;
|
||||
}
|
||||
else
|
||||
elog(ERROR, "Remote agent returned message of unknown type");
|
||||
elog(ERROR, "Remote agent returned message of unexpected type: %i", hdr.cop);
|
||||
}
|
||||
|
||||
return n_blocks_read;
|
||||
@ -1607,6 +1631,337 @@ cleanup:
|
||||
return;
|
||||
}
|
||||
|
||||
/* Receive chunks of compressed data, decompress them and write to
|
||||
* destination file.
|
||||
* Return codes:
|
||||
* FILE_MISSING (-1)
|
||||
* OPEN_FAILED (-2)
|
||||
* READ_FAILED (-3)
|
||||
* WRITE_FAILED (-4)
|
||||
* ZLIB_ERROR (-5)
|
||||
* REMOTE_ERROR (-6)
|
||||
*/
|
||||
int fio_send_file_gz(const char *from_fullpath, const char *to_fullpath, FILE* out, int thread_num)
|
||||
{
|
||||
fio_header hdr;
|
||||
int exit_code = SEND_OK;
|
||||
char *in_buf = pgut_malloc(CHUNK_SIZE); /* buffer for compressed data */
|
||||
char *out_buf = pgut_malloc(OUT_BUF_SIZE); /* 1MB buffer for decompressed data */
|
||||
size_t path_len = strlen(from_fullpath) + 1;
|
||||
/* decompressor */
|
||||
z_stream *strm = NULL;
|
||||
|
||||
hdr.cop = FIO_SEND_FILE;
|
||||
hdr.size = path_len;
|
||||
|
||||
elog(VERBOSE, "Thread [%d]: Attempting to open remote compressed WAL file '%s'",
|
||||
thread_num, from_fullpath);
|
||||
|
||||
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
|
||||
IO_CHECK(fio_write_all(fio_stdout, from_fullpath, path_len), path_len);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
fio_header hdr;
|
||||
IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
|
||||
|
||||
if (hdr.cop == FIO_SEND_FILE_EOF)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if (hdr.cop == FIO_ERROR)
|
||||
{
|
||||
/* handle error, reported by the agent */
|
||||
if (hdr.size > 0)
|
||||
{
|
||||
IO_CHECK(fio_read_all(fio_stdin, in_buf, hdr.size), hdr.size);
|
||||
elog(WARNING, "Thread [%d]: %s", thread_num, in_buf);
|
||||
}
|
||||
exit_code = hdr.arg;
|
||||
goto cleanup;
|
||||
}
|
||||
else if (hdr.cop == FIO_PAGE)
|
||||
{
|
||||
int rc;
|
||||
Assert(hdr.size <= CHUNK_SIZE);
|
||||
IO_CHECK(fio_read_all(fio_stdin, in_buf, hdr.size), hdr.size);
|
||||
|
||||
/* We have received a chunk of compressed data, lets decompress it */
|
||||
if (strm == NULL)
|
||||
{
|
||||
/* Initialize decompressor */
|
||||
strm = pgut_malloc(sizeof(z_stream));
|
||||
memset(strm, 0, sizeof(z_stream));
|
||||
|
||||
/* The fields next_in, avail_in initialized before init */
|
||||
strm->next_in = (Bytef *)in_buf;
|
||||
strm->avail_in = hdr.size;
|
||||
|
||||
rc = inflateInit2(strm, 15 + 16);
|
||||
|
||||
if (rc != Z_OK)
|
||||
{
|
||||
elog(WARNING, "Thread [%d]: Failed to initialize decompression stream for file '%s': %i: %s",
|
||||
thread_num, from_fullpath, rc, strm->msg);
|
||||
exit_code = ZLIB_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
strm->next_in = (Bytef *)in_buf;
|
||||
strm->avail_in = hdr.size;
|
||||
}
|
||||
|
||||
strm->next_out = (Bytef *)out_buf; /* output buffer */
|
||||
strm->avail_out = OUT_BUF_SIZE; /* free space in output buffer */
|
||||
|
||||
/*
|
||||
* From zlib documentation:
|
||||
* The application must update next_in and avail_in when avail_in
|
||||
* has dropped to zero. It must update next_out and avail_out when
|
||||
* avail_out has dropped to zero.
|
||||
*/
|
||||
while (strm->avail_in != 0) /* while there is data in input buffer, decompress it */
|
||||
{
|
||||
/* decompress until there is no data to decompress,
|
||||
* or buffer with uncompressed data is full
|
||||
*/
|
||||
rc = inflate(strm, Z_NO_FLUSH);
|
||||
if (rc == Z_STREAM_END)
|
||||
/* end of stream */
|
||||
break;
|
||||
else if (rc != Z_OK)
|
||||
{
|
||||
/* got an error */
|
||||
elog(WARNING, "Thread [%d]: Decompression failed for file '%s': %i: %s",
|
||||
thread_num, from_fullpath, rc, strm->msg);
|
||||
exit_code = ZLIB_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (strm->avail_out == 0)
|
||||
{
|
||||
/* Output buffer is full, write it out */
|
||||
if (fwrite(out_buf, 1, OUT_BUF_SIZE, out) != OUT_BUF_SIZE)
|
||||
{
|
||||
elog(WARNING, "Thread [%d]: Cannot write to file '%s': %s",
|
||||
thread_num, to_fullpath, strerror(errno));
|
||||
exit_code = WRITE_FAILED;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
strm->next_out = (Bytef *)out_buf; /* output buffer */
|
||||
strm->avail_out = OUT_BUF_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
/* write out leftovers if any */
|
||||
if (strm->avail_out != OUT_BUF_SIZE)
|
||||
{
|
||||
int len = OUT_BUF_SIZE - strm->avail_out;
|
||||
|
||||
if (fwrite(out_buf, 1, len, out) != len)
|
||||
{
|
||||
elog(WARNING, "Thread [%d]: Cannot write to file: %s",
|
||||
thread_num, strerror(errno));
|
||||
exit_code = WRITE_FAILED;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
elog(WARNING, "Thread [%d]: Remote agent returned message of unexpected type: %i",
|
||||
thread_num, hdr.cop);
|
||||
exit_code = REMOTE_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cleanup:
|
||||
if (exit_code < OPEN_FAILED)
|
||||
fio_disconnect(); /* discard possible pending data in pipe */
|
||||
|
||||
if (strm)
|
||||
{
|
||||
inflateEnd(strm);
|
||||
pg_free(strm);
|
||||
}
|
||||
|
||||
pg_free(in_buf);
|
||||
pg_free(out_buf);
|
||||
return exit_code;
|
||||
}
|
||||
|
||||
/* Receive chunks of data and write them to destination file.
|
||||
* Return codes:
|
||||
* SEND_OK (0)
|
||||
* FILE_MISSING (-1)
|
||||
* OPEN_FAILED (-2)
|
||||
* READ_FAILED (-3)
* WRITE_FAILED (-4)
|
||||
*/
|
||||
int fio_send_file(const char *from_fullpath, const char *to_fullpath, FILE* out, int thread_num)
|
||||
{
|
||||
fio_header hdr;
|
||||
int exit_code = SEND_OK;
|
||||
size_t path_len = strlen(from_fullpath) + 1;
|
||||
char *buf = pgut_malloc(CHUNK_SIZE); /* buffer */
|
||||
|
||||
hdr.cop = FIO_SEND_FILE;
|
||||
hdr.size = path_len;
|
||||
|
||||
elog(VERBOSE, "Thread [%d]: Attempting to open remote WAL file '%s'",
|
||||
thread_num, from_fullpath);
|
||||
|
||||
IO_CHECK(fio_write_all(fio_stdout, &hdr, sizeof(hdr)), sizeof(hdr));
|
||||
IO_CHECK(fio_write_all(fio_stdout, from_fullpath, path_len), path_len);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
/* receive data */
|
||||
IO_CHECK(fio_read_all(fio_stdin, &hdr, sizeof(hdr)), sizeof(hdr));
|
||||
|
||||
if (hdr.cop == FIO_SEND_FILE_EOF)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if (hdr.cop == FIO_ERROR)
|
||||
{
|
||||
/* handle error, reported by the agent */
|
||||
if (hdr.size > 0)
|
||||
{
|
||||
IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size);
|
||||
elog(WARNING, "Thread [%d]: %s", thread_num, buf);
|
||||
}
|
||||
exit_code = hdr.arg;
|
||||
break;
|
||||
}
|
||||
else if (hdr.cop == FIO_PAGE)
|
||||
{
|
||||
Assert(hdr.size <= CHUNK_SIZE);
|
||||
IO_CHECK(fio_read_all(fio_stdin, buf, hdr.size), hdr.size);
|
||||
|
||||
/* We have received a chunk of data, let's write it out */
|
||||
if (fwrite(buf, 1, hdr.size, out) != hdr.size)
|
||||
{
|
||||
elog(WARNING, "Thread [%d]: Cannot write to file '%s': %s",
|
||||
thread_num, to_fullpath, strerror(errno));
|
||||
exit_code = WRITE_FAILED;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* TODO: fio_disconnect may get assert fail when running after this */
|
||||
elog(WARNING, "Thread [%d]: Remote agent returned message of unexpected type: %i",
|
||||
thread_num, hdr.cop);
|
||||
exit_code = REMOTE_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (exit_code < OPEN_FAILED)
|
||||
fio_disconnect(); /* discard possible pending data in pipe */
|
||||
|
||||
pg_free(buf);
|
||||
return exit_code;
|
||||
}
|
||||
|
||||
/* Send file content
|
||||
* On error we return FIO_ERROR message with following codes
|
||||
* FILE_MISSING (-1)
|
||||
* OPEN_FAILED (-2)
|
||||
* READ_FAILED (-3)
|
||||
*
|
||||
*/
|
||||
static void fio_send_file_impl(int out, char const* path)
|
||||
{
|
||||
FILE *fp;
|
||||
fio_header hdr;
|
||||
char *buf = pgut_malloc(CHUNK_SIZE);
|
||||
ssize_t read_len = 0;
|
||||
char *errormsg = NULL;
|
||||
|
||||
/* open source file for read */
|
||||
/* TODO: check that file is regular file */
|
||||
fp = fopen(path, PG_BINARY_R);
|
||||
if (!fp)
|
||||
{
|
||||
hdr.cop = FIO_ERROR;
|
||||
|
||||
/* do not send exact wording of ENOENT error message
|
||||
* because it is a very common error in our case, so
|
||||
* error code is enough.
|
||||
*/
|
||||
if (errno == ENOENT)
|
||||
{
|
||||
hdr.arg = FILE_MISSING;
|
||||
hdr.size = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
hdr.arg = OPEN_FAILED;
|
||||
errormsg = pgut_malloc(MAXPGPATH);
|
||||
/* Construct the error message */
|
||||
snprintf(errormsg, MAXPGPATH, "Cannot open source file '%s': %s", path, strerror(errno));
|
||||
hdr.size = strlen(errormsg) + 1;
|
||||
}
|
||||
|
||||
/* send header and message */
|
||||
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
|
||||
if (errormsg)
|
||||
IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size);
|
||||
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* copy content */
|
||||
for (;;)
|
||||
{
|
||||
read_len = fread(buf, 1, CHUNK_SIZE, fp);
|
||||
|
||||
/* report error */
|
||||
if (read_len < 0 || (read_len == 0 && !feof(fp)))
|
||||
{
|
||||
hdr.cop = FIO_ERROR;
|
||||
errormsg = pgut_malloc(MAXPGPATH);
|
||||
hdr.arg = READ_FAILED;
|
||||
/* Construct the error message */
|
||||
snprintf(errormsg, MAXPGPATH, "Cannot read source file '%s': %s", path, strerror(errno));
|
||||
hdr.size = strlen(errormsg) + 1;
|
||||
/* send header and message */
|
||||
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
|
||||
IO_CHECK(fio_write_all(out, errormsg, hdr.size), hdr.size);
|
||||
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
else if (read_len == 0)
|
||||
break;
|
||||
else
|
||||
{
|
||||
/* send chunk */
|
||||
hdr.cop = FIO_PAGE;
|
||||
hdr.size = read_len;
|
||||
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
|
||||
IO_CHECK(fio_write_all(out, buf, read_len), read_len);
|
||||
}
|
||||
}
|
||||
|
||||
/* we are done, send eof */
|
||||
hdr.cop = FIO_SEND_FILE_EOF;
|
||||
IO_CHECK(fio_write_all(out, &hdr, sizeof(hdr)), sizeof(hdr));
|
||||
|
||||
cleanup:
|
||||
if (fp)
|
||||
fclose(fp);
|
||||
pg_free(buf);
|
||||
pg_free(errormsg);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Execute commands at remote host */
|
||||
void fio_communicate(int in, int out)
|
||||
{
|
||||
@ -1643,7 +1998,7 @@ void fio_communicate(int in, int out)
|
||||
}
|
||||
switch (hdr.cop) {
|
||||
case FIO_LOAD: /* Send file content */
|
||||
fio_send_file(out, buf);
|
||||
fio_load_file(out, buf);
|
||||
break;
|
||||
case FIO_OPENDIR: /* Open directory for traversal */
|
||||
dir[hdr.handle] = opendir(buf);
|
||||
@ -1754,6 +2109,9 @@ void fio_communicate(int in, int out)
|
||||
// buf contain fio_send_request header and bitmap.
|
||||
fio_send_pages_impl(fd[hdr.handle], out, buf, true);
|
||||
break;
|
||||
case FIO_SEND_FILE:
|
||||
fio_send_file_impl(out, buf);
|
||||
break;
|
||||
case FIO_SYNC:
|
||||
/* open file and fsync it */
|
||||
tmp_fd = open(buf, O_WRONLY | PG_BINARY, FILE_PERMISSIONS);
|
||||
@ -1772,7 +2130,10 @@ void fio_communicate(int in, int out)
|
||||
break;
|
||||
case FIO_GET_CRC32:
|
||||
/* calculate crc32 for a file */
|
||||
crc = pgFileGetCRC(buf, true, true);
|
||||
if (hdr.arg == 1)
|
||||
crc = pgFileGetCRCgz(buf, true, true);
|
||||
else
|
||||
crc = pgFileGetCRC(buf, true, true);
|
||||
IO_CHECK(fio_write_all(out, &crc, sizeof(crc)), sizeof(crc));
|
||||
break;
|
||||
case FIO_DISCONNECT:
|
||||
|
@ -40,6 +40,8 @@ typedef enum
FIO_SEND_PAGES,
FIO_SEND_PAGES_PAGEMAP,
FIO_ERROR,
FIO_SEND_FILE,
// FIO_CHUNK,
FIO_SEND_FILE_EOF,
FIO_SEND_FILE_CORRUPTION,
/* messages for closing connection */
@ -100,7 +102,7 @@ extern int fio_truncate(int fd, off_t size);
extern int fio_close(int fd);
extern void fio_disconnect(void);
extern int fio_sync(char const* path, fio_location location);
extern pg_crc32 fio_get_crc32(const char *file_path, fio_location location);
extern pg_crc32 fio_get_crc32(const char *file_path, fio_location location, bool decompress);

extern int fio_rename(char const* old_path, char const* new_path, fio_location location);
extern int fio_symlink(char const* target, char const* link_path, fio_location location);

@ -220,7 +220,7 @@ bool launch_agent(void)
return false;
} else {
#endif
elog(LOG, "Spawn agent %d version %s", child_pid, PROGRAM_VERSION);
elog(LOG, "Start SSH client process, pid %d", child_pid);
SYS_CHECK(close(infd[1])); /* These are being used by the child */
SYS_CHECK(close(outfd[0]));
SYS_CHECK(close(errfd[1]));

@ -479,7 +479,7 @@ do_validate_instance(void)
result = scan_parent_chain(current_backup, &tmp_backup);

/* chain is broken */
if (result == 0)
if (result == ChainIsBroken)
{
char *parent_backup_id;
/* determine missing backup ID */
@ -505,7 +505,7 @@ do_validate_instance(void)
continue;
}
/* chain is whole, but at least one parent is invalid */
else if (result == 1)
else if (result == ChainIsInvalid)
{
/* Oldest corrupt backup has a chance for revalidation */
if (current_backup->start_time != tmp_backup->start_time)
@ -630,7 +630,7 @@ do_validate_instance(void)
*/
result = scan_parent_chain(backup, &tmp_backup);

if (result == 1)
if (result == ChainIsInvalid)
{
/* revalidation make sense only if oldest invalid backup is current_backup
*/

625
tests/archive.py
@ -281,7 +281,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
|
||||
# @unittest.skip("skip")
|
||||
def test_pgpro434_4(self):
|
||||
"""
|
||||
Check pg_stop_backup_timeout, needed backup_timeout
|
||||
Check pg_stop_backup_timeout, libpq-timeout requested.
|
||||
Fixed in commit d84d79668b0c139 and assert fixed by ptrack 1.7
|
||||
"""
|
||||
fname = self.id().split('.')[3]
|
||||
@ -398,15 +398,11 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
|
||||
log_content)
|
||||
|
||||
self.assertIn(
|
||||
'INFO: pg_probackup archive-push from',
|
||||
'pg_probackup archive-push WAL file',
|
||||
log_content)
|
||||
|
||||
self.assertIn(
|
||||
'ERROR: WAL segment ',
|
||||
log_content)
|
||||
|
||||
self.assertIn(
|
||||
'already exists.',
|
||||
'WAL file already exists in archive with different checksum',
|
||||
log_content)
|
||||
|
||||
self.assertNotIn(
|
||||
@ -448,8 +444,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
|
||||
base_dir=os.path.join(module_name, fname, 'node'),
|
||||
set_replication=True,
|
||||
initdb_params=['--data-checksums'],
|
||||
pg_options={
|
||||
'checkpoint_timeout': '30s'})
|
||||
pg_options={'checkpoint_timeout': '30s'})
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'node', node)
|
||||
@ -487,9 +482,13 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
|
||||
self.assertIn(
|
||||
'DETAIL: The failed archive command was:', log_content)
|
||||
self.assertIn(
|
||||
'INFO: pg_probackup archive-push from', log_content)
|
||||
'pg_probackup archive-push WAL file', log_content)
|
||||
self.assertNotIn(
|
||||
'WAL file already exists in archive with '
|
||||
'different checksum, overwriting', log_content)
|
||||
self.assertIn(
|
||||
'{0}" already exists.'.format(filename), log_content)
|
||||
'WAL file already exists in archive with '
|
||||
'different checksum', log_content)
|
||||
|
||||
self.assertNotIn(
|
||||
'pg_probackup archive-push completed successfully', log_content)
|
||||
@ -497,7 +496,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
|
||||
self.set_archiving(backup_dir, 'node', node, overwrite=True)
|
||||
node.reload()
|
||||
self.switch_wal_segment(node)
|
||||
sleep(2)
|
||||
sleep(5)
|
||||
|
||||
with open(log_file, 'r') as f:
|
||||
log_content = f.read()
|
||||
@ -505,6 +504,10 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
|
||||
'pg_probackup archive-push completed successfully' in log_content,
|
||||
'Expecting messages about successfull execution archive_command')
|
||||
|
||||
self.assertIn(
|
||||
'WAL file already exists in archive with '
|
||||
'different checksum, overwriting', log_content)
|
||||
|
||||
# Clean after yourself
|
||||
self.del_test_dir(module_name, fname)
|
||||
|
||||
@@ -520,7 +523,9 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):

self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
self.set_archiving(
backup_dir, 'node', node,
log_level='verbose', archive_timeout=60)

node.slow_start()

@@ -579,12 +584,9 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
log_file = os.path.join(node.logs_dir, 'postgresql.log')
with open(log_file, 'r') as f:
log_content = f.read()
self.assertIn(
'Cannot open destination temporary WAL file',
log_content)

self.assertIn(
'Reusing stale destination temporary WAL file',
'Reusing stale temp WAL file',
log_content)

# Clean after yourself
@@ -602,7 +604,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):

self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node, archive_timeout=60)

node.slow_start()

@@ -905,8 +907,8 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '30s',
'archive_timeout': '10s'}
)
'archive_timeout': '10s'})

replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
@@ -923,6 +925,8 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,10000) i")

master.pgbench_init(scale=5)

# TAKE FULL ARCHIVE BACKUP FROM MASTER
self.backup_node(backup_dir, 'master', master)
# GET LOGICAL CONTENT FROM MASTER
@@ -937,11 +941,11 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
pgdata_replica = self.pgdata_content(replica.data_dir)
self.compare_pgdata(pgdata_master, pgdata_replica)

self.set_replica(master, replica, synchronous=True)
self.set_replica(master, replica, synchronous=False)
# ADD INSTANCE REPLICA
# self.add_instance(backup_dir, 'replica', replica)
# SET ARCHIVING FOR REPLICA
# self.set_archiving(backup_dir, 'replica', replica, replica=True)
self.set_archiving(backup_dir, 'master', replica, replica=True)
replica.slow_start(replica=True)

# CHECK LOGICAL CORRECTNESS on REPLICA
@@ -973,6 +977,18 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
self.assertEqual(
'OK', self.show_pb(backup_dir, 'master', backup_id)['status'])

master.pgbench_init(scale=50)

sleep(10)

replica.promote()

master.pgbench_init(scale=10)
replica.pgbench_init(scale=10)


exit(1)

# Clean after yourself
self.del_test_dir(module_name, fname)
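The hunk above promotes the replica and then checks backup state through show_pb(); the fixed sleep(10) is what makes the scenario timing-sensitive. A small polling helper built on the same show_pb() call used here would express the wait explicitly; this is only a sketch reusing the suite's existing method, not something the diff adds:

from time import sleep, time

def wait_backup_status(test, backup_dir, instance, backup_id,
                       expected='OK', timeout=60):
    """Poll show_pb() until the backup reaches the expected status or time runs out."""
    deadline = time() + timeout
    while time() < deadline:
        status = test.show_pb(backup_dir, instance, backup_id)['status']
        if status == expected:
            return status
        sleep(1)
    raise AssertionError(
        'backup {0} did not reach status {1}'.format(backup_id, expected))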
|
||||
|
||||
@@ -1718,7 +1734,7 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):

self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node, log_level='verbose')
node.slow_start()

backup_id = self.backup_node(backup_dir, 'node', node)
@@ -1734,6 +1750,8 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
node.slow_start()
node.pgbench_init(scale=2)

sleep(5)

show = self.show_archive(backup_dir)

timelines = show[0]['timelines']
@@ -1755,12 +1773,571 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
tli13['closest-backup-id'])

self.assertEqual(
'0000000D000000000000001B',
'0000000D000000000000001C',
tli13['max-segno'])

# Clean after yourself
self.del_test_dir(module_name, fname)
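These timeline assertions read show_archive() output and compare per-timeline fields such as closest-backup-id and max-segno. A sketch of selecting one timeline entry out of that structure; the 'tli' key used for matching is an assumption, only the two asserted keys actually appear in the diff:

def timeline_summary(show_archive_output, tli):
    """Pick one timeline entry for the first instance in show_archive() output."""
    timelines = show_archive_output[0]['timelines']
    for entry in timelines:
        # 'closest-backup-id' and 'max-segno' are the keys the assertions above rely on
        if entry.get('tli') == tli:
            return entry.get('closest-backup-id'), entry.get('max-segno')
    return None, None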
|
||||
|
||||
@unittest.skip("skip")
|
||||
# @unittest.expectedFailure
|
||||
def test_archiving_and_slots(self):
|
||||
"""
|
||||
Check that archiving doesn't break the slot
guarantee.
|
||||
"""
|
||||
fname = self.id().split('.')[3]
|
||||
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||
node = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'node'),
|
||||
set_replication=True,
|
||||
initdb_params=['--data-checksums'],
|
||||
pg_options={
|
||||
'autovacuum': 'off',
|
||||
'checkpoint_timeout': '30s',
|
||||
'max_wal_size': '64MB'})
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'node', node)
|
||||
self.set_archiving(backup_dir, 'node', node, log_level='verbose')
|
||||
node.slow_start()
|
||||
|
||||
if self.get_version(node) < 100000:
|
||||
pg_receivexlog_path = self.get_bin_path('pg_receivexlog')
|
||||
else:
|
||||
pg_receivexlog_path = self.get_bin_path('pg_receivewal')
|
||||
|
||||
# "pg_receivewal --create-slot --slot archive_slot --if-not-exists "
|
||||
# "&& pg_receivewal --synchronous -Z 1 /tmp/wal --slot archive_slot --no-loop"
|
||||
|
||||
self.run_binary(
|
||||
[
|
||||
pg_receivexlog_path, '-p', str(node.port), '--synchronous',
|
||||
'--create-slot', '--slot', 'archive_slot', '--if-not-exists'
|
||||
])
|
||||
|
||||
node.pgbench_init(scale=10)
|
||||
|
||||
pg_receivexlog = self.run_binary(
|
||||
[
|
||||
pg_receivexlog_path, '-p', str(node.port), '--synchronous',
|
||||
'-D', os.path.join(backup_dir, 'wal', 'node'),
|
||||
'--no-loop', '--slot', 'archive_slot',
|
||||
'-Z', '1'
|
||||
], asynchronous=True)
|
||||
|
||||
if pg_receivexlog.returncode:
|
||||
self.assertFalse(
|
||||
True,
|
||||
'Failed to start pg_receivexlog: {0}'.format(
|
||||
pg_receivexlog.communicate()[1]))
|
||||
|
||||
sleep(2)
|
||||
|
||||
pg_receivexlog.kill()
|
||||
|
||||
backup_id = self.backup_node(backup_dir, 'node', node)
|
||||
node.pgbench_init(scale=20)
|
||||
|
||||
exit(1)
|
||||
|
||||
# Clean after yourself
|
||||
self.del_test_dir(module_name, fname)
|
||||
|
||||
def test_archive_push_sanity(self):
|
||||
""""""
|
||||
fname = self.id().split('.')[3]
|
||||
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||
node = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'node'),
|
||||
set_replication=True,
|
||||
initdb_params=['--data-checksums'],
|
||||
pg_options={
|
||||
'archive_mode': 'on',
|
||||
'archive_command': 'exit 1'})
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'node', node)
|
||||
|
||||
node.slow_start()
|
||||
|
||||
node.pgbench_init(scale=50)
|
||||
node.stop()
|
||||
|
||||
self.set_archiving(backup_dir, 'node', node)
|
||||
os.remove(os.path.join(node.logs_dir, 'postgresql.log'))
|
||||
node.slow_start()
|
||||
|
||||
self.backup_node(backup_dir, 'node', node)
|
||||
|
||||
with open(os.path.join(node.logs_dir, 'postgresql.log'), 'r') as f:
|
||||
postgres_log_content = f.read()
|
||||
|
||||
# print(postgres_log_content)
|
||||
# make sure that .backup file is not compressed
|
||||
self.assertNotIn('.backup.gz', postgres_log_content)
|
||||
self.assertNotIn('WARNING', postgres_log_content)
|
||||
|
||||
replica = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'replica'))
|
||||
replica.cleanup()
|
||||
|
||||
self.restore_node(
|
||||
backup_dir, 'node', replica,
|
||||
data_dir=replica.data_dir, options=['-R'])
|
||||
|
||||
#self.set_archiving(backup_dir, 'replica', replica, replica=True)
|
||||
self.set_auto_conf(replica, {'port': replica.port})
|
||||
self.set_auto_conf(replica, {'archive_mode': 'always'})
|
||||
self.set_auto_conf(replica, {'hot_standby': 'on'})
|
||||
replica.slow_start(replica=True)
|
||||
|
||||
self.wait_until_replica_catch_with_master(node, replica)
|
||||
|
||||
node.pgbench_init(scale=5)
|
||||
|
||||
replica.promote()
|
||||
replica.pgbench_init(scale=10)
|
||||
|
||||
with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f:
|
||||
replica_log_content = f.read()
|
||||
|
||||
# make sure that .partial file is not compressed
|
||||
self.assertNotIn('.partial.gz', replica_log_content)
|
||||
# make sure that .history file is not compressed
|
||||
self.assertNotIn('.history.gz', replica_log_content)
|
||||
self.assertNotIn('WARNING', replica_log_content)
|
||||
|
||||
output = self.show_archive(
|
||||
backup_dir, 'node', as_json=False, as_text=True,
|
||||
options=['--log-level-console=VERBOSE'])
|
||||
|
||||
self.assertNotIn('WARNING', output)
|
||||
|
||||
# Clean after yourself
|
||||
self.del_test_dir(module_name, fname)
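test_archive_push_sanity asserts, by grepping postgresql.log, that .backup files on the primary and .partial/.history files on the promoted replica never get gzip-compressed. The same invariant can be checked directly against the archive directory; the suffix list is taken from the assertions above and the helper name is hypothetical:

import os

def find_wrongly_compressed(archive_dir,
                            suffixes=('.backup.gz', '.partial.gz', '.history.gz')):
    """Return archive entries that should have stayed uncompressed but carry a .gz suffix."""
    offenders = []
    for name in os.listdir(archive_dir):
        if name.endswith(suffixes):
            offenders.append(os.path.join(archive_dir, name))
    return offenders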
|
||||
|
||||
# @unittest.expectedFailure
|
||||
# @unittest.skip("skip")
|
||||
def test_archive_pg_receivexlog_partial_handling(self):
|
||||
"""check that archive-get delivers .partial and .gz.partial files"""
|
||||
fname = self.id().split('.')[3]
|
||||
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||
node = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'node'),
|
||||
set_replication=True,
|
||||
initdb_params=['--data-checksums'])
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'node', node)
|
||||
|
||||
node.slow_start()
|
||||
|
||||
self.backup_node(backup_dir, 'node', node, options=['--stream'])
|
||||
|
||||
replica = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'replica'))
|
||||
replica.cleanup()
|
||||
|
||||
self.restore_node(
|
||||
backup_dir, 'node', replica, replica.data_dir, options=['-R'])
|
||||
self.set_auto_conf(replica, {'port': replica.port})
|
||||
self.set_replica(node, replica)
|
||||
|
||||
self.add_instance(backup_dir, 'replica', replica)
|
||||
# self.set_archiving(backup_dir, 'replica', replica, replica=True)
|
||||
|
||||
replica.slow_start(replica=True)
|
||||
|
||||
node.safe_psql('postgres', 'CHECKPOINT')
|
||||
|
||||
if self.get_version(replica) < 100000:
|
||||
pg_receivexlog_path = self.get_bin_path('pg_receivexlog')
|
||||
else:
|
||||
pg_receivexlog_path = self.get_bin_path('pg_receivewal')
|
||||
|
||||
cmdline = [
|
||||
pg_receivexlog_path, '-p', str(replica.port), '--synchronous',
|
||||
'-D', os.path.join(backup_dir, 'wal', 'replica')]
|
||||
|
||||
if self.archive_compress and node.major_version >= 10:
|
||||
cmdline += ['-Z', '1']
|
||||
|
||||
pg_receivexlog = self.run_binary(cmdline, asynchronous=True)
|
||||
|
||||
if pg_receivexlog.returncode:
|
||||
self.assertFalse(
|
||||
True,
|
||||
'Failed to start pg_receivexlog: {0}'.format(
|
||||
pg_receivexlog.communicate()[1]))
|
||||
|
||||
node.safe_psql(
|
||||
"postgres",
|
||||
"create table t_heap as select i as id, md5(i::text) as text, "
|
||||
"md5(repeat(i::text,10))::tsvector as tsvector "
|
||||
"from generate_series(0,1000000) i")
|
||||
|
||||
# FULL
|
||||
self.backup_node(backup_dir, 'replica', replica, options=['--stream'])
|
||||
|
||||
node.safe_psql(
|
||||
"postgres",
|
||||
"insert into t_heap select i as id, md5(i::text) as text, "
|
||||
"md5(repeat(i::text,10))::tsvector as tsvector "
|
||||
"from generate_series(1000000,2000000) i")
|
||||
|
||||
node_restored = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'node_restored'))
|
||||
node_restored.cleanup()
|
||||
|
||||
self.restore_node(
|
||||
backup_dir, 'replica', node_restored,
|
||||
node_restored.data_dir, options=['--recovery-target=latest', '--recovery-target-action=promote'])
|
||||
self.set_auto_conf(node_restored, {'port': node_restored.port})
|
||||
self.set_auto_conf(node_restored, {'hot_standby': 'off'})
|
||||
|
||||
# it will set node_restored as warm standby.
|
||||
# with open(os.path.join(node_restored.data_dir, "standby.signal"), 'w') as f:
|
||||
# f.flush()
|
||||
# f.close()
|
||||
|
||||
node_restored.slow_start()
|
||||
|
||||
result = node.safe_psql(
|
||||
"postgres",
|
||||
"select sum(id) from t_heap")
|
||||
|
||||
result_new = node_restored.safe_psql(
|
||||
"postgres",
|
||||
"select sum(id) from t_heap")
|
||||
|
||||
self.assertEqual(result, result_new)
|
||||
|
||||
# Clean after yourself
|
||||
pg_receivexlog.kill()
|
||||
self.del_test_dir(module_name, fname)
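Per the docstring, archive-get must be able to deliver .partial and .gz.partial segments when the newest segment was never finished. A sketch of that lookup order, preferring the complete segment and falling back to the partial variants; file-name conventions only, not the real archive-get logic:

import os

def locate_segment(archive_dir, segno):
    """Return the best available file for a WAL segment name, or None if nothing was archived."""
    candidates = [segno, segno + '.gz', segno + '.partial', segno + '.gz.partial']
    for name in candidates:
        path = os.path.join(archive_dir, name)
        if os.path.exists(path):
            return path
    return None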
|
||||
|
||||
def test_multi_timeline_recovery_prefetching(self):
|
||||
""""""
|
||||
fname = self.id().split('.')[3]
|
||||
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||
node = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'node'),
|
||||
set_replication=True,
|
||||
initdb_params=['--data-checksums'],
|
||||
pg_options={'autovacuum': 'off'})
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'node', node)
|
||||
self.set_archiving(backup_dir, 'node', node)
|
||||
|
||||
node.slow_start()
|
||||
|
||||
self.backup_node(backup_dir, 'node', node)
|
||||
|
||||
node.pgbench_init(scale=50)
|
||||
|
||||
target_xid = node.safe_psql(
|
||||
'postgres',
|
||||
'select txid_current()').rstrip()
|
||||
|
||||
node.pgbench_init(scale=20)
|
||||
|
||||
node.stop()
|
||||
node.cleanup()
|
||||
|
||||
self.restore_node(
|
||||
backup_dir, 'node', node,
|
||||
options=[
|
||||
'--recovery-target-xid={0}'.format(target_xid),
|
||||
'--recovery-target-action=promote'])
|
||||
|
||||
node.slow_start()
|
||||
|
||||
node.pgbench_init(scale=20)
|
||||
|
||||
target_xid = node.safe_psql(
|
||||
'postgres',
|
||||
'select txid_current()').rstrip()
|
||||
|
||||
node.stop(['-m', 'immediate', '-D', node.data_dir])
|
||||
node.cleanup()
|
||||
|
||||
self.restore_node(
|
||||
backup_dir, 'node', node,
|
||||
options=[
|
||||
# '--recovery-target-xid={0}'.format(target_xid),
|
||||
'--recovery-target-timeline=2',
|
||||
# '--recovery-target-action=promote',
|
||||
'--no-validate'])
|
||||
node.slow_start()
|
||||
|
||||
node.pgbench_init(scale=20)
|
||||
result = node.safe_psql(
|
||||
'postgres',
|
||||
'select * from pgbench_accounts')
|
||||
node.stop()
|
||||
node.cleanup()
|
||||
|
||||
self.restore_node(
|
||||
backup_dir, 'node', node,
|
||||
options=[
|
||||
# '--recovery-target-xid=100500',
|
||||
'--recovery-target-timeline=3',
|
||||
# '--recovery-target-action=promote',
|
||||
'--no-validate'])
|
||||
os.remove(os.path.join(node.logs_dir, 'postgresql.log'))
|
||||
|
||||
restore_command = self.get_restore_command(backup_dir, 'node', node)
|
||||
restore_command += ' -j 2 --batch-size=10 --log-level-console=VERBOSE'
|
||||
|
||||
if node.major_version >= 12:
|
||||
node.append_conf(
|
||||
'probackup_recovery.conf', "restore_command = '{0}'".format(restore_command))
|
||||
else:
|
||||
node.append_conf(
|
||||
'recovery.conf', "restore_command = '{0}'".format(restore_command))
|
||||
|
||||
node.slow_start()
|
||||
|
||||
result_new = node.safe_psql(
|
||||
'postgres',
|
||||
'select * from pgbench_accounts')
|
||||
|
||||
self.assertEqual(result, result_new)
|
||||
|
||||
with open(os.path.join(node.logs_dir, 'postgresql.log'), 'r') as f:
|
||||
postgres_log_content = f.read()
|
||||
|
||||
# check that requesting a non-existing segment does not
# throw away the prefetched segments
|
||||
self.assertIn(
|
||||
'pg_probackup archive-get failed to '
|
||||
'deliver WAL file: 000000030000000000000006',
|
||||
postgres_log_content)
|
||||
|
||||
self.assertIn(
|
||||
'pg_probackup archive-get failed to '
|
||||
'deliver WAL file: 000000020000000000000006',
|
||||
postgres_log_content)
|
||||
|
||||
self.assertIn(
|
||||
'pg_probackup archive-get used prefetched '
|
||||
'WAL segment 000000010000000000000006, prefetch state: 5/10',
|
||||
postgres_log_content)
|
||||
|
||||
# Clean after yourself
|
||||
self.del_test_dir(module_name, fname)
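The test composes restore_command from get_restore_command() plus '-j 2 --batch-size=10 --log-level-console=VERBOSE' and writes it into probackup_recovery.conf on v12+ or recovery.conf on older servers. The same decision, factored into a helper; this is a sketch reusing the calls that already appear in the test:

def apply_restore_command(node, restore_command):
    """Point the node at the given restore_command, honoring the pre/post-12 config split."""
    if node.major_version >= 12:
        node.append_conf(
            'probackup_recovery.conf',
            "restore_command = '{0}'".format(restore_command))
    else:
        node.append_conf(
            'recovery.conf',
            "restore_command = '{0}'".format(restore_command))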
|
||||
|
||||
def test_archive_get_batching_sanity(self):
|
||||
"""
|
||||
Make sure that batching works.
|
||||
.gz file is corrupted and uncompressed is not, check that both
|
||||
corruption detected and uncompressed file is used.
|
||||
"""
|
||||
fname = self.id().split('.')[3]
|
||||
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||
node = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'node'),
|
||||
set_replication=True,
|
||||
initdb_params=['--data-checksums'],
|
||||
pg_options={'autovacuum': 'off'})
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'node', node)
|
||||
self.set_archiving(backup_dir, 'node', node)
|
||||
|
||||
node.slow_start()
|
||||
|
||||
self.backup_node(backup_dir, 'node', node, options=['--stream'])
|
||||
|
||||
node.pgbench_init(scale=50)
|
||||
|
||||
replica = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'replica'))
|
||||
replica.cleanup()
|
||||
|
||||
self.restore_node(
|
||||
backup_dir, 'node', replica, replica.data_dir)
|
||||
self.set_replica(node, replica, log_shipping=True)
|
||||
|
||||
if node.major_version >= 12:
|
||||
self.set_auto_conf(replica, {'restore_command': 'exit 1'})
|
||||
else:
|
||||
replica.append_conf('recovery.conf', "restore_command = 'exit 1'")
|
||||
|
||||
replica.slow_start(replica=True)
|
||||
|
||||
# at this point replica is consistent
|
||||
restore_command = self.get_restore_command(backup_dir, 'node', replica)
|
||||
|
||||
restore_command += ' -j 2 --batch-size=10'
|
||||
|
||||
print(restore_command)
|
||||
|
||||
if node.major_version >= 12:
|
||||
self.set_auto_conf(replica, {'restore_command': restore_command})
|
||||
else:
|
||||
replica.append_conf(
|
||||
'recovery.conf', "restore_command = '{0}'".format(restore_command))
|
||||
|
||||
replica.restart()
|
||||
|
||||
sleep(5)
|
||||
|
||||
with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f:
|
||||
postgres_log_content = f.read()
|
||||
|
||||
self.assertIn(
|
||||
'pg_probackup archive-get completed successfully, fetched: 10/10',
|
||||
postgres_log_content)
|
||||
self.assertIn('used prefetched WAL segment', postgres_log_content)
|
||||
self.assertIn('prefetch state: 9/10', postgres_log_content)
|
||||
self.assertIn('prefetch state: 8/10', postgres_log_content)
|
||||
|
||||
# Clean after yourself
|
||||
self.del_test_dir(module_name, fname)
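The batching assertions key off log lines such as 'pg_probackup archive-get completed successfully, fetched: 10/10' and 'prefetch state: 9/10'. A sketch for extracting those counters from postgresql.log so they could be compared numerically; the regexes are written against the quoted messages and nothing else:

import re

def prefetch_states(log_text):
    """Return all (used, total) pairs reported as 'prefetch state: N/M' in the log."""
    return [(int(used), int(total))
            for used, total in re.findall(r'prefetch state: (\d+)/(\d+)', log_text)]

def fetched_counts(log_text):
    """Return all (fetched, requested) pairs from archive-get completion messages."""
    return [(int(done), int(asked))
            for done, asked in re.findall(
                r'completed successfully, fetched: (\d+)/(\d+)', log_text)]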
|
||||
|
||||
def test_archive_get_prefetch_corruption(self):
|
||||
"""
|
||||
Make sure that WAL corruption is detected.
|
||||
And --prefetch-dir is honored.
|
||||
"""
|
||||
fname = self.id().split('.')[3]
|
||||
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||
node = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'node'),
|
||||
set_replication=True,
|
||||
initdb_params=['--data-checksums'],
|
||||
pg_options={'autovacuum': 'off', 'wal_keep_segments': '200'})
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'node', node)
|
||||
self.set_archiving(backup_dir, 'node', node)
|
||||
|
||||
node.slow_start()
|
||||
|
||||
self.backup_node(backup_dir, 'node', node, options=['--stream'])
|
||||
|
||||
node.pgbench_init(scale=50)
|
||||
|
||||
replica = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'replica'))
|
||||
replica.cleanup()
|
||||
|
||||
self.restore_node(
|
||||
backup_dir, 'node', replica, replica.data_dir)
|
||||
self.set_replica(node, replica, log_shipping=True)
|
||||
|
||||
if node.major_version >= 12:
|
||||
self.set_auto_conf(replica, {'restore_command': 'exit 1'})
|
||||
else:
|
||||
replica.append_conf('recovery.conf', "restore_command = 'exit 1'")
|
||||
|
||||
replica.slow_start(replica=True)
|
||||
|
||||
# at this point replica is consistent
|
||||
restore_command = self.get_restore_command(backup_dir, 'node', replica)
|
||||
|
||||
restore_command += ' -j 2 --batch-size=10 --log-level-console=VERBOSE'
|
||||
#restore_command += ' --batch-size=2 --log-level-console=VERBOSE'
|
||||
|
||||
if node.major_version >= 12:
|
||||
self.set_auto_conf(replica, {'restore_command': restore_command})
|
||||
else:
|
||||
replica.append_conf(
|
||||
'recovery.conf', "restore_command = '{0}'".format(restore_command))
|
||||
|
||||
replica.restart()
|
||||
|
||||
sleep(5)
|
||||
|
||||
with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f:
|
||||
postgres_log_content = f.read()
|
||||
|
||||
self.assertIn(
|
||||
'pg_probackup archive-get completed successfully, fetched: 10/10',
|
||||
postgres_log_content)
|
||||
self.assertIn('used prefetched WAL segment', postgres_log_content)
|
||||
self.assertIn('prefetch state: 9/10', postgres_log_content)
|
||||
self.assertIn('prefetch state: 8/10', postgres_log_content)
|
||||
|
||||
replica.stop()
|
||||
|
||||
# generate WAL, copy it into prefetch directory, then corrupt
|
||||
# some segment
|
||||
node.pgbench_init(scale=20)
|
||||
sleep(10)
|
||||
|
||||
# now copy WAL files into prefetch directory and corrupt some of them
|
||||
archive_dir = os.path.join(backup_dir, 'wal', 'node')
|
||||
files = os.listdir(archive_dir)
|
||||
files.sort()
|
||||
|
||||
for filename in [files[-4], files[-3], files[-2], files[-1]]:
|
||||
src_file = os.path.join(archive_dir, filename)
|
||||
|
||||
if node.major_version >= 10:
|
||||
wal_dir = 'pg_wal'
|
||||
else:
|
||||
wal_dir = 'pg_xlog'
|
||||
|
||||
if filename.endswith('.gz'):
|
||||
dst_file = os.path.join(replica.data_dir, wal_dir, 'pbk_prefetch', filename[:-3])
|
||||
with gzip.open(src_file, 'rb') as f_in, open(dst_file, 'wb') as f_out:
|
||||
shutil.copyfileobj(f_in, f_out)
|
||||
else:
|
||||
dst_file = os.path.join(replica.data_dir, wal_dir, 'pbk_prefetch', filename)
|
||||
shutil.copyfile(src_file, dst_file)
|
||||
|
||||
print(dst_file)
|
||||
|
||||
# corrupt file
|
||||
if files[-2].endswith('.gz'):
|
||||
filename = files[-2][:-3]
|
||||
else:
|
||||
filename = files[-2]
|
||||
|
||||
prefetched_file = os.path.join(replica.data_dir, wal_dir, 'pbk_prefetch', filename)
|
||||
|
||||
with open(prefetched_file, "rb+", 0) as f:
|
||||
f.seek(8192*2)
|
||||
f.write(b"SURIKEN")
|
||||
f.flush()
|
||||
f.close()
|
||||
|
||||
# enable restore_command
|
||||
restore_command = self.get_restore_command(backup_dir, 'node', replica)
|
||||
restore_command += ' --batch-size=2 --log-level-console=VERBOSE'
|
||||
|
||||
if node.major_version >= 12:
|
||||
self.set_auto_conf(replica, {'restore_command': restore_command})
|
||||
else:
|
||||
replica.append_conf(
|
||||
'recovery.conf', "restore_command = '{0}'".format(restore_command))
|
||||
|
||||
os.remove(os.path.join(replica.logs_dir, 'postgresql.log'))
|
||||
replica.slow_start(replica=True)
|
||||
|
||||
sleep(10)
|
||||
|
||||
with open(os.path.join(replica.logs_dir, 'postgresql.log'), 'r') as f:
|
||||
postgres_log_content = f.read()
|
||||
|
||||
self.assertIn(
|
||||
'Prefetched WAL segment {0} is invalid, cannot use it'.format(filename),
|
||||
postgres_log_content)
|
||||
|
||||
self.assertIn(
|
||||
'LOG: restored log file "{0}" from archive'.format(filename),
|
||||
postgres_log_content)
|
||||
|
||||
# Clean after yourself
|
||||
self.del_test_dir(module_name, fname)
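The corruption scenario overwrites a few bytes of one prefetched segment and expects archive-get to report 'Prefetched WAL segment ... is invalid' and re-fetch it from the archive. The byte-flipping step, factored out of the f.seek/f.write block above into a reusable helper; offset and payload are the arbitrary values the test already uses:

def corrupt_file(path, offset=8192 * 2, garbage=b"SURIKEN"):
    """Overwrite a few bytes inside a file so its checksum/validation fails."""
    with open(path, "rb+", 0) as f:
        f.seek(offset)
        f.write(garbage)
        f.flush()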
|
||||
|
||||
# TODO test with multiple not archived segments.
|
||||
# TODO corrupted file in archive.
|
||||
|
||||
# important - switchpoint may be NullOffset LSN and not actually existing in archive to boot.
|
||||
# so write WAL validation code accordingly
|
||||
|
||||
|
@ -228,10 +228,9 @@ class BackupTest(ProbackupTest, unittest.TestCase):
|
||||
"without valid full backup.\n Output: {0} \n CMD: {1}".format(
|
||||
repr(self.output), self.cmd))
|
||||
except ProbackupException as e:
|
||||
self.assertIn(
|
||||
"ERROR: Valid backup on current timeline 1 is not found. "
|
||||
"Create new FULL backup before an incremental one.",
|
||||
e.message,
|
||||
self.assertTrue(
|
||||
"WARNING: Valid backup on current timeline 1 is not found" in e.message and
|
||||
"ERROR: Create new full backup before an incremental one" in e.message,
|
||||
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
|
||||
repr(e.message), self.cmd))
|
||||
|
||||
@ -2294,10 +2293,9 @@ class BackupTest(ProbackupTest, unittest.TestCase):
|
||||
"\n Output: {0} \n CMD: {1}".format(
|
||||
repr(self.output), self.cmd))
|
||||
except ProbackupException as e:
|
||||
self.assertIn(
|
||||
'ERROR: Valid backup on current timeline 1 is not found. '
|
||||
'Create new FULL backup before an incremental one.',
|
||||
e.message,
|
||||
self.assertTrue(
|
||||
'WARNING: Valid backup on current timeline 1 is not found' in e.message and
|
||||
'ERROR: Create new full backup before an incremental one' in e.message,
|
||||
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
|
||||
repr(e.message), self.cmd))
|
||||
|
||||
@ -2324,10 +2322,13 @@ class BackupTest(ProbackupTest, unittest.TestCase):
|
||||
initdb_params=['--data-checksums'],
|
||||
pg_options={
|
||||
'archive_timeout': '30s',
|
||||
'checkpoint_timeout': '1h'})
|
||||
'archive_mode': 'always',
|
||||
'checkpoint_timeout': '60s',
|
||||
'wal_level': 'logical'})
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'node', node)
|
||||
self.set_config(backup_dir, 'node', options=['--archive-timeout=60s'])
|
||||
self.set_archiving(backup_dir, 'node', node)
|
||||
node.slow_start()
|
||||
|
||||
@ -2447,12 +2448,15 @@ class BackupTest(ProbackupTest, unittest.TestCase):
|
||||
self.restore_node(backup_dir, 'node', replica)
|
||||
self.set_replica(node, replica)
|
||||
self.add_instance(backup_dir, 'replica', replica)
|
||||
self.set_config(
|
||||
backup_dir, 'replica',
|
||||
options=['--archive-timeout=120s', '--log-level-console=LOG'])
|
||||
self.set_archiving(backup_dir, 'replica', replica, replica=True)
|
||||
self.set_auto_conf(replica, {'hot_standby': 'on'})
|
||||
|
||||
# freeze bgwriter to get rid of RUNNING XACTS records
|
||||
bgwriter_pid = node.auxiliary_pids[ProcessType.BackgroundWriter][0]
|
||||
gdb_checkpointer = self.gdb_attach(bgwriter_pid)
|
||||
# bgwriter_pid = node.auxiliary_pids[ProcessType.BackgroundWriter][0]
|
||||
# gdb_checkpointer = self.gdb_attach(bgwriter_pid)
|
||||
|
||||
copy_tree(
|
||||
os.path.join(backup_dir, 'wal', 'node'),
|
||||
@ -2460,21 +2464,22 @@ class BackupTest(ProbackupTest, unittest.TestCase):
|
||||
|
||||
replica.slow_start(replica=True)
|
||||
|
||||
self.switch_wal_segment(node)
|
||||
self.switch_wal_segment(node)
|
||||
# self.switch_wal_segment(node)
|
||||
# self.switch_wal_segment(node)
|
||||
|
||||
# FULL backup from replica
|
||||
self.backup_node(
|
||||
backup_dir, 'replica', replica,
|
||||
datname='backupdb', options=['--stream', '-U', 'backup', '--archive-timeout=30s'])
|
||||
datname='backupdb', options=['-U', 'backup'])
|
||||
|
||||
# stream full backup from replica
|
||||
self.backup_node(
|
||||
backup_dir, 'replica', replica,
|
||||
datname='backupdb', options=['--stream', '-U', 'backup'])
|
||||
|
||||
# self.switch_wal_segment(node)
|
||||
|
||||
self.backup_node(
|
||||
backup_dir, 'replica', replica, datname='backupdb',
|
||||
options=['-U', 'backup', '--archive-timeout=300s'])
|
||||
|
||||
# PAGE backup from replica
|
||||
self.switch_wal_segment(node)
|
||||
self.backup_node(
|
||||
backup_dir, 'replica', replica, backup_type='page',
|
||||
datname='backupdb', options=['-U', 'backup', '--archive-timeout=30s'])
|
||||
@ -2484,20 +2489,22 @@ class BackupTest(ProbackupTest, unittest.TestCase):
|
||||
datname='backupdb', options=['--stream', '-U', 'backup'])
|
||||
|
||||
# DELTA backup from replica
|
||||
self.switch_wal_segment(node)
|
||||
self.backup_node(
|
||||
backup_dir, 'replica', replica, backup_type='delta',
|
||||
datname='backupdb', options=['-U', 'backup', '--archive-timeout=30s'])
|
||||
datname='backupdb', options=['-U', 'backup'])
|
||||
self.backup_node(
|
||||
backup_dir, 'replica', replica, backup_type='delta',
|
||||
datname='backupdb', options=['--stream', '-U', 'backup'])
|
||||
|
||||
# PTRACK backup from replica
|
||||
if self.ptrack:
|
||||
self.switch_wal_segment(node)
|
||||
self.backup_node(
|
||||
backup_dir, 'replica', replica, backup_type='delta',
|
||||
datname='backupdb', options=['-U', 'backup', '--archive-timeout=30s'])
|
||||
backup_dir, 'replica', replica, backup_type='ptrack',
|
||||
datname='backupdb', options=['-U', 'backup'])
|
||||
self.backup_node(
|
||||
backup_dir, 'replica', replica, backup_type='delta',
|
||||
backup_dir, 'replica', replica, backup_type='ptrack',
|
||||
datname='backupdb', options=['--stream', '-U', 'backup'])
|
||||
|
||||
# Clean after yourself
|
||||
|
@@ -339,7 +339,7 @@ class ProbackupTest(object):
options['wal_level'] = 'logical'
options['hot_standby'] = 'off'

options['log_line_prefix'] = '"%t [%p]: [%l-1] "'
options['log_line_prefix'] = '%t [%p]: [%l-1] '
options['log_statement'] = 'none'
options['log_duration'] = 'on'
options['log_min_duration_statement'] = 0
@@ -1131,7 +1131,8 @@ class ProbackupTest(object):

def set_archiving(
self, backup_dir, instance, node, replica=False,
overwrite=False, compress=False, old_binary=False):
overwrite=False, compress=False, old_binary=False,
log_level=False, archive_timeout=False):

# parse postgresql.auto.conf
options = {}
@@ -1161,12 +1162,26 @@ class ProbackupTest(object):
if overwrite:
options['archive_command'] += '--overwrite '

options['archive_command'] += '--log-level-console=verbose '
options['archive_command'] += '-j 5 '
options['archive_command'] += '--batch-size 10 '
options['archive_command'] += '--no-sync '

if archive_timeout:
options['archive_command'] += '--archive-timeout={0} '.format(
archive_timeout)

if os.name == 'posix':
options['archive_command'] += '--wal-file-path=%p --wal-file-name=%f'

elif os.name == 'nt':
options['archive_command'] += '--wal-file-path="%p" --wal-file-name="%f"'

if log_level:
options['archive_command'] += ' --log-level-console={0}'.format(log_level)
options['archive_command'] += ' --log-level-file={0} '.format(log_level)


self.set_auto_conf(node, options)
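set_archiving() now always folds verbosity, -j 5, --batch-size 10 and --no-sync into archive_command, optionally adds --archive-timeout and a file log level, and finally appends the --wal-file-path/--wal-file-name placeholders. Assembled outside the harness, the command would look roughly like this; the leading 'archive-push -B ... --instance=...' part is not visible in this hunk and is reconstructed from the pg_probackup CLI, so treat it as an assumption:

def build_archive_command(probackup_path, backup_dir, instance,
                          overwrite=False, archive_timeout=None, log_level=None):
    """Compose an archive_command the way set_archiving() does, for a POSIX host."""
    cmd = '"{0}" archive-push -B {1} --instance={2} '.format(
        probackup_path, backup_dir, instance)
    if overwrite:
        cmd += '--overwrite '
    cmd += '--log-level-console=verbose -j 5 --batch-size 10 --no-sync '
    if archive_timeout:
        cmd += '--archive-timeout={0} '.format(archive_timeout)
    if log_level:
        cmd += '--log-level-file={0} '.format(log_level)
    cmd += '--wal-file-path=%p --wal-file-name=%f'
    return cmd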
|
||||
|
||||
def get_restore_command(self, backup_dir, instance, node):
@@ -1244,7 +1259,8 @@ class ProbackupTest(object):
def set_replica(
self, master, replica,
replica_name='replica',
synchronous=False
synchronous=False,
log_shipping=False
):

self.set_auto_conf(
@@ -1264,19 +1280,22 @@ class ProbackupTest(object):
if os.stat(probackup_recovery_path).st_size > 0:
config = 'probackup_recovery.conf'

self.set_auto_conf(
replica,
{'primary_conninfo': 'user={0} port={1} application_name={2} '
' sslmode=prefer sslcompression=1'.format(
self.user, master.port, replica_name)},
config)
if not log_shipping:
self.set_auto_conf(
replica,
{'primary_conninfo': 'user={0} port={1} application_name={2} '
' sslmode=prefer sslcompression=1'.format(
self.user, master.port, replica_name)},
config)
else:
replica.append_conf('recovery.conf', 'standby_mode = on')
replica.append_conf(
'recovery.conf',
"primary_conninfo = 'user={0} port={1} application_name={2}"
" sslmode=prefer sslcompression=1'".format(
self.user, master.port, replica_name))

if not log_shipping:
replica.append_conf(
'recovery.conf',
"primary_conninfo = 'user={0} port={1} application_name={2}"
" sslmode=prefer sslcompression=1'".format(
self.user, master.port, replica_name))

if synchronous:
self.set_auto_conf(
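With the new log_shipping flag, set_replica() writes primary_conninfo only when streaming replication is wanted; a pure log-shipping standby gets its recovery settings without any primary_conninfo. A condensed view of that decision using a testgres-style append_conf API; this is a sketch of the idea, not the helper itself:

def configure_standby(replica, master, user, replica_name='replica',
                      log_shipping=False, version=12):
    """Skip primary_conninfo when the standby is fed by WAL shipping only."""
    conninfo = ("user={0} port={1} application_name={2} "
                "sslmode=prefer sslcompression=1").format(user, master.port, replica_name)
    if version >= 12:
        if not log_shipping:
            replica.append_conf('postgresql.auto.conf',
                                "primary_conninfo = '{0}'".format(conninfo))
    else:
        replica.append_conf('recovery.conf', 'standby_mode = on')
        if not log_shipping:
            replica.append_conf('recovery.conf',
                                "primary_conninfo = '{0}'".format(conninfo))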
|
||||
|
@ -819,7 +819,7 @@ class PageTest(ProbackupTest, unittest.TestCase):
|
||||
self.backup_node(backup_dir, 'node', node)
|
||||
|
||||
# make some wals
|
||||
node.pgbench_init(scale=4)
|
||||
node.pgbench_init(scale=10)
|
||||
|
||||
# delete last wal segment
|
||||
wals_dir = os.path.join(backup_dir, 'wal', 'node')
|
||||
@ -874,7 +874,6 @@ class PageTest(ProbackupTest, unittest.TestCase):
|
||||
'INFO: Wait for WAL segment' in e.message and
|
||||
'to be archived' in e.message and
|
||||
'Could not read WAL record at' in e.message and
|
||||
'incorrect resource manager data checksum in record at' in e.message and
|
||||
'Possible WAL corruption. Error has occured during reading WAL segment' in e.message,
|
||||
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
|
||||
repr(e.message), self.cmd))
|
||||
@ -899,7 +898,6 @@ class PageTest(ProbackupTest, unittest.TestCase):
|
||||
'INFO: Wait for WAL segment' in e.message and
|
||||
'to be archived' in e.message and
|
||||
'Could not read WAL record at' in e.message and
|
||||
'incorrect resource manager data checksum in record at' in e.message and
|
||||
'Possible WAL corruption. Error has occured during reading WAL segment "{0}"'.format(
|
||||
file) in e.message,
|
||||
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
|
||||
@ -942,8 +940,10 @@ class PageTest(ProbackupTest, unittest.TestCase):
|
||||
self.set_archiving(backup_dir, 'alien_node', alien_node)
|
||||
alien_node.slow_start()
|
||||
|
||||
self.backup_node(backup_dir, 'node', node)
|
||||
self.backup_node(backup_dir, 'alien_node', alien_node)
|
||||
self.backup_node(
|
||||
backup_dir, 'node', node, options=['--stream'])
|
||||
self.backup_node(
|
||||
backup_dir, 'alien_node', alien_node, options=['--stream'])
|
||||
|
||||
# make some wals
|
||||
node.safe_psql(
|
||||
@ -996,8 +996,6 @@ class PageTest(ProbackupTest, unittest.TestCase):
|
||||
'INFO: Wait for WAL segment' in e.message and
|
||||
'to be archived' in e.message and
|
||||
'Could not read WAL record at' in e.message and
|
||||
'WAL file is from different database system: WAL file database system identifier is' in e.message and
|
||||
'pg_control database system identifier is' in e.message and
|
||||
'Possible WAL corruption. Error has occured during reading WAL segment' in e.message,
|
||||
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
|
||||
repr(e.message), self.cmd))
|
||||
@ -1181,6 +1179,85 @@ class PageTest(ProbackupTest, unittest.TestCase):
|
||||
# Clean after yourself
|
||||
self.del_test_dir(module_name, fname)
|
||||
|
||||
# @unittest.skip("skip")
|
||||
# @unittest.expectedFailure
|
||||
def test_multi_timeline_page(self):
|
||||
"""
|
||||
Check that backup in PAGE mode choose
|
||||
parent backup correctly:
|
||||
t12 /---P-->
|
||||
...
|
||||
t3 /---->
|
||||
t2 /---->
|
||||
t1 -F-----D->
|
||||
|
||||
P must have F as parent
|
||||
"""
|
||||
fname = self.id().split('.')[3]
|
||||
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||
node = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'node'),
|
||||
set_replication=True,
|
||||
initdb_params=['--data-checksums'],
|
||||
pg_options={'autovacuum': 'off'})
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'node', node)
|
||||
self.set_archiving(backup_dir, 'node', node)
|
||||
node.slow_start()
|
||||
|
||||
node.pgbench_init(scale=50)
|
||||
full_id = self.backup_node(backup_dir, 'node', node)
|
||||
|
||||
pgbench = node.pgbench(options=['-T', '20', '-c', '1', '--no-vacuum'])
|
||||
pgbench.wait()
|
||||
|
||||
self.backup_node(backup_dir, 'node', node, backup_type='delta')
|
||||
|
||||
node.cleanup()
|
||||
self.restore_node(
|
||||
backup_dir, 'node', node, backup_id=full_id,
|
||||
options=[
|
||||
'--recovery-target=immediate',
|
||||
'--recovery-target-action=promote'])
|
||||
|
||||
node.slow_start()
|
||||
|
||||
pgbench = node.pgbench(options=['-T', '20', '-c', '1', '--no-vacuum'])
|
||||
pgbench.wait()
|
||||
|
||||
# create timelines
|
||||
for i in range(2, 12):
|
||||
node.cleanup()
|
||||
self.restore_node(
|
||||
backup_dir, 'node', node, backup_id=full_id,
|
||||
options=['--recovery-target-timeline={0}'.format(i)])
|
||||
node.slow_start()
|
||||
pgbench = node.pgbench(options=['-T', '3', '-c', '1', '--no-vacuum'])
|
||||
pgbench.wait()
|
||||
|
||||
page_id = self.backup_node(
|
||||
backup_dir, 'node', node, backup_type='page',
|
||||
options=['--log-level-file=VERBOSE'])
|
||||
|
||||
pgdata = self.pgdata_content(node.data_dir)
|
||||
node.cleanup()
|
||||
self.restore_node(backup_dir, 'node', node)
|
||||
pgdata_restored = self.pgdata_content(node.data_dir)
|
||||
self.compare_pgdata(pgdata, pgdata_restored)
|
||||
|
||||
show = self.show_archive(backup_dir)
|
||||
|
||||
timelines = show[0]['timelines']
|
||||
|
||||
# self.assertEqual()
|
||||
self.assertEqual(
|
||||
self.show_pb(backup_dir, 'node', page_id)['parent-backup-id'],
|
||||
full_id)
|
||||
|
||||
# Clean after yourself
|
||||
self.del_test_dir(module_name, fname)
|
||||
|
||||
@unittest.skip("skip")
|
||||
# @unittest.expectedFailure
|
||||
def test_page_pg_resetxlog(self):
|
||||
|
151 tests/ptrack.py
@ -3,10 +3,10 @@ import unittest
|
||||
from .helpers.ptrack_helpers import ProbackupTest, ProbackupException, idx_ptrack
|
||||
from datetime import datetime, timedelta
|
||||
import subprocess
|
||||
from testgres import QueryException
|
||||
from testgres import QueryException, StartNodeException
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
from time import sleep
|
||||
from threading import Thread
|
||||
|
||||
|
||||
@ -210,46 +210,36 @@ class PtrackTest(ProbackupTest, unittest.TestCase):
|
||||
"GRANT EXECUTE ON FUNCTION pg_catalog.txid_snapshot_xmax(txid_snapshot) TO backup;"
|
||||
)
|
||||
|
||||
if self.ptrack:
|
||||
fnames = []
|
||||
if node.major_version < 12:
|
||||
fnames += [
|
||||
'pg_catalog.oideq(oid, oid)',
|
||||
'pg_catalog.ptrack_version()',
|
||||
'pg_catalog.pg_ptrack_clear()',
|
||||
'pg_catalog.pg_ptrack_control_lsn()',
|
||||
'pg_catalog.pg_ptrack_get_and_clear_db(oid, oid)',
|
||||
'pg_catalog.pg_ptrack_get_and_clear(oid, oid)',
|
||||
'pg_catalog.pg_ptrack_get_block_2(oid, oid, oid, bigint)'
|
||||
]
|
||||
else:
|
||||
# TODO why backup works without these grants ?
|
||||
# fnames += [
|
||||
# 'pg_ptrack_get_pagemapset(pg_lsn)',
|
||||
# 'pg_ptrack_control_lsn()',
|
||||
# 'pg_ptrack_get_block(oid, oid, oid, bigint)'
|
||||
# ]
|
||||
|
||||
node.safe_psql(
|
||||
"backupdb",
|
||||
"CREATE SCHEMA ptrack")
|
||||
|
||||
node.safe_psql(
|
||||
"backupdb",
|
||||
"CREATE EXTENSION ptrack WITH SCHEMA ptrack")
|
||||
|
||||
node.safe_psql(
|
||||
"backupdb",
|
||||
"GRANT USAGE ON SCHEMA ptrack TO backup")
|
||||
if node.major_version < 12:
|
||||
fnames = [
|
||||
'pg_catalog.oideq(oid, oid)',
|
||||
'pg_catalog.ptrack_version()',
|
||||
'pg_catalog.pg_ptrack_clear()',
|
||||
'pg_catalog.pg_ptrack_control_lsn()',
|
||||
'pg_catalog.pg_ptrack_get_and_clear_db(oid, oid)',
|
||||
'pg_catalog.pg_ptrack_get_and_clear(oid, oid)',
|
||||
'pg_catalog.pg_ptrack_get_block_2(oid, oid, oid, bigint)'
|
||||
]
|
||||
|
||||
for fname in fnames:
|
||||
node.safe_psql(
|
||||
"backupdb",
|
||||
"GRANT EXECUTE ON FUNCTION {0} TO backup".format(fname))
|
||||
|
||||
else:
|
||||
node.safe_psql(
|
||||
"backupdb",
|
||||
"GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup")
|
||||
"CREATE SCHEMA ptrack")
|
||||
node.safe_psql(
|
||||
"backupdb",
|
||||
"CREATE EXTENSION ptrack WITH SCHEMA ptrack")
|
||||
node.safe_psql(
|
||||
"backupdb",
|
||||
"GRANT USAGE ON SCHEMA ptrack TO backup")
|
||||
|
||||
node.safe_psql(
|
||||
"backupdb",
|
||||
"GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup")
|
||||
|
||||
if ProbackupTest.enterprise:
|
||||
node.safe_psql(
|
||||
@ -3848,7 +3838,7 @@ class PtrackTest(ProbackupTest, unittest.TestCase):
|
||||
self.del_test_dir(module_name, fname)
|
||||
|
||||
# @unittest.skip("skip")
|
||||
# @unittest.expectedFailure
|
||||
@unittest.expectedFailure
|
||||
def test_ptrack_pg_resetxlog(self):
|
||||
fname = self.id().split('.')[3]
|
||||
node = self.make_simple_node(
|
||||
@ -4016,14 +4006,17 @@ class PtrackTest(ProbackupTest, unittest.TestCase):
|
||||
|
||||
node.stop(['-m', 'immediate', '-D', node.data_dir])
|
||||
|
||||
ptrack_map = os.path.join(node.data_dir, 'global', 'ptrack.map')
|
||||
ptrack_map_mmap = os.path.join(node.data_dir, 'global', 'ptrack.map.mmap')
|
||||
|
||||
# Let`s do index corruption. ptrack.map, ptrack.map.mmap
|
||||
with open(os.path.join(node.data_dir, 'global', 'ptrack.map'), "rb+", 0) as f:
|
||||
with open(ptrack_map, "rb+", 0) as f:
|
||||
f.seek(42)
|
||||
f.write(b"blablahblahs")
|
||||
f.flush()
|
||||
f.close
|
||||
|
||||
with open(os.path.join(node.data_dir, 'global', 'ptrack.map.mmap'), "rb+", 0) as f:
|
||||
with open(ptrack_map_mmap, "rb+", 0) as f:
|
||||
f.seek(42)
|
||||
f.write(b"blablahblahs")
|
||||
f.flush()
|
||||
@ -4031,13 +4024,97 @@ class PtrackTest(ProbackupTest, unittest.TestCase):
|
||||
|
||||
# os.remove(os.path.join(node.logs_dir, node.pg_log_name))
|
||||
|
||||
try:
|
||||
node.slow_start()
|
||||
# we should die here because exception is what we expect to happen
|
||||
self.assertEqual(
|
||||
1, 0,
|
||||
"Expecting Error because ptrack.map is corrupted"
|
||||
"\n Output: {0} \n CMD: {1}".format(
|
||||
repr(self.output), self.cmd))
|
||||
except StartNodeException as e:
|
||||
self.assertIn(
|
||||
'Cannot start node',
|
||||
e.message,
|
||||
'\n Unexpected Error Message: {0}\n'
|
||||
' CMD: {1}'.format(repr(e.message), self.cmd))
|
||||
|
||||
log_file = os.path.join(node.logs_dir, 'postgresql.log')
|
||||
with open(log_file, 'r') as f:
|
||||
log_content = f.read()
|
||||
|
||||
self.assertIn(
|
||||
'FATAL: incorrect checksum of file "{0}"'.format(ptrack_map),
|
||||
log_content)
|
||||
|
||||
self.set_auto_conf(node, {'ptrack_map_size': '0'})
|
||||
|
||||
node.slow_start()
|
||||
|
||||
try:
|
||||
self.backup_node(
|
||||
backup_dir, 'node', node,
|
||||
backup_type='ptrack', options=['--stream'])
|
||||
# we should die here because exception is what we expect to happen
|
||||
self.assertEqual(
|
||||
1, 0,
|
||||
"Expecting Error because instance ptrack is disabled"
|
||||
"\n Output: {0} \n CMD: {1}".format(
|
||||
repr(self.output), self.cmd))
|
||||
except ProbackupException as e:
|
||||
self.assertIn(
|
||||
'ERROR: Ptrack is disabled',
|
||||
e.message,
|
||||
'\n Unexpected Error Message: {0}\n'
|
||||
' CMD: {1}'.format(repr(e.message), self.cmd))
|
||||
|
||||
node.safe_psql(
|
||||
'postgres',
|
||||
"update t_heap set id = nextval('t_seq'), text = md5(text), "
|
||||
"tsvector = md5(repeat(tsvector::text, 10))::tsvector")
|
||||
|
||||
node.stop(['-m', 'immediate', '-D', node.data_dir])
|
||||
|
||||
self.set_auto_conf(node, {'ptrack_map_size': '32'})
|
||||
|
||||
node.slow_start()
|
||||
|
||||
sleep(1)
|
||||
|
||||
try:
|
||||
self.backup_node(
|
||||
backup_dir, 'node', node,
|
||||
backup_type='ptrack', options=['--stream'])
|
||||
# we should die here because exception is what we expect to happen
|
||||
self.assertEqual(
|
||||
1, 0,
|
||||
"Expecting Error because ptrack map is from future"
|
||||
"\n Output: {0} \n CMD: {1}".format(
|
||||
repr(self.output), self.cmd))
|
||||
except ProbackupException as e:
|
||||
self.assertIn(
|
||||
'ERROR: LSN from ptrack_control',
|
||||
e.message,
|
||||
'\n Unexpected Error Message: {0}\n'
|
||||
' CMD: {1}'.format(repr(e.message), self.cmd))
|
||||
|
||||
sleep(1)
|
||||
|
||||
self.backup_node(
|
||||
backup_dir, 'node', node,
|
||||
backup_type='delta', options=['--stream'])
|
||||
|
||||
node.safe_psql(
|
||||
'postgres',
|
||||
"update t_heap set id = nextval('t_seq'), text = md5(text), "
|
||||
"tsvector = md5(repeat(tsvector::text, 10))::tsvector")
|
||||
|
||||
self.backup_node(
|
||||
backup_dir, 'node', node,
|
||||
backup_type='ptrack', options=['--stream'])
|
||||
|
||||
pgdata = self.pgdata_content(node.data_dir)
|
||||
|
||||
node.cleanup()
|
||||
|
||||
self.restore_node(backup_dir, 'node', node)
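The new ptrack scenario corrupts global/ptrack.map and ptrack.map.mmap, expects startup to fail with 'incorrect checksum of file', then recovers by setting ptrack_map_size = 0 (which also disables PTRACK backups until a fresh full or delta backup is taken). The recovery step in isolation, using the suite's own set_auto_conf/slow_start helpers; sketch only:

def recover_from_corrupt_ptrack_map(test, node):
    """Disable the corrupted ptrack map so the node can start, as the test above does."""
    # a corrupted ptrack.map makes startup fail its checksum check;
    # ptrack_map_size = 0 makes the server discard the map instead of reading it
    test.set_auto_conf(node, {'ptrack_map_size': '0'})
    node.slow_start()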
|
||||
|
621 tests/replica.py
@ -571,30 +571,25 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
|
||||
'Skipped because backup from replica is not supported in PG 9.5')
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'master', master)
|
||||
self.set_archiving(backup_dir, 'master', master)
|
||||
self.add_instance(backup_dir, 'node', master)
|
||||
self.set_archiving(backup_dir, 'node', master)
|
||||
master.slow_start()
|
||||
|
||||
# freeze bgwriter to get rid of RUNNING XACTS records
|
||||
bgwriter_pid = master.auxiliary_pids[ProcessType.BackgroundWriter][0]
|
||||
gdb_checkpointer = self.gdb_attach(bgwriter_pid)
|
||||
|
||||
self.backup_node(backup_dir, 'master', master)
|
||||
self.backup_node(backup_dir, 'node', master)
|
||||
|
||||
# Create replica
|
||||
replica = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'replica'))
|
||||
replica.cleanup()
|
||||
self.restore_node(backup_dir, 'master', replica)
|
||||
self.restore_node(backup_dir, 'node', replica)
|
||||
|
||||
# Settings for Replica
|
||||
self.add_instance(backup_dir, 'replica', replica)
|
||||
self.set_replica(master, replica, synchronous=True)
|
||||
self.set_archiving(backup_dir, 'replica', replica, replica=True)
|
||||
|
||||
copy_tree(
|
||||
os.path.join(backup_dir, 'wal', 'master'),
|
||||
os.path.join(backup_dir, 'wal', 'replica'))
|
||||
self.set_archiving(backup_dir, 'node', replica, replica=True)
|
||||
|
||||
replica.slow_start(replica=True)
|
||||
|
||||
@ -602,7 +597,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
|
||||
self.switch_wal_segment(master)
|
||||
|
||||
output = self.backup_node(
|
||||
backup_dir, 'replica', replica,
|
||||
backup_dir, 'node', replica, replica.data_dir,
|
||||
options=[
|
||||
'--archive-timeout=30',
|
||||
'--log-level-console=LOG',
|
||||
@ -611,24 +606,24 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
|
||||
return_id=False)
|
||||
|
||||
self.assertIn(
|
||||
'LOG: Null offset in stop_backup_lsn value 0/3000000',
|
||||
'LOG: Null offset in stop_backup_lsn value 0/4000000',
|
||||
output)
|
||||
|
||||
self.assertIn(
|
||||
'WARNING: WAL segment 000000010000000000000003 could not be streamed in 30 seconds',
|
||||
'WARNING: WAL segment 000000010000000000000004 could not be streamed in 30 seconds',
|
||||
output)
|
||||
|
||||
self.assertIn(
|
||||
'WARNING: Failed to get next WAL record after 0/3000000, looking for previous WAL record',
|
||||
'WARNING: Failed to get next WAL record after 0/4000000, looking for previous WAL record',
|
||||
output)
|
||||
|
||||
self.assertIn(
|
||||
'LOG: Looking for LSN 0/3000000 in segment: 000000010000000000000002',
|
||||
'LOG: Looking for LSN 0/4000000 in segment: 000000010000000000000003',
|
||||
output)
|
||||
|
||||
self.assertIn(
|
||||
'has endpoint 0/3000000 which is '
|
||||
'equal or greater than requested LSN 0/3000000',
|
||||
'has endpoint 0/4000000 which is '
|
||||
'equal or greater than requested LSN 0/4000000',
|
||||
output)
|
||||
|
||||
self.assertIn(
|
||||
@ -719,19 +714,19 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
|
||||
log_content = f.read()
|
||||
|
||||
self.assertIn(
|
||||
'LOG: Null offset in stop_backup_lsn value 0/3000000',
|
||||
'LOG: Null offset in stop_backup_lsn value 0/4000000',
|
||||
log_content)
|
||||
|
||||
self.assertIn(
|
||||
'LOG: Looking for segment: 000000010000000000000003',
|
||||
'LOG: Looking for segment: 000000010000000000000004',
|
||||
log_content)
|
||||
|
||||
self.assertIn(
|
||||
'LOG: First record in WAL segment "000000010000000000000003": 0/3000028',
|
||||
'LOG: First record in WAL segment "000000010000000000000004": 0/4000028',
|
||||
log_content)
|
||||
|
||||
self.assertIn(
|
||||
'LOG: current.stop_lsn: 0/3000028',
|
||||
'LOG: current.stop_lsn: 0/4000028',
|
||||
log_content)
|
||||
|
||||
# Clean after yourself
|
||||
@ -757,31 +752,26 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
|
||||
'Skipped because backup from replica is not supported in PG 9.5')
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'master', master)
|
||||
self.set_archiving(backup_dir, 'master', master)
|
||||
self.add_instance(backup_dir, 'node', master)
|
||||
self.set_archiving(backup_dir, 'node', master)
|
||||
master.slow_start()
|
||||
|
||||
self.backup_node(backup_dir, 'master', master)
|
||||
self.backup_node(backup_dir, 'node', master)
|
||||
|
||||
# Create replica
|
||||
replica = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'replica'))
|
||||
replica.cleanup()
|
||||
self.restore_node(backup_dir, 'master', replica)
|
||||
self.restore_node(backup_dir, 'node', replica)
|
||||
|
||||
# Settings for Replica
|
||||
self.add_instance(backup_dir, 'replica', replica)
|
||||
self.set_replica(master, replica, synchronous=True)
|
||||
self.set_archiving(backup_dir, 'replica', replica, replica=True)
|
||||
self.set_archiving(backup_dir, 'node', replica, replica=True)
|
||||
|
||||
# freeze bgwriter to get rid of RUNNING XACTS records
|
||||
bgwriter_pid = master.auxiliary_pids[ProcessType.BackgroundWriter][0]
|
||||
gdb_checkpointer = self.gdb_attach(bgwriter_pid)
|
||||
|
||||
copy_tree(
|
||||
os.path.join(backup_dir, 'wal', 'master'),
|
||||
os.path.join(backup_dir, 'wal', 'replica'))
|
||||
|
||||
replica.slow_start(replica=True)
|
||||
|
||||
self.switch_wal_segment(master)
|
||||
@ -789,7 +779,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
|
||||
|
||||
# take backup from replica
|
||||
output = self.backup_node(
|
||||
backup_dir, 'replica', replica,
|
||||
backup_dir, 'node', replica, replica.data_dir,
|
||||
options=[
|
||||
'--archive-timeout=30',
|
||||
'--log-level-console=LOG',
|
||||
@ -797,24 +787,24 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
|
||||
return_id=False)
|
||||
|
||||
self.assertIn(
|
||||
'LOG: Null offset in stop_backup_lsn value 0/3000000',
|
||||
'LOG: Null offset in stop_backup_lsn value 0/4000000',
|
||||
output)
|
||||
|
||||
self.assertIn(
|
||||
'WARNING: WAL segment 000000010000000000000003 could not be archived in 30 seconds',
|
||||
'WARNING: WAL segment 000000010000000000000004 could not be archived in 30 seconds',
|
||||
output)
|
||||
|
||||
self.assertIn(
|
||||
'WARNING: Failed to get next WAL record after 0/3000000, looking for previous WAL record',
|
||||
'WARNING: Failed to get next WAL record after 0/4000000, looking for previous WAL record',
|
||||
output)
|
||||
|
||||
self.assertIn(
|
||||
'LOG: Looking for LSN 0/3000000 in segment: 000000010000000000000002',
|
||||
'LOG: Looking for LSN 0/4000000 in segment: 000000010000000000000003',
|
||||
output)
|
||||
|
||||
self.assertIn(
|
||||
'has endpoint 0/3000000 which is '
|
||||
'equal or greater than requested LSN 0/3000000',
|
||||
'has endpoint 0/4000000 which is '
|
||||
'equal or greater than requested LSN 0/4000000',
|
||||
output)
|
||||
|
||||
self.assertIn(
|
||||
@ -846,44 +836,39 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
|
||||
'Skipped because backup from replica is not supported in PG 9.5')
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'master', master)
|
||||
self.set_archiving(backup_dir, 'master', master)
|
||||
self.add_instance(backup_dir, 'node', master)
|
||||
self.set_archiving(backup_dir, 'node', master)
|
||||
master.slow_start()
|
||||
|
||||
self.backup_node(backup_dir, 'master', master)
|
||||
self.backup_node(backup_dir, 'node', master)
|
||||
|
||||
# Create replica
|
||||
replica = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'replica'))
|
||||
replica.cleanup()
|
||||
self.restore_node(backup_dir, 'master', replica)
|
||||
self.restore_node(backup_dir, 'node', replica)
|
||||
|
||||
# Settings for Replica
|
||||
self.add_instance(backup_dir, 'replica', replica)
|
||||
self.set_replica(master, replica, synchronous=True)
|
||||
self.set_archiving(backup_dir, 'replica', replica, replica=True)
|
||||
|
||||
copy_tree(
|
||||
os.path.join(backup_dir, 'wal', 'master'),
|
||||
os.path.join(backup_dir, 'wal', 'replica'))
|
||||
self.set_archiving(backup_dir, 'node', replica, replica=True)
|
||||
|
||||
replica.slow_start(replica=True)
|
||||
|
||||
# take backup from replica
|
||||
self.backup_node(
|
||||
backup_dir, 'replica', replica,
|
||||
backup_dir, 'node', replica, replica.data_dir,
|
||||
options=[
|
||||
'--archive-timeout=30',
|
||||
'--log-level-console=verbose',
|
||||
'--log-level-console=LOG',
|
||||
'--no-validate'],
|
||||
return_id=False)
|
||||
|
||||
try:
|
||||
self.backup_node(
|
||||
backup_dir, 'replica', replica,
|
||||
backup_dir, 'node', replica, replica.data_dir,
|
||||
options=[
|
||||
'--archive-timeout=30',
|
||||
'--log-level-console=verbose',
|
||||
'--log-level-console=LOG',
|
||||
'--no-validate'])
|
||||
# we should die here because exception is what we expect to happen
|
||||
self.assertEqual(
|
||||
@ -893,19 +878,19 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
|
||||
repr(self.output), self.cmd))
|
||||
except ProbackupException as e:
|
||||
self.assertIn(
|
||||
'LOG: Looking for LSN 0/3000060 in segment: 000000010000000000000003',
|
||||
'LOG: Looking for LSN 0/4000060 in segment: 000000010000000000000004',
|
||||
e.message,
|
||||
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
|
||||
repr(e.message), self.cmd))
|
||||
|
||||
self.assertIn(
|
||||
'INFO: Wait for LSN 0/3000060 in archived WAL segment',
|
||||
'INFO: Wait for LSN 0/4000060 in archived WAL segment',
|
||||
e.message,
|
||||
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
|
||||
repr(e.message), self.cmd))
|
||||
|
||||
self.assertIn(
|
||||
'ERROR: WAL segment 000000010000000000000003 could not be archived in 30 seconds',
|
||||
'ERROR: WAL segment 000000010000000000000004 could not be archived in 30 seconds',
|
||||
e.message,
|
||||
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
|
||||
repr(e.message), self.cmd))
|
||||
@ -1016,7 +1001,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
|
||||
# Clean after yourself
|
||||
self.del_test_dir(module_name, fname)
|
||||
|
||||
# @unittest.skip("skip")
|
||||
@unittest.skip("skip")
|
||||
def test_replica_promote_1(self):
|
||||
"""
|
||||
"""
|
||||
@ -1037,7 +1022,7 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'master', master)
|
||||
# set replica True, so archive_mode 'always' is used.
|
||||
# set replica True, so archive_mode 'always' is used.
|
||||
self.set_archiving(backup_dir, 'master', master, replica=True)
|
||||
master.slow_start()
|
||||
|
||||
@ -1091,6 +1076,528 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
|
||||
# Clean after yourself
|
||||
self.del_test_dir(module_name, fname)
|
||||
|
||||
# @unittest.skip("skip")
|
||||
def test_replica_promote_2(self):
|
||||
"""
|
||||
"""
|
||||
fname = self.id().split('.')[3]
|
||||
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||
master = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'master'),
|
||||
set_replication=True,
|
||||
initdb_params=['--data-checksums'])
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'master', master)
|
||||
# set replica True, so archive_mode 'always' is used.
|
||||
self.set_archiving(
|
||||
backup_dir, 'master', master, replica=True)
|
||||
master.slow_start()
|
||||
|
||||
self.backup_node(backup_dir, 'master', master)
|
||||
|
||||
# Create replica
|
||||
replica = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'replica'))
|
||||
replica.cleanup()
|
||||
self.restore_node(backup_dir, 'master', replica)
|
||||
|
||||
# Settings for Replica
|
||||
self.set_replica(master, replica)
|
||||
self.set_auto_conf(replica, {'port': replica.port})
|
||||
|
||||
replica.slow_start(replica=True)
|
||||
|
||||
master.safe_psql(
|
||||
'postgres',
|
||||
'CREATE TABLE t1 AS '
|
||||
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
|
||||
'FROM generate_series(0,1) i')
|
||||
|
||||
self.wait_until_replica_catch_with_master(master, replica)
|
||||
|
||||
replica.promote()
|
||||
|
||||
replica.safe_psql(
|
||||
'postgres',
|
||||
'CHECKPOINT')
|
||||
|
||||
# replica.safe_psql(
|
||||
# 'postgres',
|
||||
# 'create table t2()')
|
||||
#
|
||||
# replica.safe_psql(
|
||||
# 'postgres',
|
||||
# 'CHECKPOINT')
|
||||
|
||||
self.backup_node(
|
||||
backup_dir, 'master', replica, data_dir=replica.data_dir,
|
||||
backup_type='page')
|
||||
|
||||
# Clean after yourself
|
||||
self.del_test_dir(module_name, fname)
|
||||
|
||||
# @unittest.skip("skip")
|
||||
def test_replica_promote_3(self):
|
||||
"""
|
||||
"""
|
||||
fname = self.id().split('.')[3]
|
||||
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||
master = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'master'),
|
||||
set_replication=True,
|
||||
initdb_params=['--data-checksums'])
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'master', master)
|
||||
|
||||
master.slow_start()
|
||||
|
||||
self.backup_node(backup_dir, 'master', master, options=['--stream'])
|
||||
|
||||
# Create replica
|
||||
replica = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'replica'))
|
||||
replica.cleanup()
|
||||
self.restore_node(backup_dir, 'master', replica)
|
||||
|
||||
# Settings for Replica
|
||||
self.set_replica(master, replica)
|
||||
self.set_auto_conf(replica, {'port': replica.port})
|
||||
|
||||
replica.slow_start(replica=True)
|
||||
|
||||
master.safe_psql(
|
||||
'postgres',
|
||||
'CREATE TABLE t1 AS '
|
||||
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
|
||||
'FROM generate_series(0,20) i')
|
||||
self.wait_until_replica_catch_with_master(master, replica)
|
||||
|
||||
self.add_instance(backup_dir, 'replica', replica)
|
||||
|
||||
replica.safe_psql(
|
||||
'postgres',
|
||||
'CHECKPOINT')
|
||||
|
||||
full_id = self.backup_node(
|
||||
backup_dir, 'replica',
|
||||
replica, options=['--stream'])
|
||||
|
||||
master.safe_psql(
|
||||
'postgres',
|
||||
'CREATE TABLE t2 AS '
|
||||
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
|
||||
'FROM generate_series(0,20) i')
|
||||
self.wait_until_replica_catch_with_master(master, replica)
|
||||
|
||||
replica.safe_psql(
|
||||
'postgres',
|
||||
'CHECKPOINT')
|
||||
|
||||
self.backup_node(
|
||||
backup_dir, 'replica', replica,
|
||||
backup_type='delta', options=['--stream'])
|
||||
|
||||
replica.promote()
|
||||
|
||||
replica.safe_psql(
|
||||
'postgres',
|
||||
'CHECKPOINT')
|
||||
|
||||
# failing, because without archving, it is impossible to
|
||||
# take multi-timeline backup.
        try:
            self.backup_node(
                backup_dir, 'replica', replica,
                backup_type='delta', options=['--stream'])
            # we should die here because exception is what we expect to happen
            self.assertEqual(
                1, 0,
                "Expecting Error because of timeline switch "
                "\n Output: {0} \n CMD: {1}".format(
                    repr(self.output), self.cmd))
        except ProbackupException as e:
            self.assertTrue(
                'WARNING: Cannot find valid backup on previous timelines, '
                'WAL archive is not available' in e.message and
                'ERROR: Create new full backup before an incremental one' in e.message,
                "\n Unexpected Error Message: {0}\n CMD: {1}".format(
                    repr(e.message), self.cmd))

        # Clean after yourself
        self.del_test_dir(module_name, fname)

# @unittest.skip("skip")
|
||||
def test_replica_promote_archive_delta(self):
|
||||
"""
|
||||
t3 /---D3-->
|
||||
t2 /------->
|
||||
t1 --F---D1--D2--
|
||||
"""
|
||||
        fname = self.id().split('.')[3]
        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
        node1 = self.make_simple_node(
            base_dir=os.path.join(module_name, fname, 'node1'),
            set_replication=True,
            initdb_params=['--data-checksums'],
            pg_options={
                'checkpoint_timeout': '30s',
                'archive_timeout': '30s',
                'autovacuum': 'off'})

        self.init_pb(backup_dir)
        self.add_instance(backup_dir, 'node', node1)
        self.set_config(
            backup_dir, 'node', options=['--archive-timeout=60s'])
        self.set_archiving(backup_dir, 'node', node1)

        node1.slow_start()

        self.backup_node(backup_dir, 'node', node1, options=['--stream'])

        # Create replica
        node2 = self.make_simple_node(
            base_dir=os.path.join(module_name, fname, 'node2'))
        node2.cleanup()
        self.restore_node(backup_dir, 'node', node2, node2.data_dir)

        # Settings for Replica
        self.set_replica(node1, node2)
        self.set_auto_conf(node2, {'port': node2.port})
        self.set_archiving(backup_dir, 'node', node2, replica=True)

        node2.slow_start(replica=True)

        node1.safe_psql(
            'postgres',
            'CREATE TABLE t1 AS '
            'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
            'FROM generate_series(0,20) i')
        self.wait_until_replica_catch_with_master(node1, node2)

        node1.safe_psql(
            'postgres',
            'CREATE TABLE t2 AS '
            'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
            'FROM generate_series(0,20) i')
        self.wait_until_replica_catch_with_master(node1, node2)

        # delta backup on replica on timeline 1
        delta1_id = self.backup_node(
            backup_dir, 'node', node2, node2.data_dir,
            'delta', options=['--stream'])

        # delta backup on replica on timeline 1
        delta2_id = self.backup_node(
            backup_dir, 'node', node2, node2.data_dir, 'delta')

        self.change_backup_status(
            backup_dir, 'node', delta2_id, 'ERROR')

        # node2 is now master
        node2.promote()
        node2.safe_psql('postgres', 'CHECKPOINT')

        node2.safe_psql(
            'postgres',
            'CREATE TABLE t3 AS '
            'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
            'FROM generate_series(0,20) i')

        # node1 is now replica
        node1.cleanup()
        # kludge "backup_id=delta1_id"
        self.restore_node(
            backup_dir, 'node', node1, node1.data_dir,
            backup_id=delta1_id,
            options=[
                '--recovery-target-timeline=2',
                '--recovery-target=latest'])

        # Settings for Replica
        self.set_replica(node2, node1)
        self.set_auto_conf(node1, {'port': node1.port})
        self.set_archiving(backup_dir, 'node', node1, replica=True)

        node1.slow_start(replica=True)

        node2.safe_psql(
            'postgres',
            'CREATE TABLE t4 AS '
            'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
            'FROM generate_series(0,30) i')
        self.wait_until_replica_catch_with_master(node2, node1)

        # node1 is back to be a master
        node1.promote()
        node1.safe_psql('postgres', 'CHECKPOINT')

        # delta backup on timeline 3
        self.backup_node(
            backup_dir, 'node', node1, node1.data_dir, 'delta',
            options=['--archive-timeout=60'])

        pgdata = self.pgdata_content(node1.data_dir)

        node1.cleanup()
        self.restore_node(backup_dir, 'node', node1, node1.data_dir)

        pgdata_restored = self.pgdata_content(node1.data_dir)
        self.compare_pgdata(pgdata, pgdata_restored)

        # Clean after yourself
        self.del_test_dir(module_name, fname)

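In this test the old primary (node1) is re-attached to the promoted node2 by restoring it from delta1_id with --recovery-target-timeline=2 and --recovery-target=latest, so recovery follows node2's timeline from the shared archive. A hypothetical follow-up assertion (not part of this commit) could confirm the timeline switch through pg_control_checkpoint(), assuming PostgreSQL 9.6 or later:

# Illustrative check only: after node1 has followed timeline 2 and been
# promoted again, its control file should report timeline 3.
tli = node1.safe_psql(
    'postgres',
    'SELECT timeline_id FROM pg_control_checkpoint()').decode().strip()
assert tli == '3'
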
# @unittest.skip("skip")
|
||||
def test_replica_promote_archive_page(self):
|
||||
"""
|
||||
t3 /---P3-->
|
||||
t2 /------->
|
||||
t1 --F---P1--P2--
|
||||
"""
|
||||
fname = self.id().split('.')[3]
|
||||
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||
node1 = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'node1'),
|
||||
set_replication=True,
|
||||
initdb_params=['--data-checksums'],
|
||||
pg_options={
|
||||
'checkpoint_timeout': '30s',
|
||||
'archive_timeout': '30s',
|
||||
'autovacuum': 'off'})
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'node', node1)
|
||||
self.set_archiving(backup_dir, 'node', node1)
|
||||
self.set_config(
|
||||
backup_dir, 'node', options=['--archive-timeout=60s'])
|
||||
|
||||
node1.slow_start()
|
||||
|
||||
self.backup_node(backup_dir, 'node', node1, options=['--stream'])
|
||||
|
||||
# Create replica
|
||||
node2 = self.make_simple_node(
|
||||
base_dir=os.path.join(module_name, fname, 'node2'))
|
||||
node2.cleanup()
|
||||
self.restore_node(backup_dir, 'node', node2, node2.data_dir)
|
||||
|
||||
# Settings for Replica
|
||||
self.set_replica(node1, node2)
|
||||
self.set_auto_conf(node2, {'port': node2.port})
|
||||
self.set_archiving(backup_dir, 'node', node2, replica=True)
|
||||
|
||||
node2.slow_start(replica=True)
|
||||
|
||||
node1.safe_psql(
|
||||
'postgres',
|
||||
'CREATE TABLE t1 AS '
|
||||
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
|
||||
'FROM generate_series(0,20) i')
|
||||
self.wait_until_replica_catch_with_master(node1, node2)
|
||||
|
||||
node1.safe_psql(
|
||||
'postgres',
|
||||
'CREATE TABLE t2 AS '
|
||||
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
|
||||
'FROM generate_series(0,20) i')
|
||||
self.wait_until_replica_catch_with_master(node1, node2)
|
||||
|
||||
# page backup on replica on timeline 1
|
||||
page1_id = self.backup_node(
|
||||
backup_dir, 'node', node2, node2.data_dir,
|
||||
'page', options=['--stream'])
|
||||
|
||||
# page backup on replica on timeline 1
|
||||
page2_id = self.backup_node(
|
||||
backup_dir, 'node', node2, node2.data_dir, 'page')
|
||||
|
||||
self.change_backup_status(
|
||||
backup_dir, 'node', page2_id, 'ERROR')
|
||||
|
||||
# node2 is now master
|
||||
node2.promote()
|
||||
node2.safe_psql('postgres', 'CHECKPOINT')
|
||||
|
||||
node2.safe_psql(
|
||||
'postgres',
|
||||
'CREATE TABLE t3 AS '
|
||||
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
|
||||
'FROM generate_series(0,20) i')
|
||||
|
||||
# node1 is now replica
|
||||
node1.cleanup()
|
||||
# kludge "backup_id=page1_id"
|
||||
self.restore_node(
|
||||
backup_dir, 'node', node1, node1.data_dir,
|
||||
backup_id=page1_id,
|
||||
options=[
|
||||
'--recovery-target-timeline=2',
|
||||
'--recovery-target=latest'])
|
||||
|
||||
# Settings for Replica
|
||||
self.set_replica(node2, node1)
|
||||
self.set_auto_conf(node1, {'port': node1.port})
|
||||
self.set_archiving(backup_dir, 'node', node1, replica=True)
|
||||
|
||||
node1.slow_start(replica=True)
|
||||
|
||||
node2.safe_psql(
|
||||
'postgres',
|
||||
'CREATE TABLE t4 AS '
|
||||
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
|
||||
'FROM generate_series(0,30) i')
|
||||
self.wait_until_replica_catch_with_master(node2, node1)
|
||||
|
||||
# node1 is back to be a master
|
||||
node1.promote()
|
||||
node1.safe_psql('postgres', 'CHECKPOINT')
|
||||
|
||||
# delta3_id = self.backup_node(
|
||||
# backup_dir, 'node', node2, node2.data_dir, 'delta')
|
||||
# page backup on timeline 3
|
||||
page3_id = self.backup_node(
|
||||
backup_dir, 'node', node1, node1.data_dir, 'page',
|
||||
options=['--archive-timeout=60'])
|
||||
|
||||
pgdata = self.pgdata_content(node1.data_dir)
|
||||
|
||||
node1.cleanup()
|
||||
self.restore_node(backup_dir, 'node', node1, node1.data_dir)
|
||||
|
||||
pgdata_restored = self.pgdata_content(node1.data_dir)
|
||||
self.compare_pgdata(pgdata, pgdata_restored)
|
||||
|
||||
# Clean after yourself
|
||||
self.del_test_dir(module_name, fname)
|
||||
|
||||
# @unittest.skip("skip")
|
||||
def test_parent_choosing(self):
|
||||
"""
|
||||
"""
|
||||
        fname = self.id().split('.')[3]
        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
        master = self.make_simple_node(
            base_dir=os.path.join(module_name, fname, 'master'),
            set_replication=True,
            initdb_params=['--data-checksums'])

        self.init_pb(backup_dir)
        self.add_instance(backup_dir, 'master', master)

        master.slow_start()

        self.backup_node(backup_dir, 'master', master, options=['--stream'])

        # Create replica
        replica = self.make_simple_node(
            base_dir=os.path.join(module_name, fname, 'replica'))
        replica.cleanup()
        self.restore_node(backup_dir, 'master', replica)

        # Settings for Replica
        self.set_replica(master, replica)
        self.set_auto_conf(replica, {'port': replica.port})

        replica.slow_start(replica=True)

        master.safe_psql(
            'postgres',
            'CREATE TABLE t1 AS '
            'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
            'FROM generate_series(0,20) i')
        self.wait_until_replica_catch_with_master(master, replica)

        self.add_instance(backup_dir, 'replica', replica)

        full_id = self.backup_node(
            backup_dir, 'replica',
            replica, options=['--stream'])

        master.safe_psql(
            'postgres',
            'CREATE TABLE t2 AS '
            'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
            'FROM generate_series(0,20) i')
        self.wait_until_replica_catch_with_master(master, replica)

        self.backup_node(
            backup_dir, 'replica', replica,
            backup_type='delta', options=['--stream'])

        replica.promote()
        replica.safe_psql('postgres', 'CHECKPOINT')

        # this should fail: without WAL archiving it is impossible to
        # take a multi-timeline backup.
        try:
            self.backup_node(
                backup_dir, 'replica', replica,
                backup_type='delta', options=['--stream'])
            # we should die here because exception is what we expect to happen
            self.assertEqual(
                1, 0,
                "Expecting Error because of timeline switch "
                "\n Output: {0} \n CMD: {1}".format(
                    repr(self.output), self.cmd))
        except ProbackupException as e:
            self.assertTrue(
                'WARNING: Cannot find valid backup on previous timelines, '
                'WAL archive is not available' in e.message and
                'ERROR: Create new full backup before an incremental one' in e.message,
                "\n Unexpected Error Message: {0}\n CMD: {1}".format(
                    repr(e.message), self.cmd))

        # Clean after yourself
        self.del_test_dir(module_name, fname)

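test_parent_choosing exercises how pg_probackup picks a parent for an incremental backup after a timeline switch. The selection logic lives in the backup catalog code; purely as an illustration of the idea (not pg_probackup's actual implementation), parent choosing can be thought of as:

def choose_parent(backups, current_tli, tli_history):
    # Timelines reachable from the current one, e.g. [3, 2, 1].
    reachable = [current_tli] + tli_history
    candidates = [b for b in backups
                  if b['status'] == 'OK' and b['tli'] in reachable]
    if not candidates:
        # mirrors the error asserted in the tests above
        raise Exception('Create new full backup before an incremental one')
    # the most recent usable backup becomes the parent
    return max(candidates, key=lambda b: b['stop_lsn'])

Without a WAL archive the timeline history is not available, which is why the tests above expect the WARNING/ERROR pair instead of a successful backup.
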
# @unittest.skip("skip")
|
||||
def test_instance_from_the_past(self):
|
||||
"""
|
||||
"""
|
||||
        fname = self.id().split('.')[3]
        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
        node = self.make_simple_node(
            base_dir=os.path.join(module_name, fname, 'node'),
            set_replication=True,
            initdb_params=['--data-checksums'])

        self.init_pb(backup_dir)
        self.add_instance(backup_dir, 'node', node)

        node.slow_start()

        full_id = self.backup_node(backup_dir, 'node', node, options=['--stream'])

        node.pgbench_init(scale=10)
        self.backup_node(backup_dir, 'node', node, options=['--stream'])
        node.cleanup()

        self.restore_node(backup_dir, 'node', node, backup_id=full_id)
        node.slow_start()

        try:
            self.backup_node(
                backup_dir, 'node', node,
                backup_type='delta', options=['--stream'])
            # we should die here because exception is what we expect to happen
            self.assertEqual(
                1, 0,
                "Expecting Error because instance is from the past "
                "\n Output: {0} \n CMD: {1}".format(
                    repr(self.output), self.cmd))
        except ProbackupException as e:
            self.assertTrue(
                'ERROR: Current START LSN' in e.message and
                'is lower than START LSN' in e.message and
                'It may indicate that we are trying to backup '
                'PostgreSQL instance from the past' in e.message,
                "\n Unexpected Error Message: {0}\n CMD: {1}".format(
                    repr(e.message), self.cmd))

        # Clean after yourself
        self.del_test_dir(module_name, fname)

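The "instance from the past" check boils down to comparing LSNs: if the START LSN of the new backup is lower than the START LSN of an already taken backup, the instance must have been rewound (here, by restoring the older FULL backup). A small illustrative helper, assuming the usual 'XXXXXXXX/XXXXXXXX' text form of an LSN:

def lsn_to_int(lsn):
    # 'XXXXXXXX/XXXXXXXX' -> 64-bit integer
    hi, lo = lsn.split('/')
    return (int(hi, 16) << 32) + int(lo, 16)

def instance_is_from_the_past(current_start_lsn, previous_start_lsn):
    # True when the instance has been rewound behind an existing backup
    return lsn_to_int(current_start_lsn) < lsn_to_int(previous_start_lsn)
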
    # TODO:
    # null offset STOP LSN and the latest record in the previous segment is a contrecord (manual only)

@ -1712,10 +1712,9 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
                    "without valid full backup.\n Output: {0} \n CMD: {1}".format(
                        repr(self.output), self.cmd))
        except ProbackupException as e:
            self.assertIn(
                "ERROR: Valid backup on current timeline 1 is not found. "
                "Create new FULL backup before an incremental one.",
                e.message,
            self.assertTrue(
                "WARNING: Valid backup on current timeline 1 is not found" in e.message and
                "ERROR: Create new full backup before an incremental one" in e.message,
                "\n Unexpected Error Message: {0}\n CMD: {1}".format(
                    repr(e.message), self.cmd))

@ -2675,7 +2674,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        self.assertIn(
            'LOG: Archive backup {0} to stay consistent protect from '
            'purge WAL interval between 000000010000000000000004 '
            'and 000000010000000000000004 on timeline 1'.format(B1), output)
            'and 000000010000000000000005 on timeline 1'.format(B1), output)

        start_lsn_B4 = self.show_pb(backup_dir, 'node', B4)['start-lsn']
        self.assertIn(
@ -2684,13 +2683,13 @@ class RetentionTest(ProbackupTest, unittest.TestCase):

        self.assertIn(
            'LOG: Timeline 3 to stay reachable from timeline 1 protect '
            'from purge WAL interval between 000000020000000000000005 and '
            '000000020000000000000008 on timeline 2', output)
            'from purge WAL interval between 000000020000000000000006 and '
            '000000020000000000000009 on timeline 2', output)

        self.assertIn(
            'LOG: Timeline 3 to stay reachable from timeline 1 protect '
            'from purge WAL interval between 000000010000000000000004 and '
            '000000010000000000000005 on timeline 1', output)
            '000000010000000000000006 on timeline 1', output)

        show_tli1_before = self.show_archive(backup_dir, 'node', tli=1)
        show_tli2_before = self.show_archive(backup_dir, 'node', tli=2)
@ -2745,19 +2744,19 @@ class RetentionTest(ProbackupTest, unittest.TestCase):

        self.assertEqual(
            show_tli1_after['lost-segments'][0]['begin-segno'],
            '000000010000000000000006')
            '000000010000000000000007')

        self.assertEqual(
            show_tli1_after['lost-segments'][0]['end-segno'],
            '000000010000000000000009')
            '00000001000000000000000A')

        self.assertEqual(
            show_tli2_after['lost-segments'][0]['begin-segno'],
            '000000020000000000000009')
            '00000002000000000000000A')

        self.assertEqual(
            show_tli2_after['lost-segments'][0]['end-segno'],
            '000000020000000000000009')
            '00000002000000000000000A')

        self.validate_pb(backup_dir, 'node')

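The segment names asserted above follow PostgreSQL's WAL file naming: 8 hex digits of timeline, 8 for the "log" number and 8 for the segment within it (with the default 16 MB segments the last part runs from 00000000 to 000000FF). A small helper makes the expected values easier to read:

def parse_wal_segment(name):
    # e.g. '000000010000000000000004' -> (timeline 1, log 0, segment 4)
    return int(name[0:8], 16), int(name[8:16], 16), int(name[16:24], 16)

parse_wal_segment('00000001000000000000000A')  # -> (1, 0, 10)
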
@ -1786,7 +1786,7 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
        self.assertTrue(
            'LOG: archive command failed with exit code 1' in log_content and
            'DETAIL: The failed archive command was:' in log_content and
            'INFO: pg_probackup archive-push from' in log_content,
            'WAL file already exists in archive with different checksum' in log_content,
            'Expecting error messages about failed archive_command'
            )
        self.assertFalse(

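The new assertion checks the message pg_probackup emits when archive-push finds a WAL file with the same name but different content already in the archive. Conceptually (this is an illustration, not the tool's actual code), the decision looks like:

import hashlib

def archive_push_action(new_wal, archived_wal):
    def checksum(path):
        with open(path, 'rb') as f:
            return hashlib.md5(f.read()).hexdigest()
    if checksum(new_wal) == checksum(archived_wal):
        return 'skip'   # identical file is already archived
    # same name, different checksum: refuse to overwrite silently
    return 'error: WAL file already exists in archive with different checksum'
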
24
travis/Dockerfile.in
Normal file
@ -0,0 +1,24 @@
FROM ololobus/postgres-dev:stretch

USER root
RUN apt-get update
RUN apt-get -yq install python python-pip python-virtualenv

# Environment
ENV PG_MAJOR=${PG_VERSION} PG_BRANCH=${PG_BRANCH}
ENV LANG=C.UTF-8 PGHOME=/pg/testdir/pgbin

# Make directories
RUN mkdir -p /pg/testdir

COPY run_tests.sh /run.sh
RUN chmod 755 /run.sh

COPY . /pg/testdir
WORKDIR /pg/testdir

# Grant privileges
RUN chown -R postgres:postgres /pg/testdir

USER postgres
ENTRYPOINT MODE=${MODE} /run.sh
2
travis/docker-compose.yml
Normal file
@ -0,0 +1,2 @@
tests:
  build: .
25
travis/make_dockerfile.sh
Executable file
@ -0,0 +1,25 @@
#!/usr/bin/env sh

if [ -z ${PG_VERSION+x} ]; then
    echo PG_VERSION is not set!
    exit 1
fi

if [ -z ${PG_BRANCH+x} ]; then
    echo PG_BRANCH is not set!
    exit 1
fi

if [ -z ${MODE+x} ]; then
    MODE=basic
fi

echo PG_VERSION=${PG_VERSION}
echo PG_BRANCH=${PG_BRANCH}
echo MODE=${MODE}

sed \
    -e 's/${PG_VERSION}/'${PG_VERSION}/g \
    -e 's/${PG_BRANCH}/'${PG_BRANCH}/g \
    -e 's/${MODE}/'${MODE}/g \
    Dockerfile.in > Dockerfile
80
travis/run_tests.sh
Executable file
@ -0,0 +1,80 @@
#!/usr/bin/env bash

#
# Copyright (c) 2019-2020, Postgres Professional
#


PG_SRC=$PWD/postgres

# # Here PG_VERSION is provided by postgres:X-alpine docker image
# curl "https://ftp.postgresql.org/pub/source/v$PG_VERSION/postgresql-$PG_VERSION.tar.bz2" -o postgresql.tar.bz2
# echo "$PG_SHA256 *postgresql.tar.bz2" | sha256sum -c -

# mkdir $PG_SRC

# tar \
#     --extract \
#     --file postgresql.tar.bz2 \
#     --directory $PG_SRC \
#     --strip-components 1

# Clone Postgres
echo "############### Getting Postgres sources:"
git clone https://github.com/postgres/postgres.git -b $PG_BRANCH --depth=1

# Compile and install Postgres
echo "############### Compiling Postgres:"
cd postgres # Go to postgres dir
./configure --prefix=$PGHOME --enable-debug --enable-cassert --enable-depend --enable-tap-tests
make -s -j$(nproc) install
make -s -j$(nproc) -C contrib/ install

# Override default Postgres instance
export PATH=$PGHOME/bin:$PATH
export LD_LIBRARY_PATH=$PGHOME/lib
export PG_CONFIG=$(which pg_config)

# Get amcheck if missing
if [ ! -d "contrib/amcheck" ]; then
    echo "############### Getting missing amcheck:"
    git clone https://github.com/petergeoghegan/amcheck.git --depth=1 contrib/amcheck
    make USE_PGXS=1 -C contrib/amcheck install
fi

# Get back to testdir
cd ..

# Show pg_config path (just in case)
echo "############### pg_config path:"
which pg_config

# Show pg_config just in case
echo "############### pg_config:"
pg_config

# Build and install pg_probackup (using PG_CPPFLAGS and SHLIB_LINK for gcov)
echo "############### Compiling and installing pg_probackup:"
# make USE_PGXS=1 PG_CPPFLAGS="-coverage" SHLIB_LINK="-coverage" top_srcdir=$CUSTOM_PG_SRC install
make USE_PGXS=1 top_srcdir=$PG_SRC install

# Setup python environment
echo "############### Setting up python env:"
virtualenv pyenv
source pyenv/bin/activate
pip install testgres==1.8.2

echo "############### Testing:"
if [ "$MODE" = "basic" ]; then
    export PG_PROBACKUP_TEST_BASIC=ON
    python -m unittest -v tests
    python -m unittest -v tests.init
else
    python -m unittest -v tests.$MODE
fi

# Generate *.gcov files
# gcov src/*.c src/*.h

# Send coverage stats to Codecov
# bash <(curl -s https://codecov.io/bash)