Mirror of https://github.com/offen/docker-volume-backup.git
Add label to optionally skip container restart after backup (#659)
* Add label to optionally skip container restart after backup
* Add new mutually exclusive label instead
* Simplified `hasLabel`
* Removed unnecessary else block
* Added new test-case `no-restart` based on test-case `local`
* Removed invalid README entry
* Added new section to how-tos
* Added configuration reference
@@ -43,6 +43,7 @@ type Config struct {
    BackupPruningPrefix string `split_words:"true"`
    BackupStopContainerLabel string `split_words:"true"`
    BackupStopDuringBackupLabel string `split_words:"true" default:"true"`
+   BackupStopDuringBackupNoRestartLabel string `split_words:"true" default:"true"`
    BackupStopServiceTimeout time.Duration `split_words:"true" default:"5m"`
    BackupFromSnapshot bool `split_words:"true"`
    BackupExcludeRegexp RegexpDecoder `split_words:"true"`
@@ -91,6 +91,21 @@ func isSwarm(c interface {
    return info.Swarm.LocalNodeState != "" && info.Swarm.LocalNodeState != swarm.LocalNodeStateInactive && info.Swarm.ControlAvailable, nil
}
+
+func hasLabel(labels map[string]string, key, value string) bool {
+   val, ok := labels[key]
+   return ok && val == value
+}
+
+func checkStopLabels(labels map[string]string, stopDuringBackupLabelValue string, stopDuringBackupNoRestartLabelValue string) (bool, bool, error) {
+   hasStopDuringBackupLabel := hasLabel(labels, "docker-volume-backup.stop-during-backup", stopDuringBackupLabelValue)
+   hasStopDuringBackupNoRestartLabel := hasLabel(labels, "docker-volume-backup.stop-during-backup-no-restart", stopDuringBackupNoRestartLabelValue)
+   if hasStopDuringBackupLabel && hasStopDuringBackupNoRestartLabel {
+       return hasStopDuringBackupLabel, hasStopDuringBackupNoRestartLabel, errwrap.Wrap(nil, "both docker-volume-backup.stop-during-backup and docker-volume-backup.stop-during-backup-no-restart have been set, cannot continue")
+   }
+
+   return hasStopDuringBackupLabel, hasStopDuringBackupNoRestartLabel, nil
+}

// stopContainersAndServices stops all Docker containers that are marked as to being
// stopped during the backup and returns a function that can be called to
// restart everything that has been stopped.
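To illustrate the mutual-exclusion rule enforced by `checkStopLabels` above, here is a small standalone sketch (not part of the commit): it inlines simplified copies of `hasLabel` and `checkStopLabels`, substitutes `fmt.Errorf` for the project's internal `errwrap` helper, and uses made-up container names together with the default label value of `true`.

```go
package main

import "fmt"

// hasLabel and checkStopLabels mirror the helpers added in the hunk above,
// with the error wrapping simplified for this standalone example.
func hasLabel(labels map[string]string, key, value string) bool {
	val, ok := labels[key]
	return ok && val == value
}

func checkStopLabels(labels map[string]string, stopValue, noRestartValue string) (bool, bool, error) {
	stop := hasLabel(labels, "docker-volume-backup.stop-during-backup", stopValue)
	noRestart := hasLabel(labels, "docker-volume-backup.stop-during-backup-no-restart", noRestartValue)
	if stop && noRestart {
		return stop, noRestart, fmt.Errorf("both labels have been set, cannot continue")
	}
	return stop, noRestart, nil
}

func main() {
	// Hypothetical label sets for three containers; "true" is the default
	// value both BACKUP_STOP_DURING_BACKUP_LABEL and
	// BACKUP_STOP_DURING_BACKUP_NO_RESTART_LABEL fall back to.
	cases := map[string]map[string]string{
		"restarted-after-backup": {"docker-volume-backup.stop-during-backup": "true"},
		"left-stopped":           {"docker-volume-backup.stop-during-backup-no-restart": "true"},
		"rejected": {
			"docker-volume-backup.stop-during-backup":            "true",
			"docker-volume-backup.stop-during-backup-no-restart": "true",
		},
	}
	for name, labels := range cases {
		stop, noRestart, err := checkStopLabels(labels, "true", "true")
		if err != nil {
			fmt.Printf("%s: %v\n", name, err)
			continue
		}
		// restart mirrors the handledContainer.restart field set later in the diff.
		fmt.Printf("%s: stop=%v restart=%v\n", name, stop || noRestart, !noRestart)
	}
}
```

A container carrying only the no-restart label is still stopped but flagged with `restart=false`, which is exactly how the loop in the next hunk populates `handledContainer`.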
@@ -118,52 +133,67 @@ func (s *script) stopContainersAndServices() (func() error, error) {
        labelValue = s.c.BackupStopContainerLabel
    }

-   filterMatchLabel := fmt.Sprintf(
+   stopDuringBackupLabel := fmt.Sprintf(
        "docker-volume-backup.stop-during-backup=%s",
        labelValue,
    )
+   stopDuringBackupNoRestartLabel := fmt.Sprintf(
+       "docker-volume-backup.stop-during-backup-no-restart=%s",
+       s.c.BackupStopDuringBackupNoRestartLabel,
+   )

    allContainers, err := s.cli.ContainerList(context.Background(), ctr.ListOptions{})
    if err != nil {
        return noop, errwrap.Wrap(err, "error querying for containers")
    }
-   containersToStop, err := s.cli.ContainerList(context.Background(), ctr.ListOptions{
-       Filters: filters.NewArgs(filters.KeyValuePair{
-           Key:   "label",
-           Value: filterMatchLabel,
-       }),
-   })
+
+   var containersToStop []handledContainer
+   for _, c := range allContainers {
+       hasStopDuringBackupLabel, hasStopDuringBackupNoRestartLabel, err := checkStopLabels(c.Labels, labelValue, s.c.BackupStopDuringBackupNoRestartLabel)
+       if err != nil {
+           return noop, errwrap.Wrap(err, "error querying for containers to stop")
+       }
+
+       if !hasStopDuringBackupLabel && !hasStopDuringBackupNoRestartLabel {
+           continue
+       }
+
+       containersToStop = append(containersToStop, handledContainer{
+           summary: c,
+           restart: !hasStopDuringBackupNoRestartLabel,
+       })
+   }

    var allServices []swarm.Service
    var servicesToScaleDown []handledSwarmService
    if isDockerSwarm {
-       allServices, err = s.cli.ServiceList(context.Background(), swarm.ServiceListOptions{})
+       allServices, err = s.cli.ServiceList(context.Background(), swarm.ServiceListOptions{Status: true})
        if err != nil {
            return noop, errwrap.Wrap(err, "error querying for services")
        }
-       matchingServices, err := s.cli.ServiceList(context.Background(), swarm.ServiceListOptions{
-           Filters: filters.NewArgs(filters.KeyValuePair{
-               Key:   "label",
-               Value: filterMatchLabel,
-           }),
-           Status: true,
-       })
+
+       for _, service := range allServices {
+           hasStopDuringBackupLabel, hasStopDuringBackupNoRestartLabel, err := checkStopLabels(service.Spec.Labels, labelValue, s.c.BackupStopDuringBackupNoRestartLabel)
+           if err != nil {
+               return noop, errwrap.Wrap(err, "error querying for services to scale down")
+           }
-       for _, s := range matchingServices {
-           if s.Spec.Mode.Replicated == nil {
+
+           if !hasStopDuringBackupLabel && !hasStopDuringBackupNoRestartLabel {
+               continue
+           }
+
+           if service.Spec.Mode.Replicated == nil {
                return noop, errwrap.Wrap(
                    nil,
-                   fmt.Sprintf("only replicated services can be restarted, but found a label on service %s", s.Spec.Name),
+                   fmt.Sprintf("only replicated services can be restarted, but found a label on service %s", service.Spec.Name),
                )
            }

            servicesToScaleDown = append(servicesToScaleDown, handledSwarmService{
-               serviceID:           s.ID,
-               initialReplicaCount: *s.Spec.Mode.Replicated.Replicas,
+               serviceID:           service.ID,
+               initialReplicaCount: *service.Spec.Mode.Replicated.Replicas,
+               restart:             !hasStopDuringBackupNoRestartLabel,
            })
        }
    }
@@ -174,7 +204,7 @@ func (s *script) stopContainersAndServices() (func() error, error) {

    if isDockerSwarm {
        for _, container := range containersToStop {
-           if swarmServiceID, ok := container.Labels["com.docker.swarm.service.id"]; ok {
+           if swarmServiceID, ok := container.summary.Labels["com.docker.swarm.service.id"]; ok {
                parentService, _, err := s.cli.ServiceInspectWithRaw(context.Background(), swarmServiceID, swarm.ServiceInspectOptions{})
                if err != nil {
                    return noop, errwrap.Wrap(err, fmt.Sprintf("error querying for parent service with ID %s", swarmServiceID))
@@ -185,7 +215,7 @@ func (s *script) stopContainersAndServices() (func() error, error) {
                        nil,
                        fmt.Sprintf(
                            "container %s is labeled to stop but has parent service %s which is also labeled, cannot continue",
-                           container.Names[0],
+                           container.summary.Names[0],
                            parentService.Spec.Name,
                        ),
                    )
@@ -197,27 +227,29 @@ func (s *script) stopContainersAndServices() (func() error, error) {

    s.logger.Info(
        fmt.Sprintf(
-           "Stopping %d out of %d running container(s) as they were labeled %s.",
+           "Stopping %d out of %d running container(s) as they were labeled %s or %s.",
            len(containersToStop),
            len(allContainers),
-           filterMatchLabel,
+           stopDuringBackupLabel,
+           stopDuringBackupNoRestartLabel,
        ),
    )
    if isDockerSwarm {
        s.logger.Info(
            fmt.Sprintf(
-               "Scaling down %d out of %d active service(s) as they were labeled %s.",
+               "Scaling down %d out of %d active service(s) as they were labeled %s or %s.",
                len(servicesToScaleDown),
                len(allServices),
-               filterMatchLabel,
+               stopDuringBackupLabel,
+               stopDuringBackupNoRestartLabel,
            ),
        )
    }

-   var stoppedContainers []ctr.Summary
+   var stoppedContainers []handledContainer
    var stopErrors []error
    for _, container := range containersToStop {
-       if err := s.cli.ContainerStop(context.Background(), container.ID, ctr.StopOptions{}); err != nil {
+       if err := s.cli.ContainerStop(context.Background(), container.summary.ID, ctr.StopOptions{}); err != nil {
            stopErrors = append(stopErrors, err)
        } else {
            stoppedContainers = append(stoppedContainers, container)
@@ -281,9 +313,14 @@ func (s *script) stopContainersAndServices() (func() error, error) {

    return func() error {
        var restartErrors []error
+       var restartedContainers []handledContainer
        matchedServices := map[string]bool{}
        for _, container := range stoppedContainers {
-           if swarmServiceID, ok := container.Labels["com.docker.swarm.service.id"]; ok && isDockerSwarm {
+           if !container.restart {
+               continue
+           }
+
+           if swarmServiceID, ok := container.summary.Labels["com.docker.swarm.service.id"]; ok && isDockerSwarm {
                if _, ok := matchedServices[swarmServiceID]; ok {
                    continue
                }
@@ -309,15 +346,22 @@ func (s *script) stopContainersAndServices() (func() error, error) {
                continue
            }

-           if err := s.cli.ContainerStart(context.Background(), container.ID, ctr.StartOptions{}); err != nil {
+           if err := s.cli.ContainerStart(context.Background(), container.summary.ID, ctr.StartOptions{}); err != nil {
                restartErrors = append(restartErrors, err)
+           } else {
+               restartedContainers = append(restartedContainers, container)
            }
        }

        var scaleUpErrors concurrentSlice[error]
+       var scaledUpServices []handledSwarmService
        if isDockerSwarm {
            wg := &sync.WaitGroup{}
            for _, svc := range servicesToScaleDown {
+               if !svc.restart {
+                   continue
+               }
+
                wg.Add(1)
                go func(svc handledSwarmService) {
                    defer wg.Done()
@@ -326,6 +370,9 @@ func (s *script) stopContainersAndServices() (func() error, error) {
                        scaleDownErrors.append(err)
                        return
                    }
+
+                   scaledUpServices = append(scaledUpServices, svc)
+
                    for _, warning := range warnings {
                        s.logger.Warn(
                            fmt.Sprintf("The Docker API returned a warning when scaling up service %s: %s", svc.serviceID, warning),
@@ -349,14 +396,16 @@ func (s *script) stopContainersAndServices() (func() error, error) {

        s.logger.Info(
            fmt.Sprintf(
-               "Restarted %d container(s).",
+               "Restarted %d out of %d stopped container(s).",
+               len(restartedContainers),
                len(stoppedContainers),
            ),
        )
        if isDockerSwarm {
            s.logger.Info(
                fmt.Sprintf(
-                   "Scaled %d service(s) back up.",
+                   "Scaled %d out of %d scaled down service(s) back up.",
+                   len(scaledUpServices),
                    len(scaledDownServices),
                ),
            )
@@ -11,6 +11,7 @@ import (
    "sync"
    "time"

+   ctr "github.com/docker/docker/api/types/container"
    "github.com/offen/docker-volume-backup/internal/errwrap"
    "github.com/robfig/cron/v3"
)
@@ -64,9 +65,15 @@ func (noopWriteCloser) Close() error {
    return nil
}

+type handledContainer struct {
+   summary ctr.Summary
+   restart bool
+}
+
type handledSwarmService struct {
    serviceID           string
    initialReplicaCount uint64
+   restart             bool
}

type concurrentSlice[T any] struct {
@@ -34,3 +34,29 @@ services:
volumes:
  data:
```
+
+## Stop containers during backup without restarting
+
+Sometimes you might want to stop containers for the backup but not have them start again automatically, for example if they are normally started by an external process or scheduler.
+
+For this use case, you can use the label `docker-volume-backup.stop-during-backup-no-restart`.
+This label is **mutually exclusive** with `docker-volume-backup.stop-during-backup` and performs the same stop operation, but skips restarting the container after the backup has finished.
+
+```yml
+services:
+  app:
+    # definition for app ...
+    labels:
+      - docker-volume-backup.stop-during-backup-no-restart=service2
+
+  backup:
+    image: offen/docker-volume-backup:v2
+    environment:
+      BACKUP_STOP_DURING_BACKUP_NO_RESTART_LABEL: service2
+    volumes:
+      - data:/backup/my-app-backup:ro
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+
+volumes:
+  data:
+```
@@ -539,6 +539,13 @@ The values for each key currently match its default.
# you can override this default by specifying a different string value here.
# BACKUP_STOP_DURING_BACKUP_LABEL="true"
+
+# Containers or services can also be stopped for the duration of the backup
+# without being restarted afterwards by applying a
+# `docker-volume-backup.stop-during-backup-no-restart` label. This behaves the
+# same as `docker-volume-backup.stop-during-backup` but is mutually exclusive and
+# skips restarting the container or service once the backup has finished.
+# BACKUP_STOP_DURING_BACKUP_NO_RESTART_LABEL="true"

# When trying to scale down Docker Swarm services, give up after
# the specified amount of time in case the service has not converged yet.
# In case you need to adjust this timeout, supply a duration
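The `BACKUP_STOP_DURING_BACKUP_NO_RESTART_LABEL` name above follows from the `split_words:"true"` tag on `BackupStopDuringBackupNoRestartLabel` in the first hunk. As an illustration only (assuming an envconfig-style decoder such as github.com/kelseyhightower/envconfig, which may differ from the project's actual wiring), this standalone sketch shows the derived variable name and the `default:"true"` fallback:

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/kelseyhightower/envconfig"
)

// config mirrors the relevant fields of the Config struct from the diff;
// split_words turns the CamelCase field name into an underscore-separated
// environment variable name.
type config struct {
	BackupStopDuringBackupLabel          string `split_words:"true" default:"true"`
	BackupStopDuringBackupNoRestartLabel string `split_words:"true" default:"true"`
}

func main() {
	// Override the no-restart label value the same way the how-to example does.
	os.Setenv("BACKUP_STOP_DURING_BACKUP_NO_RESTART_LABEL", "service2")

	var c config
	if err := envconfig.Process("", &c); err != nil {
		log.Fatal(err)
	}

	// Prints: stop-during-backup=true stop-during-backup-no-restart=service2
	fmt.Printf(
		"stop-during-backup=%s stop-during-backup-no-restart=%s\n",
		c.BackupStopDuringBackupLabel,
		c.BackupStopDuringBackupNoRestartLabel,
	)
}
```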
test/no-restart/docker-compose.yml (Normal file, 27 lines)
@@ -0,0 +1,27 @@
+services:
+  backup:
+    image: offen/docker-volume-backup:${TEST_VERSION:-canary}
+    hostname: hostnametoken
+    restart: always
+    environment:
+      BACKUP_FILENAME_EXPAND: 'true'
+      BACKUP_FILENAME: test-$$HOSTNAME.tar.gz
+      BACKUP_LATEST_SYMLINK: test-$$HOSTNAME.latest.tar.gz.gpg
+      BACKUP_CRON_EXPRESSION: 0 0 5 31 2 ?
+      BACKUP_RETENTION_DAYS: ${BACKUP_RETENTION_DAYS:-7}
+      BACKUP_PRUNING_LEEWAY: 5s
+      BACKUP_PRUNING_PREFIX: test
+    volumes:
+      - app_data:/backup/app_data:ro
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - ${LOCAL_DIR:-./local}:/archive
+
+  offen:
+    image: offen/offen:latest
+    labels:
+      - docker-volume-backup.stop-during-backup-no-restart=true
+    volumes:
+      - app_data:/var/opt/offen
+
+volumes:
+  app_data:
test/no-restart/run.sh (Executable file, 76 lines)
@@ -0,0 +1,76 @@
+#!/bin/sh
+
+set -e
+
+cd "$(dirname "$0")"
+. ../util.sh
+current_test=$(basename $(pwd))
+
+export LOCAL_DIR=$(mktemp -d)
+
+docker compose up -d --quiet-pull
+sleep 5
+
+# A symlink for a known file in the volume is created so the test can check
+# whether symlinks are preserved on backup.
+docker compose exec offen ln -s /var/opt/offen/offen.db /var/opt/offen/db.link
+docker compose exec backup backup
+
+sleep 5
+
+expect_running_containers "1"
+
+tmp_dir=$(mktemp -d)
+tar -xvf "$LOCAL_DIR/test-hostnametoken.tar.gz" -C $tmp_dir
+if [ ! -f "$tmp_dir/backup/app_data/offen.db" ]; then
+  fail "Could not find expected file in untared archive."
+fi
+rm -f "$LOCAL_DIR/test-hostnametoken.tar.gz"
+
+if [ ! -L "$tmp_dir/backup/app_data/db.link" ]; then
+  fail "Could not find expected symlink in untared archive."
+fi
+
+pass "Found relevant files in decrypted and untared local backup."
+
+if [ ! -L "$LOCAL_DIR/test-hostnametoken.latest.tar.gz.gpg" ]; then
+  fail "Could not find symlink to latest version."
+fi
+
+pass "Found symlink to latest version in local backup."
+
+# The second part of this test checks if backups get deleted when the retention
+# is set to 0 days (which it should not as it would mean all backups get deleted)
+BACKUP_RETENTION_DAYS="0" docker compose up -d
+sleep 5
+
+docker compose exec backup backup
+
+if [ "$(find "$LOCAL_DIR" -type f | wc -l)" != "1" ]; then
+  fail "Backups should not have been deleted, instead seen: "$(find "$local_dir" -type f)""
+fi
+pass "Local backups have not been deleted."
+
+# The third part of this test checks if old backups get deleted when the retention
+# is set to 7 days (which it should)
+
+BACKUP_RETENTION_DAYS="7" docker compose up -d
+sleep 5
+
+info "Create first backup with no prune"
+docker compose exec backup backup
+
+touch -r "$LOCAL_DIR/test-hostnametoken.tar.gz" -d "14 days ago" "$LOCAL_DIR/test-hostnametoken-old.tar.gz"
+
+info "Create second backup and prune"
+docker compose exec backup backup
+
+if [ -f "$LOCAL_DIR/test-hostnametoken-old.tar.gz" ]; then
+  fail "Backdated file has not been deleted."
+fi
+
+if [ ! -f "$LOCAL_DIR/test-hostnametoken.tar.gz" ]; then
+  fail "Recent file has been deleted."
+fi
+
+pass "Old remote backup has been pruned, new one is still present."