mirror of https://github.com/offen/docker-volume-backup.git

Add label to optionally skip container restart after backup (#659)

* Add label to optionally skip container restart after backup

* Add new mutually exclusive label instead

* Simplify `hasLabel`

* Remove unnecessary else block

* Add new test case `no-restart` based on test case `local`

* Remove invalid README entry

* Add new section to how-tos

* Add configuration reference
Jean Michel authored on 2025-11-01 10:52:56 +01:00, committed by GitHub
parent 52234592b2, commit 60482b2045
7 changed files with 307 additions and 114 deletions


@@ -43,6 +43,7 @@ type Config struct {
BackupPruningPrefix string `split_words:"true"`
BackupStopContainerLabel string `split_words:"true"`
BackupStopDuringBackupLabel string `split_words:"true" default:"true"`
BackupStopDuringBackupNoRestartLabel string `split_words:"true" default:"true"`
BackupStopServiceTimeout time.Duration `split_words:"true" default:"5m"`
BackupFromSnapshot bool `split_words:"true"`
BackupExcludeRegexp RegexpDecoder `split_words:"true"`


@@ -91,6 +91,21 @@ func isSwarm(c interface {
return info.Swarm.LocalNodeState != "" && info.Swarm.LocalNodeState != swarm.LocalNodeStateInactive && info.Swarm.ControlAvailable, nil
}
func hasLabel(labels map[string]string, key, value string) bool {
val, ok := labels[key]
return ok && val == value
}
func checkStopLabels(labels map[string]string, stopDuringBackupLabelValue string, stopDuringBackupNoRestartLabelValue string) (bool, bool, error) {
hasStopDuringBackupLabel := hasLabel(labels, "docker-volume-backup.stop-during-backup", stopDuringBackupLabelValue)
hasStopDuringBackupNoRestartLabel := hasLabel(labels, "docker-volume-backup.stop-during-backup-no-restart", stopDuringBackupNoRestartLabelValue)
if hasStopDuringBackupLabel && hasStopDuringBackupNoRestartLabel {
return hasStopDuringBackupLabel, hasStopDuringBackupNoRestartLabel, errwrap.Wrap(nil, "both docker-volume-backup.stop-during-backup and docker-volume-backup.stop-during-backup-no-restart have been set, cannot continue")
}
return hasStopDuringBackupLabel, hasStopDuringBackupNoRestartLabel, nil
}
// stopContainersAndServices stops all Docker containers that are marked to be
// stopped during the backup and returns a function that can be called to
// restart everything that has been stopped.
@@ -118,52 +133,67 @@ func (s *script) stopContainersAndServices() (func() error, error) {
labelValue = s.c.BackupStopContainerLabel
}
filterMatchLabel := fmt.Sprintf(
stopDuringBackupLabel := fmt.Sprintf(
"docker-volume-backup.stop-during-backup=%s",
labelValue,
)
stopDuringBackupNoRestartLabel := fmt.Sprintf(
"docker-volume-backup.stop-during-backup-no-restart=%s",
s.c.BackupStopDuringBackupNoRestartLabel,
)
allContainers, err := s.cli.ContainerList(context.Background(), ctr.ListOptions{})
if err != nil {
return noop, errwrap.Wrap(err, "error querying for containers")
}
containersToStop, err := s.cli.ContainerList(context.Background(), ctr.ListOptions{
Filters: filters.NewArgs(filters.KeyValuePair{
Key: "label",
Value: filterMatchLabel,
}),
})
var containersToStop []handledContainer
for _, c := range allContainers {
hasStopDuringBackupLabel, hasStopDuringBackupNoRestartLabel, err := checkStopLabels(c.Labels, labelValue, s.c.BackupStopDuringBackupNoRestartLabel)
if err != nil {
return noop, errwrap.Wrap(err, "error querying for containers to stop")
}
if !hasStopDuringBackupLabel && !hasStopDuringBackupNoRestartLabel {
continue
}
containersToStop = append(containersToStop, handledContainer{
summary: c,
restart: !hasStopDuringBackupNoRestartLabel,
})
}
var allServices []swarm.Service
var servicesToScaleDown []handledSwarmService
if isDockerSwarm {
allServices, err = s.cli.ServiceList(context.Background(), swarm.ServiceListOptions{})
allServices, err = s.cli.ServiceList(context.Background(), swarm.ServiceListOptions{Status: true})
if err != nil {
return noop, errwrap.Wrap(err, "error querying for services")
}
matchingServices, err := s.cli.ServiceList(context.Background(), swarm.ServiceListOptions{
Filters: filters.NewArgs(filters.KeyValuePair{
Key: "label",
Value: filterMatchLabel,
}),
Status: true,
})
for _, service := range allServices {
hasStopDuringBackupLabel, hasStopDuringBackupNoRestartLabel, err := checkStopLabels(service.Spec.Labels, labelValue, s.c.BackupStopDuringBackupNoRestartLabel)
if err != nil {
return noop, errwrap.Wrap(err, "error querying for services to scale down")
}
for _, s := range matchingServices {
if s.Spec.Mode.Replicated == nil {
if !hasStopDuringBackupLabel && !hasStopDuringBackupNoRestartLabel {
continue
}
if service.Spec.Mode.Replicated == nil {
return noop, errwrap.Wrap(
nil,
fmt.Sprintf("only replicated services can be restarted, but found a label on service %s", s.Spec.Name),
fmt.Sprintf("only replicated services can be restarted, but found a label on service %s", service.Spec.Name),
)
}
servicesToScaleDown = append(servicesToScaleDown, handledSwarmService{
serviceID: s.ID,
initialReplicaCount: *s.Spec.Mode.Replicated.Replicas,
serviceID: service.ID,
initialReplicaCount: *service.Spec.Mode.Replicated.Replicas,
restart: !hasStopDuringBackupNoRestartLabel,
})
}
}
@@ -174,7 +204,7 @@ func (s *script) stopContainersAndServices() (func() error, error) {
if isDockerSwarm {
for _, container := range containersToStop {
if swarmServiceID, ok := container.Labels["com.docker.swarm.service.id"]; ok {
if swarmServiceID, ok := container.summary.Labels["com.docker.swarm.service.id"]; ok {
parentService, _, err := s.cli.ServiceInspectWithRaw(context.Background(), swarmServiceID, swarm.ServiceInspectOptions{})
if err != nil {
return noop, errwrap.Wrap(err, fmt.Sprintf("error querying for parent service with ID %s", swarmServiceID))
@@ -185,7 +215,7 @@ func (s *script) stopContainersAndServices() (func() error, error) {
nil,
fmt.Sprintf(
"container %s is labeled to stop but has parent service %s which is also labeled, cannot continue",
container.Names[0],
container.summary.Names[0],
parentService.Spec.Name,
),
)
@@ -197,27 +227,29 @@ func (s *script) stopContainersAndServices() (func() error, error) {
s.logger.Info(
fmt.Sprintf(
"Stopping %d out of %d running container(s) as they were labeled %s.",
"Stopping %d out of %d running container(s) as they were labeled %s or %s.",
len(containersToStop),
len(allContainers),
filterMatchLabel,
stopDuringBackupLabel,
stopDuringBackupNoRestartLabel,
),
)
if isDockerSwarm {
s.logger.Info(
fmt.Sprintf(
"Scaling down %d out of %d active service(s) as they were labeled %s.",
"Scaling down %d out of %d active service(s) as they were labeled %s or %s.",
len(servicesToScaleDown),
len(allServices),
filterMatchLabel,
stopDuringBackupLabel,
stopDuringBackupNoRestartLabel,
),
)
}
var stoppedContainers []ctr.Summary
var stoppedContainers []handledContainer
var stopErrors []error
for _, container := range containersToStop {
if err := s.cli.ContainerStop(context.Background(), container.ID, ctr.StopOptions{}); err != nil {
if err := s.cli.ContainerStop(context.Background(), container.summary.ID, ctr.StopOptions{}); err != nil {
stopErrors = append(stopErrors, err)
} else {
stoppedContainers = append(stoppedContainers, container)
@@ -281,9 +313,14 @@ func (s *script) stopContainersAndServices() (func() error, error) {
return func() error {
var restartErrors []error
var restartedContainers []handledContainer
matchedServices := map[string]bool{}
for _, container := range stoppedContainers {
if swarmServiceID, ok := container.Labels["com.docker.swarm.service.id"]; ok && isDockerSwarm {
if !container.restart {
continue
}
if swarmServiceID, ok := container.summary.Labels["com.docker.swarm.service.id"]; ok && isDockerSwarm {
if _, ok := matchedServices[swarmServiceID]; ok {
continue
}
@@ -309,15 +346,22 @@ func (s *script) stopContainersAndServices() (func() error, error) {
continue
}
if err := s.cli.ContainerStart(context.Background(), container.ID, ctr.StartOptions{}); err != nil {
if err := s.cli.ContainerStart(context.Background(), container.summary.ID, ctr.StartOptions{}); err != nil {
restartErrors = append(restartErrors, err)
} else {
restartedContainers = append(restartedContainers, container)
}
}
var scaleUpErrors concurrentSlice[error]
var scaledUpServices []handledSwarmService
if isDockerSwarm {
wg := &sync.WaitGroup{}
for _, svc := range servicesToScaleDown {
if !svc.restart {
continue
}
wg.Add(1)
go func(svc handledSwarmService) {
defer wg.Done()
@@ -326,6 +370,9 @@ func (s *script) stopContainersAndServices() (func() error, error) {
scaleDownErrors.append(err)
return
}
scaledUpServices = append(scaledUpServices, svc)
for _, warning := range warnings {
s.logger.Warn(
fmt.Sprintf("The Docker API returned a warning when scaling up service %s: %s", svc.serviceID, warning),
@@ -349,14 +396,16 @@ func (s *script) stopContainersAndServices() (func() error, error) {
s.logger.Info(
fmt.Sprintf(
"Restarted %d container(s).",
"Restarted %d out of %d stopped container(s).",
len(restartedContainers),
len(stoppedContainers),
),
)
if isDockerSwarm {
s.logger.Info(
fmt.Sprintf(
"Scaled %d service(s) back up.",
"Scaled %d out of %d scaled down service(s) back up.",
len(scaledUpServices),
len(scaledDownServices),
),
)


@@ -11,6 +11,7 @@ import (
"sync"
"time"
ctr "github.com/docker/docker/api/types/container"
"github.com/offen/docker-volume-backup/internal/errwrap"
"github.com/robfig/cron/v3"
)
@@ -64,9 +65,15 @@ func (noopWriteCloser) Close() error {
return nil
}
type handledContainer struct {
summary ctr.Summary
restart bool
}
type handledSwarmService struct {
serviceID string
initialReplicaCount uint64
restart bool
}
type concurrentSlice[T any] struct {


@@ -34,3 +34,29 @@ services:
volumes:
data:
```
## Stop containers during backup without restarting
Sometimes you might want containers to be stopped for the duration of the backup but not restarted automatically afterwards, for example because they are normally started by an external process or scheduler.
For this use case, you can use the label `docker-volume-backup.stop-during-backup-no-restart`.
This label is **mutually exclusive** with `docker-volume-backup.stop-during-backup` and performs the same stop operation but skips restarting the container after the backup has finished.
```yml
services:
  app:
    # definition for app ...
    labels:
      - docker-volume-backup.stop-during-backup-no-restart=service2

  backup:
    image: offen/docker-volume-backup:v2
    environment:
      BACKUP_STOP_DURING_BACKUP_NO_RESTART_LABEL: service2
    volumes:
      - data:/backup/my-app-backup:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro

volumes:
  data:
```
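
The same mechanism presumably applies to Docker Swarm services, since the label is also checked on service labels and matching services are scaled down but not scaled back up. A sketch for a stack file, assuming the usual `deploy.labels` placement for service labels (image name is illustrative); note that only replicated services are supported:

```yml
services:
  app:
    image: example/app:latest  # illustrative image
    deploy:
      replicas: 2
      labels:
        # scale this service down for the backup, but do not scale it back up
        - docker-volume-backup.stop-during-backup-no-restart=true
```

As with containers, the label value that is matched can be overridden via `BACKUP_STOP_DURING_BACKUP_NO_RESTART_LABEL`.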


@@ -539,6 +539,13 @@ The values for each key currently match its default.
# you can override this default by specifying a different string value here.
# BACKUP_STOP_DURING_BACKUP_LABEL="true"
# Containers or services can also be stopped for the duration of the backup
# without being restarted afterwards by applying a
# `docker-volume-backup.stop-during-backup-no-restart` label. This behaves the
# same as `docker-volume-backup.stop-during-backup`, but is mutually exclusive
# with it and skips restarting the container or service once the backup has finished.
# BACKUP_STOP_DURING_BACKUP_NO_RESTART_LABEL="true"
# When trying to scale down Docker Swarm services, give up after
# the specified amount of time in case the service has not converged yet.
# In case you need to adjust this timeout, supply a duration


@@ -0,0 +1,27 @@
services:
  backup:
    image: offen/docker-volume-backup:${TEST_VERSION:-canary}
    hostname: hostnametoken
    restart: always
    environment:
      BACKUP_FILENAME_EXPAND: 'true'
      BACKUP_FILENAME: test-$$HOSTNAME.tar.gz
      BACKUP_LATEST_SYMLINK: test-$$HOSTNAME.latest.tar.gz.gpg
      BACKUP_CRON_EXPRESSION: 0 0 5 31 2 ?
      BACKUP_RETENTION_DAYS: ${BACKUP_RETENTION_DAYS:-7}
      BACKUP_PRUNING_LEEWAY: 5s
      BACKUP_PRUNING_PREFIX: test
    volumes:
      - app_data:/backup/app_data:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ${LOCAL_DIR:-./local}:/archive

  offen:
    image: offen/offen:latest
    labels:
      - docker-volume-backup.stop-during-backup-no-restart=true
    volumes:
      - app_data:/var/opt/offen

volumes:
  app_data:

test/no-restart/run.sh (new executable file, 76 lines)

@@ -0,0 +1,76 @@
#!/bin/sh
set -e
cd "$(dirname "$0")"
. ../util.sh
current_test=$(basename $(pwd))
export LOCAL_DIR=$(mktemp -d)
docker compose up -d --quiet-pull
sleep 5
# A symlink for a known file in the volume is created so the test can check
# whether symlinks are preserved on backup.
docker compose exec offen ln -s /var/opt/offen/offen.db /var/opt/offen/db.link
docker compose exec backup backup
sleep 5
expect_running_containers "1"
tmp_dir=$(mktemp -d)
tar -xvf "$LOCAL_DIR/test-hostnametoken.tar.gz" -C $tmp_dir
if [ ! -f "$tmp_dir/backup/app_data/offen.db" ]; then
fail "Could not find expected file in untared archive."
fi
rm -f "$LOCAL_DIR/test-hostnametoken.tar.gz"
if [ ! -L "$tmp_dir/backup/app_data/db.link" ]; then
fail "Could not find expected symlink in untared archive."
fi
pass "Found relevant files in decrypted and untared local backup."
if [ ! -L "$LOCAL_DIR/test-hostnametoken.latest.tar.gz.gpg" ]; then
fail "Could not find symlink to latest version."
fi
pass "Found symlink to latest version in local backup."
# The second part of this test checks that backups do not get deleted when the
# retention is set to 0 days (as that would mean all backups get deleted).
BACKUP_RETENTION_DAYS="0" docker compose up -d
sleep 5
docker compose exec backup backup
if [ "$(find "$LOCAL_DIR" -type f | wc -l)" != "1" ]; then
fail "Backups should not have been deleted, instead seen: "$(find "$local_dir" -type f)""
fi
pass "Local backups have not been deleted."
# The third part of this test checks that old backups get deleted when the
# retention is set to 7 days.
BACKUP_RETENTION_DAYS="7" docker compose up -d
sleep 5
info "Create first backup with no prune"
docker compose exec backup backup
touch -r "$LOCAL_DIR/test-hostnametoken.tar.gz" -d "14 days ago" "$LOCAL_DIR/test-hostnametoken-old.tar.gz"
info "Create second backup and prune"
docker compose exec backup backup
if [ -f "$LOCAL_DIR/test-hostnametoken-old.tar.gz" ]; then
fail "Backdated file has not been deleted."
fi
if [ ! -f "$LOCAL_DIR/test-hostnametoken.tar.gz" ]; then
fail "Recent file has been deleted."
fi
pass "Old remote backup has been pruned, new one is still present."