1
0
mirror of https://github.com/offen/docker-volume-backup.git synced 2025-06-14 22:25:03 +02:00

Improve Swarm support (#333)

* Query for labeled services as well

* Try scaling down services

* Scale services back up

* Use progress tool from Docker CLI

* In test, label both services

* Clean up error and log messages

* Document scale-up/down approach in docs

* Downgrade Docker CLI to match client

* Document services stats

* Do not rely on PreviousSpec for storing desired replica count

* Log warnings from Docker when updating services

* Check whether container and service labels collide

* Document script behavior on label collision

* Add additional check if all containers have been removed

* Scale services concurrently

* Move docker interaction code into own file

* Factor out code for service updating

* Time out after five minutes of not reaching desired container count

* Inline handling of in-swarm container level restart

* Timer is more suitable for timeout race

* Timeout when scaling down services should be configurable

* Choose better filename

* Reflect changes in naming

* Rename and deprecate BACKUP_STOP_CONTAINER_LABEL

* Improve logging

* Further simplify logging
This commit is contained in:
Frederik Ring
2024-01-31 12:17:41 +01:00
committed by GitHub
parent 2065fb2815
commit c3daeacecb
18 changed files with 640 additions and 145 deletions

View File

@ -5,8 +5,6 @@ package main
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"io/fs"
@ -30,10 +28,6 @@ import (
openpgp "github.com/ProtonMail/go-crypto/openpgp/v2"
"github.com/containrrr/shoutrrr"
"github.com/containrrr/shoutrrr/pkg/router"
"github.com/docker/docker/api/types"
ctr "github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/api/types/swarm"
"github.com/docker/docker/client"
"github.com/leekchan/timeutil"
"github.com/offen/envconfig"
@ -318,126 +312,6 @@ func newScript() (*script, error) {
return s, nil
}
// stopContainers stops all Docker containers that are marked as to being
// stopped during the backup and returns a function that can be called to
// restart everything that has been stopped.
func (s *script) stopContainers() (func() error, error) {
if s.cli == nil {
return noop, nil
}
allContainers, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{})
if err != nil {
return noop, fmt.Errorf("stopContainers: error querying for containers: %w", err)
}
containerLabel := fmt.Sprintf(
"docker-volume-backup.stop-during-backup=%s",
s.c.BackupStopContainerLabel,
)
containersToStop, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{
Filters: filters.NewArgs(filters.KeyValuePair{
Key: "label",
Value: containerLabel,
}),
})
if err != nil {
return noop, fmt.Errorf("stopContainers: error querying for containers to stop: %w", err)
}
if len(containersToStop) == 0 {
return noop, nil
}
s.logger.Info(
fmt.Sprintf(
"Stopping %d container(s) labeled `%s` out of %d running container(s).",
len(containersToStop),
containerLabel,
len(allContainers),
),
)
var stoppedContainers []types.Container
var stopErrors []error
for _, container := range containersToStop {
if err := s.cli.ContainerStop(context.Background(), container.ID, ctr.StopOptions{}); err != nil {
stopErrors = append(stopErrors, err)
} else {
stoppedContainers = append(stoppedContainers, container)
}
}
var stopError error
if len(stopErrors) != 0 {
stopError = fmt.Errorf(
"stopContainers: %d error(s) stopping containers: %w",
len(stopErrors),
errors.Join(stopErrors...),
)
}
s.stats.Containers = ContainersStats{
All: uint(len(allContainers)),
ToStop: uint(len(containersToStop)),
Stopped: uint(len(stoppedContainers)),
}
return func() error {
servicesRequiringUpdate := map[string]struct{}{}
var restartErrors []error
for _, container := range stoppedContainers {
if swarmServiceName, ok := container.Labels["com.docker.swarm.service.name"]; ok {
servicesRequiringUpdate[swarmServiceName] = struct{}{}
continue
}
if err := s.cli.ContainerStart(context.Background(), container.ID, types.ContainerStartOptions{}); err != nil {
restartErrors = append(restartErrors, err)
}
}
if len(servicesRequiringUpdate) != 0 {
services, _ := s.cli.ServiceList(context.Background(), types.ServiceListOptions{})
for serviceName := range servicesRequiringUpdate {
var serviceMatch swarm.Service
for _, service := range services {
if service.Spec.Name == serviceName {
serviceMatch = service
break
}
}
if serviceMatch.ID == "" {
return fmt.Errorf("stopContainers: couldn't find service with name %s", serviceName)
}
serviceMatch.Spec.TaskTemplate.ForceUpdate += 1
if _, err := s.cli.ServiceUpdate(
context.Background(), serviceMatch.ID,
serviceMatch.Version, serviceMatch.Spec, types.ServiceUpdateOptions{},
); err != nil {
restartErrors = append(restartErrors, err)
}
}
}
if len(restartErrors) != 0 {
return fmt.Errorf(
"stopContainers: %d error(s) restarting containers and services: %w",
len(restartErrors),
errors.Join(restartErrors...),
)
}
s.logger.Info(
fmt.Sprintf(
"Restarted %d container(s) and the matching service(s).",
len(stoppedContainers),
),
)
return nil
}, stopError
}
// createArchive creates a tar archive of the configured backup location and
// saves it to disk.
func (s *script) createArchive() error {
@ -448,7 +322,7 @@ func (s *script) createArchive() error {
"Using BACKUP_FROM_SNAPSHOT has been deprecated and will be removed in the next major version.",
)
s.logger.Warn(
"Please use `archive-pre` and `archive-post` commands to prepare your backup sources. Refer to the README for an upgrade guide.",
"Please use `archive-pre` and `archive-post` commands to prepare your backup sources. Refer to the documentation for an upgrade guide.",
)
backupSources = filepath.Join("/tmp", s.c.BackupSources)
// copy before compressing guard against a situation where backup folder's content are still growing.