testserver: fix tests failing due to stopped servers

Before this fix there were various issues with the test server framework, most noticeably servers stopping when they shouldn't, which caused timeouts. This was caused by the reference counting in the Go code, which was not engineered to work across multiple processes and so did not work properly. This fix moves the reference counting logic into the start scripts and removes it from the Go code. This means that the reference counting is now global and works correctly across multiple processes.
2025-11-23 21:44:49 +02:00 · 2025-11-04 11:08:45 +00:00
parent 700e6e11fd
commit 55655efabf
3 changed files with 173 additions and 109 deletions
--- a/fstest/testserver/init.d/README.md
+++ b/fstest/testserver/init.d/README.md
@@ -1,11 +1,12 @@
 This directory contains scripts to start and stop servers for testing.

-The commands are named after the remotes in use.  They should be
-executable files with the following parameters:
+The commands are named after the remotes in use. They are executable
+files with the following parameters:

-    start  - starts the server
-    stop   - stops the server
+    start  - starts the server if not running
+    stop   - stops the server if nothing is using it
    status - returns non-zero exit code if the server is not running
+    reset  - stops the server and resets any reference counts

 These will be called automatically by test_all if that remote is
 required.
@@ -21,16 +22,22 @@ after the connection succeeds rclone will wait `5s` before continuing.
 This is for servers that aren't quite ready even though they have
 opened their TCP ports.

+## Writing new scripts
+
+A docker based server or an `rclone serve` based server should be easy
+to write. Look at one of the examples.
+
 `run.bash` contains boilerplate to be included in a bash script for
-interpreting the command line parameters.
+interpreting the command line parameters. This does reference counting
+to ensure multiple copies of the server aren't running at once.
+Including this is mandatory. It will call your `start()`, `stop()` and
+`status()` functions.

 `docker.bash` contains library functions to help with docker
-implementations.
-
-## TODO
-
- sftpd - https://github.com/panubo/docker-sshd ?
- openstack swift - https://github.com/bouncestorage/docker-swift
- ceph - https://github.com/ceph/cn
- other ftp servers
+implementations. It contains implementations of `stop()` and
+`status()` so all you have to do is write a `start()` function.

+`rclone-serve.bash` contains functions to help with `rclone serve`
+based implementations. It contains implementations of `stop()` and
+`status()` so all you have to do is write a `start()` function which
+should call the `run()` function provided.
--- a/fstest/testserver/init.d/run.bash
+++ b/fstest/testserver/init.d/run.bash
@@ -1,17 +1,101 @@
 #!/usr/bin/env bash
+set -euo pipefail

-case "$1" in 
-    start)
-	start
-	;;
-    stop)
-	stop
-	;;
-    status)
-	status
-	;;
-    *)
-	echo "usage: $0 start|stop|status" >&2
-	exit 1
-	;;
+BASE="${STATE_DIR:-${XDG_RUNTIME_DIR:-/tmp}/rclone-test-server}"
+NAME="$(basename "$0")"
+ROOT="${BASE}/${NAME}"
+STATE="${ROOT}/state"
+LOCKF="${ROOT}/lock"
+REFC="${STATE}/refcount"
+ENVF="${STATE}/env"
+
+mkdir -p "${STATE}"
+[[ -f "${REFC}" ]] || echo 0 >"${REFC}"
+[[ -f "${ENVF}" ]] || : >"${ENVF}"
+: > "${LOCKF}"  # ensure file exists
+
+# status helper that won't trip set -e
+_is_running() { set +e; status >/dev/null 2>&1; local rc=$?; set -e; return $rc; }
+
+_acquire_lock() {
+  # open fd 9 on lock file and take exclusive lock
+  exec 9>"${LOCKF}"
+  flock -x 9
+}
+
+_release_lock() {
+  flock -u 9
+  exec 9>&-
+}
+
+case "${1:-}" in
+  start)
+    _acquire_lock
+    trap '_release_lock' EXIT
+
+    rc=$(cat "${REFC}" 2>/dev/null || echo 0)
+
+    if (( rc == 0 )); then
+      # First client: ensure a clean instance, then start and cache env
+      if _is_running; then
+        stop || true
+      fi
+      if ! out="$(start)"; then
+        echo "failed to start" >&2
+        exit 1
+      fi
+      printf "%s\n" "$out" > "${ENVF}"
+    else
+      # Already owned: make sure it's still up; if not, restart and refresh env
+      if ! _is_running; then
+        if ! out="$(start)"; then
+          echo "failed to restart" >&2
+          exit 1
+        fi
+        printf "%s\n" "$out" > "${ENVF}"
+      fi
+    fi
+
+    rc=$((rc+1)); echo "${rc}" > "${REFC}"
+    cat "${ENVF}"
+
+    trap - EXIT
+    _release_lock
+    ;;
+
+  stop)
+    _acquire_lock
+    trap '_release_lock' EXIT
+
+    rc=$(cat "${REFC}" 2>/dev/null || echo 0)
+    if (( rc > 0 )); then rc=$((rc-1)); fi
+    echo "${rc}" > "${REFC}"
+    if (( rc == 0 )) && _is_running; then
+      stop || true
+    fi
+
+    trap - EXIT
+    _release_lock
+    ;;
+
+  reset)
+    _acquire_lock
+    trap '_release_lock' EXIT

+    stop || true
+    rm -rf "${STATE}"  # only this server's refcount/env: keep the held lock file and other servers' state

+    trap - EXIT
+    _release_lock
+    ;;
+
+  status)
+    # passthrough; do NOT take the lock
+    status
+    ;;
+
+  *)
+    echo "usage: $0 {start|stop|reset|status}" >&2
+    exit 2
+    ;;
 esac
--- a/fstest/testserver/testserver.go
+++ b/fstest/testserver/testserver.go
@@ -19,12 +19,8 @@ import (
 )

 var (
-	once      sync.Once
-	configDir string // where the config is stored
-	// Note of running servers
-	runningMu   sync.Mutex
-	running     = map[string]int{}
-	errNotFound = errors.New("command not found")
+	findConfigOnce sync.Once
+	configDir      string // where the config is stored
 )

 // Assume we are run somewhere within the rclone root
@@ -42,25 +38,26 @@ func findConfig() (string, error) {
 	return "", errors.New("couldn't find testserver config files - run from within rclone source")
 }

-// run the command returning the output and an error
-func run(name, command string) (out []byte, err error) {
-	cmdPath := filepath.Join(configDir, name)
-	fi, err := os.Stat(cmdPath)
-	if err != nil || fi.IsDir() {
-		return nil, errNotFound
-	}
-	cmd := exec.Command(cmdPath, command)
-	out, err = cmd.CombinedOutput()
-	if err != nil {
-		err = fmt.Errorf("failed to run %s %s\n%s: %w", cmdPath, command, string(out), err)
-	}
-	return out, err
+// cmdPath returns the path of the script for server name inside configDir
+func cmdPath(name string) string {
+	return filepath.Join(configDir, name)
 }

-// Check to see if the server is running
-func isRunning(name string) bool {
-	_, err := run(name, "status")
-	return err == nil
+// hasStartCommand reports whether the script for name exists and is not a directory
+func hasStartCommand(name string) bool {
+	fi, err := os.Stat(cmdPath(name))
+	return err == nil && !fi.IsDir()
+}
+
+// run executes the script for name with command as its only argument and returns the combined stdout and stderr; a failure is wrapped together with that output
+func run(name, command string) (out []byte, err error) {
+	script := cmdPath(name)
+	cmd := exec.Command(script, command)
+	out, err = cmd.CombinedOutput()
+	if err != nil {
+		err = fmt.Errorf("failed to run %s %s\n%s: %w", script, command, string(out), err)
+	}
+	return out, err
 }

 // envKey returns the environment variable name to set name, key
@@ -71,8 +68,7 @@ func envKey(name, key string) string {
 // match a line of config var=value
 var matchLine = regexp.MustCompile(`^([a-zA-Z_]+)=(.*)$`)

-// Start the server and set its env vars
-// Call with the mutex held
+// Start the server and set its env vars so rclone can use it
 func start(name string) error {
 	fs.Logf(name, "Starting server")
 	out, err := run(name, "start")
@@ -144,82 +140,59 @@ func start(name string) error {
 	return fmt.Errorf("failed to connect to %q on %q", name, connect)
 }

-// Start starts the named test server which can be stopped by the
-// function returned.
-func Start(remoteName string) (fn func(), err error) {
-	if remoteName == "" {
-		// don't start the local backend
-		return func() {}, nil
+// stop runs the "stop" command of the script for name; a failure is logged, not returned
+func stop(name string) {
+	fs.Logf(name, "Stopping server")
+	_, err := run(name, "stop")
+	if err != nil {
+		fs.Errorf(name, "Failed to stop server: %v", err)
 	}
-	parsed, err := fspath.Parse(remoteName)
+}
+
+// stopNothing is the no-op stop function used when there is no server to stop
+func stopNothing() {
+}
+
+// Start starts the test server for remote.
+//
+// This must be stopped by calling the function returned when finished.
+func Start(remote string) (fn func(), err error) {
+	// don't start the local backend
+	if remote == "" {
+		return stopNothing, nil
+	}
+	parsed, err := fspath.Parse(remote)
 	if err != nil {
 		return nil, err
 	}
 	name := parsed.ConfigString
+	// don't start the local backend
 	if name == "" {
-		// don't start the local backend
-		return func() {}, nil
+		return stopNothing, nil
 	}

 	// Make sure we know where the config is
-	once.Do(func() {
+	findConfigOnce.Do(func() {
 		configDir, err = findConfig()
 	})
 	if err != nil {
 		return nil, err
 	}

-	runningMu.Lock()
-	defer runningMu.Unlock()
-
-	if running[name] <= 0 {
-		// if server isn't running check to see if this server has
-		// been started already but not by us and stop it if so
-		const maxTries = 10
-		for i := 1; i <= maxTries; i++ {
-			if !isRunning(name) {
-				fs.Logf(name, "Stopped server")
-				break
-			}
-			if i != 1 {
-				time.Sleep(time.Second)
-				fs.Logf(name, "Attempting to stop %s try %d/%d", name, i, maxTries)
-			}
-			stop(name)
-		}
-		if !isRunning(name) {
-			err = start(name)
-			if err == errNotFound {
-				// if no file found then don't start or stop
-				return func() {}, nil
-			} else if err != nil {
-				return nil, err
-			}
-			running[name] = 0
-		} else {
-			running[name] = 1
-		}
+	// If remote has no start command then do nothing
+	if !hasStartCommand(name) {
+		return stopNothing, nil
 	}
-	running[name]++

+	// Start the server
+	err = start(name)
+	if err != nil {
+		return nil, err
+	}
+
+	// And return a function to stop it
 	return func() {
-		runningMu.Lock()
-		defer runningMu.Unlock()
 		stop(name)
 	}, nil

 }
-
-// Stops the named test server
-// Call with the mutex held
-func stop(name string) {
-	running[name]--
-	if running[name] <= 0 {
-		_, err := run(name, "stop")
-		if err != nil {
-			fs.Errorf(name, "Failed to stop server: %v", err)
-		}
-		running[name] = 0
-		fs.Logf(name, "Stopping server")
-	}
-}