diff --git a/.gitignore b/.gitignore index 6d5b8eab..21c645e3 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ dist/ # exuberant ctags tags -/bin +/bin/* +!/bin/.keep /testdata/vars/v1 /tmp diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fc77295..98cd2a4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## Unreleased +- Fix behavior of interrupt (SIGINT, SIGTERM) signals. Task will now give time + for the processes running to do cleanup work + ([#458](https://github.com/go-task/task/issues/458), [#479](https://github.com/go-task/task/pull/479), [#728](https://github.com/go-task/task/issues/728)). - Add new `--exit-code` (`-x`) flag that will pass-through the exit form the command being ran ([#755](https://github.com/go-task/task/pull/755)). diff --git a/Taskfile.yml b/Taskfile.yml index 3bb0065a..709cddcd 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -23,6 +23,8 @@ tasks: install: desc: Installs Task + sources: + - './**/*.go' cmds: - go install -v -ldflags="-w -s -X main.version={{.GIT_COMMIT}}" ./cmd/task @@ -40,12 +42,30 @@ tasks: lint: desc: Runs golangci-lint + sources: + - './**/*.go' cmds: - golangci-lint run + sleepit:build: + desc: Builds the sleepit test helper + sources: + - ./cmd/sleepit/**/*.go + generates: + - ./bin/sleepit + cmds: + - go build -o ./bin/sleepit{{exeExt}} ./cmd/sleepit + + sleepit:run: + desc: Builds the sleepit test helper + deps: [sleepit:build] + cmds: + - ./bin/sleepit {{.CLI_ARGS}} + silent: true + test: desc: Runs test suite - deps: [install] + deps: [install, sleepit:build] cmds: - go test {{catLines .GO_PACKAGES}} diff --git a/bin/.keep b/bin/.keep new file mode 100644 index 00000000..e69de29b diff --git a/cmd/sleepit/sleepit.go b/cmd/sleepit/sleepit.go new file mode 100644 index 00000000..90201d1c --- /dev/null +++ b/cmd/sleepit/sleepit.go @@ -0,0 +1,173 @@ +// This code is released under the MIT License +// Copyright (c) 2020 Marco Molteni and the timeit contributors. + +package main + +import ( + "flag" + "fmt" + "os" + "os/signal" + "time" +) + +const usage = `sleepit: sleep for the specified duration, optionally handling signals +When the line "sleepit: ready" is printed, it means that it is safe to send signals to it +Usage: sleepit [] +Commands + default Use default action: on reception of SIGINT terminate abruptly + handle Handle signals: on reception of SIGINT perform cleanup before exiting + version Show the sleepit version` + +var ( + // Filled by the linker. + fullVersion = "unknown" // example: v0.0.9-8-g941583d027-dirty +) + +func main() { + os.Exit(run(os.Args[1:])) +} + +func run(args []string) int { + if len(args) < 1 { + fmt.Fprintln(os.Stderr, usage) + return 2 + } + + defaultCmd := flag.NewFlagSet("default", flag.ExitOnError) + defaultSleep := defaultCmd.Duration("sleep", 5*time.Second, "Sleep duration") + + handleCmd := flag.NewFlagSet("handle", flag.ExitOnError) + handleSleep := handleCmd.Duration("sleep", 5*time.Second, "Sleep duration") + handleCleanup := handleCmd.Duration("cleanup", 5*time.Second, "Cleanup duration") + handleTermAfter := handleCmd.Int("term-after", 0, + "Terminate immediately after `N` signals.\n"+ + "Default is to terminate only when the cleanup phase has completed.") + + versionCmd := flag.NewFlagSet("version", flag.ExitOnError) + + switch args[0] { + + case "default": + _ = defaultCmd.Parse(args[1:]) + if len(defaultCmd.Args()) > 0 { + fmt.Fprintf(os.Stderr, "default: unexpected arguments: %v\n", defaultCmd.Args()) + return 2 + } + return supervisor(*defaultSleep, 0, 0, nil) + + case "handle": + _ = handleCmd.Parse(args[1:]) + if *handleTermAfter == 1 { + fmt.Fprintf(os.Stderr, "handle: term-after cannot be 1\n") + return 2 + } + if len(handleCmd.Args()) > 0 { + fmt.Fprintf(os.Stderr, "handle: unexpected arguments: %v\n", handleCmd.Args()) + return 2 + } + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, os.Interrupt) // Ctrl-C -> SIGINT + return supervisor(*handleSleep, *handleCleanup, *handleTermAfter, sigCh) + + case "version": + _ = versionCmd.Parse(args[1:]) + if len(versionCmd.Args()) > 0 { + fmt.Fprintf(os.Stderr, "version: unexpected arguments: %v\n", versionCmd.Args()) + return 2 + } + fmt.Printf("sleepit version %s\n", fullVersion) + return 0 + + default: + fmt.Fprintln(os.Stderr, usage) + return 2 + } +} + +func supervisor( + sleep time.Duration, + cleanup time.Duration, + termAfter int, + sigCh <-chan os.Signal, +) int { + fmt.Printf("sleepit: ready\n") + fmt.Printf("sleepit: PID=%d sleep=%v cleanup=%v\n", + os.Getpid(), sleep, cleanup) + + cancelWork := make(chan struct{}) + workerDone := worker(cancelWork, sleep, "work") + + cancelCleaner := make(chan struct{}) + var cleanerDone <-chan struct{} + + sigCount := 0 + for { + select { + case sig := <-sigCh: + sigCount++ + fmt.Printf("sleepit: got signal=%s count=%d\n", sig, sigCount) + if sigCount == 1 { + // since `cancelWork` is unbuffered, sending will be synchronous: + // we are ensured that the worker has terminated before starting cleanup. + // This is important in some real-life situations. + cancelWork <- struct{}{} + cleanerDone = worker(cancelCleaner, cleanup, "cleanup") + } + if sigCount == termAfter { + cancelCleaner <- struct{}{} + return 4 + } + case <-workerDone: + return 0 + case <-cleanerDone: + return 3 + } + } +} + +// Start a worker goroutine and return immediately a `workerDone` channel. +// The goroutine will prepend its prints with the prefix `name`. +// The goroutine will simulate some work and will terminate when one of the following +// conditions happens: +// 1. When `howlong` is elapsed. This case will be signaled on the `workerDone` channel. +// 2. When something happens on channel `canceled`. Note that this simulates real-life, +// so cancellation is not instantaneous: if the caller wants a synchronous cancel, +// it should send a message; if instead it wants an asynchronous cancel, it should +// close the channel. +func worker( + canceled <-chan struct{}, + howlong time.Duration, + name string, +) <-chan struct{} { + workerDone := make(chan struct{}) + deadline := time.Now().Add(howlong) + go func() { + fmt.Printf("sleepit: %s started\n", name) + for { + select { + case <-canceled: + fmt.Printf("sleepit: %s canceled\n", name) + return + default: + if doSomeWork(deadline) { + fmt.Printf("sleepit: %s done\n", name) // <== NOTE THIS LINE + workerDone <- struct{}{} + return + } + } + } + }() + return workerDone +} + +// Do some work and then return, so that the caller can decide wether to continue or not. +// Return true when all work is done. +func doSomeWork(deadline time.Time) bool { + if time.Now().After(deadline) { + return true + } + timeout := 100 * time.Millisecond + time.Sleep(timeout) + return false +} diff --git a/cmd/task/task.go b/cmd/task/task.go index 7b9c8202..bc788e39 100644 --- a/cmd/task/task.go +++ b/cmd/task/task.go @@ -5,11 +5,9 @@ import ( "fmt" "log" "os" - "os/signal" "path/filepath" "runtime/debug" "strings" - "syscall" "github.com/spf13/pflag" "mvdan.cc/sh/v3/syntax" @@ -204,11 +202,12 @@ func main() { globals.Set("CLI_ARGS", taskfile.Var{Static: cliArgs}) e.Taskfile.Vars.Merge(globals) - ctx := context.Background() if !watch { - ctx = getSignalContext() + e.InterceptInterruptSignals() } + ctx := context.Background() + if status { if err := e.Status(ctx, calls...); err != nil { log.Fatal(err) @@ -249,18 +248,6 @@ func getArgs() ([]string, string, error) { return args[:doubleDashPos], strings.Join(quotedCliArgs, " "), nil } -func getSignalContext() context.Context { - ch := make(chan os.Signal, 1) - signal.Notify(ch, os.Interrupt, syscall.SIGTERM) - ctx, cancel := context.WithCancel(context.Background()) - go func() { - sig := <-ch - log.Printf("task: signal received: %s", sig) - cancel() - }() - return ctx -} - func getVersion() string { if version != "" { return version diff --git a/internal/execext/exec.go b/internal/execext/exec.go index d3283fb8..d7107ca1 100644 --- a/internal/execext/exec.go +++ b/internal/execext/exec.go @@ -7,6 +7,7 @@ import ( "os" "path/filepath" "strings" + "time" "mvdan.cc/sh/v3/expand" "mvdan.cc/sh/v3/interp" @@ -48,6 +49,7 @@ func RunCommand(ctx context.Context, opts *RunCommandOptions) error { r, err := interp.New( interp.Params("-e"), interp.Env(expand.ListEnviron(environ...)), + interp.ExecHandler(interp.DefaultExecHandler(15*time.Second)), interp.OpenHandler(openHandler), interp.StdIO(opts.Stdin, opts.Stdout, opts.Stderr), dirOption(opts.Dir), diff --git a/signals.go b/signals.go new file mode 100644 index 00000000..09a23c04 --- /dev/null +++ b/signals.go @@ -0,0 +1,31 @@ +package task + +import ( + "os" + "os/signal" + "syscall" + + "github.com/go-task/task/v3/internal/logger" +) + +// NOTE(@andreynering): This function intercepts SIGINT and SIGTERM signals +// so the Task process is not killed immediatelly and processes running have +// time to do cleanup work. +func (e *Executor) InterceptInterruptSignals() { + ch := make(chan os.Signal, 3) + signal.Notify(ch, os.Interrupt, syscall.SIGTERM) + + go func() { + for i := 1; i <= 3; i++ { + sig := <-ch + + if i < 3 { + e.Logger.Outf(logger.Yellow, `task: Signal received: "%s"`, sig) + continue + } + + e.Logger.Errf(logger.Red, `task: Signal received for the third time: "%s". Forcing shutdown`, sig) + os.Exit(1) + } + }() +} diff --git a/unix_test.go b/unix_test.go new file mode 100644 index 00000000..6c302910 --- /dev/null +++ b/unix_test.go @@ -0,0 +1,245 @@ +//go:build !windows +// +build !windows + +// This file contains tests for signal handling on Unix. +// Based on code from https://github.com/marco-m/timeit +// Due to how signals work, for robustness we always spawn a separate process; +// we never send signals to the test process. + +package task_test + +import ( + "bytes" + "errors" + "os" + "os/exec" + "path/filepath" + "strings" + "syscall" + "testing" + "time" +) + +var ( + SLEEPIT, _ = filepath.Abs("./bin/sleepit") +) + +func TestSignalSentToProcessGroup(t *testing.T) { + task, err := getTaskPath() + if err != nil { + t.Fatal(err) + } + + testCases := map[string]struct { + args []string + sendSigs int + want []string + notWant []string + }{ + // regression: + // - child is terminated, immediately, by "context canceled" (another bug???) + "child does not handle sigint: receives sigint and terminates immediately": { + args: []string{task, "--", SLEEPIT, "default", "-sleep=10s"}, + sendSigs: 1, + want: []string{ + "sleepit: ready\n", + "sleepit: work started\n", + "task: Signal received: \"interrupt\"\n", + // 130 = 128 + SIGINT + "task: Failed to run task \"default\": exit status 130\n", + }, + notWant: []string{ + "task: Failed to run task \"default\": context canceled\n", + }, + }, + // 2 regressions: + // - child receives 2 signals instead of 1 + // - child is terminated, immediately, by "context canceled" (another bug???) + // TODO we need -cleanup=2s only to show reliably the bug; once the fix is committed, + // we can use -cleanup=50ms to speed the test up + "child intercepts sigint: receives sigint and does cleanup": { + args: []string{task, "--", SLEEPIT, "handle", "-sleep=10s", "-cleanup=2s"}, + sendSigs: 1, + want: []string{ + "sleepit: ready\n", + "sleepit: work started\n", + "task: Signal received: \"interrupt\"\n", + "sleepit: got signal=interrupt count=1\n", + "sleepit: work canceled\n", + "sleepit: cleanup started\n", + "sleepit: cleanup done\n", + "task: Failed to run task \"default\": exit status 3\n", + }, + notWant: []string{ + "sleepit: got signal=interrupt count=2\n", + "task: Failed to run task \"default\": context canceled\n", + }, + }, + // regression: child receives 2 signal instead of 1 and thus terminates abruptly + "child simulates terraform: receives 1 sigint and does cleanup": { + args: []string{task, "--", SLEEPIT, "handle", "-term-after=2", "-sleep=10s", "-cleanup=50ms"}, + sendSigs: 1, + want: []string{ + "sleepit: ready\n", + "sleepit: work started\n", + "task: Signal received: \"interrupt\"\n", + "sleepit: got signal=interrupt count=1\n", + "sleepit: work canceled\n", + "sleepit: cleanup started\n", + "sleepit: cleanup done\n", + "task: Failed to run task \"default\": exit status 3\n", + }, + notWant: []string{ + "sleepit: got signal=interrupt count=2\n", + "sleepit: cleanup canceled\n", + "task: Failed to run task \"default\": exit status 4\n", + }, + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + var out bytes.Buffer + sut := exec.Command(tc.args[0], tc.args[1:]...) + sut.Stdout = &out + sut.Stderr = &out + sut.Dir = "testdata/ignore_signals" + // Create a new process group by setting the process group ID of the child + // to the child PID. + // By default, the child would inherit the process group of the parent, but + // we want to avoid this, to protect the parent (the test process) from the + // signal that this test will send. More info in the comments below for + // syscall.Kill(). + sut.SysProcAttr = &syscall.SysProcAttr{Setpgid: true, Pgid: 0} + + if err := sut.Start(); err != nil { + t.Fatalf("starting the SUT process: %v", err) + } + + // After the child is started, we want to avoid a race condition where we send + // it a signal before it had time to setup its own signal handlers. Sleeping + // is way too flaky, instead we parse the child output until we get a line + // that we know is printed after the signal handlers are installed... + ready := false + timeout := time.Duration(time.Second) + start := time.Now() + for time.Since(start) < timeout { + if strings.Contains(out.String(), "sleepit: ready\n") { + ready = true + break + } + time.Sleep(10 * time.Millisecond) + } + if !ready { + t.Fatalf("sleepit not ready after %v\n"+ + "additional information:\n"+ + " output:\n%s", + timeout, out.String()) + } + + // When we have a running program in a shell and type CTRL-C, the tty driver + // will send a SIGINT signal to all the processes in the foreground process + // group (see https://en.wikipedia.org/wiki/Process_group). + // + // Here we want to emulate this behavior: send SIGINT to the process group of + // the test executable. Although Go for some reasons doesn't wrap the + // killpg(2) system call, what works is using syscall.Kill(-PID, SIGINT), + // where the negative PID means the corresponding process group. Note that + // this negative PID works only as long as the caller of the kill(2) system + // call has a different PID, which is the case for this test. + for i := 1; i <= tc.sendSigs; i++ { + if err := syscall.Kill(-sut.Process.Pid, syscall.SIGINT); err != nil { + t.Fatalf("sending INT signal to the process group: %v", err) + } + time.Sleep(1 * time.Millisecond) + } + + err := sut.Wait() + + // In case of a subprocess failing, Task always returns exit code 1, not the + // exit code returned by the subprocess. This is understandable, since Task + // supports parallel execution: if two parallel subprocess fail, each with a + // different exit code, which one should Task report? This would be a race. + var wantErr *exec.ExitError + const wantExitStatus = 1 // Task always returns exit code 1 in case of error + if errors.As(err, &wantErr) { + if wantErr.ExitCode() != wantExitStatus { + t.Errorf( + "waiting for child process: got exit status %v; want %d\n"+ + "additional information:\n"+ + " process state: %q", + wantErr.ExitCode(), wantExitStatus, wantErr.String()) + } + } else { + t.Errorf("waiting for child process: got unexpected error type %v (%T); want (%T)", + err, err, wantErr) + } + + gotLines := strings.SplitAfter(out.String(), "\n") + notFound := listDifference(tc.want, gotLines) + if len(notFound) > 0 { + t.Errorf("\nwanted but not found:\n%v", notFound) + } + + found := listIntersection(tc.notWant, gotLines) + if len(found) > 0 { + t.Errorf("\nunwanted but found:\n%v", found) + } + + if len(notFound) > 0 || len(found) > 0 { + t.Errorf("\noutput:\n%v", gotLines) + } + }) + } +} + +func getTaskPath() (string, error) { + if info, err := os.Stat("./bin/task"); err == nil { + return info.Name(), nil + } + + if path, err := exec.LookPath("task"); err == nil { + return path, nil + } + + return "", errors.New("task: \"task\" binary was not found!") +} + +// Return the difference of the two lists: the elements that are present in the first +// list, but not in the second one. The notion of presence is not with `=` but with +// string.Contains(l2, l1). +// FIXME this does not enforce ordering. We might want to support both. +func listDifference(lines1, lines2 []string) []string { + difference := []string{} + for _, l1 := range lines1 { + found := false + for _, l2 := range lines2 { + if strings.Contains(l2, l1) { + found = true + break + } + } + if !found { + difference = append(difference, l1) + } + } + + return difference +} + +// Return the intersection of the two lists: the elements that are present in both lists. +// The notion of presence is not with '=' but with string.Contains(l2, l1) +// FIXME this does not enforce ordering. We might want to support both. +func listIntersection(lines1, lines2 []string) []string { + intersection := []string{} + for _, l1 := range lines1 { + for _, l2 := range lines2 { + if strings.Contains(l2, l1) { + intersection = append(intersection, l1) + break + } + } + } + + return intersection +}