1
0
mirror of https://github.com/go-task/task.git synced 2024-12-04 10:24:45 +02:00

Fix behavior of interrupt (SIGINT, SIGTERM) signals

Task will now give time for the processes running to do cleanup work

Ref #458
Ref #479
Fixes #728

Co-authored-by: Marco Molteni <marco.molteni@pix4d.com>
Co-authored-by: aliculPix4D <aleksandar.licul_ext@pix4d.com>
This commit is contained in:
Andrey Nering 2022-06-11 21:32:06 -03:00
parent c9a582fbcc
commit 7989f73f06
9 changed files with 480 additions and 18 deletions

3
.gitignore vendored
View File

@ -26,6 +26,7 @@ dist/
# exuberant ctags
tags
/bin
/bin/*
!/bin/.keep
/testdata/vars/v1
/tmp

View File

@ -2,6 +2,9 @@
## Unreleased
- Fix behavior of interrupt (SIGINT, SIGTERM) signals. Task will now give time
for the processes running to do cleanup work
([#458](https://github.com/go-task/task/issues/458), [#479](https://github.com/go-task/task/pull/479), [#728](https://github.com/go-task/task/issues/728)).
- Add new `--exit-code` (`-x`) flag that will pass-through the exit form the
command being ran
([#755](https://github.com/go-task/task/pull/755)).

View File

@ -23,6 +23,8 @@ tasks:
install:
desc: Installs Task
sources:
- './**/*.go'
cmds:
- go install -v -ldflags="-w -s -X main.version={{.GIT_COMMIT}}" ./cmd/task
@ -40,12 +42,30 @@ tasks:
lint:
desc: Runs golangci-lint
sources:
- './**/*.go'
cmds:
- golangci-lint run
sleepit:build:
desc: Builds the sleepit test helper
sources:
- ./cmd/sleepit/**/*.go
generates:
- ./bin/sleepit
cmds:
- go build -o ./bin/sleepit{{exeExt}} ./cmd/sleepit
sleepit:run:
desc: Builds the sleepit test helper
deps: [sleepit:build]
cmds:
- ./bin/sleepit {{.CLI_ARGS}}
silent: true
test:
desc: Runs test suite
deps: [install]
deps: [install, sleepit:build]
cmds:
- go test {{catLines .GO_PACKAGES}}

0
bin/.keep Normal file
View File

173
cmd/sleepit/sleepit.go Normal file
View File

@ -0,0 +1,173 @@
// This code is released under the MIT License
// Copyright (c) 2020 Marco Molteni and the timeit contributors.
package main
import (
"flag"
"fmt"
"os"
"os/signal"
"time"
)
const usage = `sleepit: sleep for the specified duration, optionally handling signals
When the line "sleepit: ready" is printed, it means that it is safe to send signals to it
Usage: sleepit <command> [<args>]
Commands
default Use default action: on reception of SIGINT terminate abruptly
handle Handle signals: on reception of SIGINT perform cleanup before exiting
version Show the sleepit version`
var (
// Filled by the linker.
fullVersion = "unknown" // example: v0.0.9-8-g941583d027-dirty
)
func main() {
os.Exit(run(os.Args[1:]))
}
func run(args []string) int {
if len(args) < 1 {
fmt.Fprintln(os.Stderr, usage)
return 2
}
defaultCmd := flag.NewFlagSet("default", flag.ExitOnError)
defaultSleep := defaultCmd.Duration("sleep", 5*time.Second, "Sleep duration")
handleCmd := flag.NewFlagSet("handle", flag.ExitOnError)
handleSleep := handleCmd.Duration("sleep", 5*time.Second, "Sleep duration")
handleCleanup := handleCmd.Duration("cleanup", 5*time.Second, "Cleanup duration")
handleTermAfter := handleCmd.Int("term-after", 0,
"Terminate immediately after `N` signals.\n"+
"Default is to terminate only when the cleanup phase has completed.")
versionCmd := flag.NewFlagSet("version", flag.ExitOnError)
switch args[0] {
case "default":
_ = defaultCmd.Parse(args[1:])
if len(defaultCmd.Args()) > 0 {
fmt.Fprintf(os.Stderr, "default: unexpected arguments: %v\n", defaultCmd.Args())
return 2
}
return supervisor(*defaultSleep, 0, 0, nil)
case "handle":
_ = handleCmd.Parse(args[1:])
if *handleTermAfter == 1 {
fmt.Fprintf(os.Stderr, "handle: term-after cannot be 1\n")
return 2
}
if len(handleCmd.Args()) > 0 {
fmt.Fprintf(os.Stderr, "handle: unexpected arguments: %v\n", handleCmd.Args())
return 2
}
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, os.Interrupt) // Ctrl-C -> SIGINT
return supervisor(*handleSleep, *handleCleanup, *handleTermAfter, sigCh)
case "version":
_ = versionCmd.Parse(args[1:])
if len(versionCmd.Args()) > 0 {
fmt.Fprintf(os.Stderr, "version: unexpected arguments: %v\n", versionCmd.Args())
return 2
}
fmt.Printf("sleepit version %s\n", fullVersion)
return 0
default:
fmt.Fprintln(os.Stderr, usage)
return 2
}
}
func supervisor(
sleep time.Duration,
cleanup time.Duration,
termAfter int,
sigCh <-chan os.Signal,
) int {
fmt.Printf("sleepit: ready\n")
fmt.Printf("sleepit: PID=%d sleep=%v cleanup=%v\n",
os.Getpid(), sleep, cleanup)
cancelWork := make(chan struct{})
workerDone := worker(cancelWork, sleep, "work")
cancelCleaner := make(chan struct{})
var cleanerDone <-chan struct{}
sigCount := 0
for {
select {
case sig := <-sigCh:
sigCount++
fmt.Printf("sleepit: got signal=%s count=%d\n", sig, sigCount)
if sigCount == 1 {
// since `cancelWork` is unbuffered, sending will be synchronous:
// we are ensured that the worker has terminated before starting cleanup.
// This is important in some real-life situations.
cancelWork <- struct{}{}
cleanerDone = worker(cancelCleaner, cleanup, "cleanup")
}
if sigCount == termAfter {
cancelCleaner <- struct{}{}
return 4
}
case <-workerDone:
return 0
case <-cleanerDone:
return 3
}
}
}
// Start a worker goroutine and return immediately a `workerDone` channel.
// The goroutine will prepend its prints with the prefix `name`.
// The goroutine will simulate some work and will terminate when one of the following
// conditions happens:
// 1. When `howlong` is elapsed. This case will be signaled on the `workerDone` channel.
// 2. When something happens on channel `canceled`. Note that this simulates real-life,
// so cancellation is not instantaneous: if the caller wants a synchronous cancel,
// it should send a message; if instead it wants an asynchronous cancel, it should
// close the channel.
func worker(
canceled <-chan struct{},
howlong time.Duration,
name string,
) <-chan struct{} {
workerDone := make(chan struct{})
deadline := time.Now().Add(howlong)
go func() {
fmt.Printf("sleepit: %s started\n", name)
for {
select {
case <-canceled:
fmt.Printf("sleepit: %s canceled\n", name)
return
default:
if doSomeWork(deadline) {
fmt.Printf("sleepit: %s done\n", name) // <== NOTE THIS LINE
workerDone <- struct{}{}
return
}
}
}
}()
return workerDone
}
// Do some work and then return, so that the caller can decide wether to continue or not.
// Return true when all work is done.
func doSomeWork(deadline time.Time) bool {
if time.Now().After(deadline) {
return true
}
timeout := 100 * time.Millisecond
time.Sleep(timeout)
return false
}

View File

@ -5,11 +5,9 @@ import (
"fmt"
"log"
"os"
"os/signal"
"path/filepath"
"runtime/debug"
"strings"
"syscall"
"github.com/spf13/pflag"
"mvdan.cc/sh/v3/syntax"
@ -204,11 +202,12 @@ func main() {
globals.Set("CLI_ARGS", taskfile.Var{Static: cliArgs})
e.Taskfile.Vars.Merge(globals)
ctx := context.Background()
if !watch {
ctx = getSignalContext()
e.InterceptInterruptSignals()
}
ctx := context.Background()
if status {
if err := e.Status(ctx, calls...); err != nil {
log.Fatal(err)
@ -249,18 +248,6 @@ func getArgs() ([]string, string, error) {
return args[:doubleDashPos], strings.Join(quotedCliArgs, " "), nil
}
func getSignalContext() context.Context {
ch := make(chan os.Signal, 1)
signal.Notify(ch, os.Interrupt, syscall.SIGTERM)
ctx, cancel := context.WithCancel(context.Background())
go func() {
sig := <-ch
log.Printf("task: signal received: %s", sig)
cancel()
}()
return ctx
}
func getVersion() string {
if version != "" {
return version

View File

@ -7,6 +7,7 @@ import (
"os"
"path/filepath"
"strings"
"time"
"mvdan.cc/sh/v3/expand"
"mvdan.cc/sh/v3/interp"
@ -48,6 +49,7 @@ func RunCommand(ctx context.Context, opts *RunCommandOptions) error {
r, err := interp.New(
interp.Params("-e"),
interp.Env(expand.ListEnviron(environ...)),
interp.ExecHandler(interp.DefaultExecHandler(15*time.Second)),
interp.OpenHandler(openHandler),
interp.StdIO(opts.Stdin, opts.Stdout, opts.Stderr),
dirOption(opts.Dir),

31
signals.go Normal file
View File

@ -0,0 +1,31 @@
package task
import (
"os"
"os/signal"
"syscall"
"github.com/go-task/task/v3/internal/logger"
)
// NOTE(@andreynering): This function intercepts SIGINT and SIGTERM signals
// so the Task process is not killed immediatelly and processes running have
// time to do cleanup work.
func (e *Executor) InterceptInterruptSignals() {
ch := make(chan os.Signal, 3)
signal.Notify(ch, os.Interrupt, syscall.SIGTERM)
go func() {
for i := 1; i <= 3; i++ {
sig := <-ch
if i < 3 {
e.Logger.Outf(logger.Yellow, `task: Signal received: "%s"`, sig)
continue
}
e.Logger.Errf(logger.Red, `task: Signal received for the third time: "%s". Forcing shutdown`, sig)
os.Exit(1)
}
}()
}

245
unix_test.go Normal file
View File

@ -0,0 +1,245 @@
//go:build !windows
// +build !windows
// This file contains tests for signal handling on Unix.
// Based on code from https://github.com/marco-m/timeit
// Due to how signals work, for robustness we always spawn a separate process;
// we never send signals to the test process.
package task_test
import (
"bytes"
"errors"
"os"
"os/exec"
"path/filepath"
"strings"
"syscall"
"testing"
"time"
)
var (
SLEEPIT, _ = filepath.Abs("./bin/sleepit")
)
func TestSignalSentToProcessGroup(t *testing.T) {
task, err := getTaskPath()
if err != nil {
t.Fatal(err)
}
testCases := map[string]struct {
args []string
sendSigs int
want []string
notWant []string
}{
// regression:
// - child is terminated, immediately, by "context canceled" (another bug???)
"child does not handle sigint: receives sigint and terminates immediately": {
args: []string{task, "--", SLEEPIT, "default", "-sleep=10s"},
sendSigs: 1,
want: []string{
"sleepit: ready\n",
"sleepit: work started\n",
"task: Signal received: \"interrupt\"\n",
// 130 = 128 + SIGINT
"task: Failed to run task \"default\": exit status 130\n",
},
notWant: []string{
"task: Failed to run task \"default\": context canceled\n",
},
},
// 2 regressions:
// - child receives 2 signals instead of 1
// - child is terminated, immediately, by "context canceled" (another bug???)
// TODO we need -cleanup=2s only to show reliably the bug; once the fix is committed,
// we can use -cleanup=50ms to speed the test up
"child intercepts sigint: receives sigint and does cleanup": {
args: []string{task, "--", SLEEPIT, "handle", "-sleep=10s", "-cleanup=2s"},
sendSigs: 1,
want: []string{
"sleepit: ready\n",
"sleepit: work started\n",
"task: Signal received: \"interrupt\"\n",
"sleepit: got signal=interrupt count=1\n",
"sleepit: work canceled\n",
"sleepit: cleanup started\n",
"sleepit: cleanup done\n",
"task: Failed to run task \"default\": exit status 3\n",
},
notWant: []string{
"sleepit: got signal=interrupt count=2\n",
"task: Failed to run task \"default\": context canceled\n",
},
},
// regression: child receives 2 signal instead of 1 and thus terminates abruptly
"child simulates terraform: receives 1 sigint and does cleanup": {
args: []string{task, "--", SLEEPIT, "handle", "-term-after=2", "-sleep=10s", "-cleanup=50ms"},
sendSigs: 1,
want: []string{
"sleepit: ready\n",
"sleepit: work started\n",
"task: Signal received: \"interrupt\"\n",
"sleepit: got signal=interrupt count=1\n",
"sleepit: work canceled\n",
"sleepit: cleanup started\n",
"sleepit: cleanup done\n",
"task: Failed to run task \"default\": exit status 3\n",
},
notWant: []string{
"sleepit: got signal=interrupt count=2\n",
"sleepit: cleanup canceled\n",
"task: Failed to run task \"default\": exit status 4\n",
},
},
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
var out bytes.Buffer
sut := exec.Command(tc.args[0], tc.args[1:]...)
sut.Stdout = &out
sut.Stderr = &out
sut.Dir = "testdata/ignore_signals"
// Create a new process group by setting the process group ID of the child
// to the child PID.
// By default, the child would inherit the process group of the parent, but
// we want to avoid this, to protect the parent (the test process) from the
// signal that this test will send. More info in the comments below for
// syscall.Kill().
sut.SysProcAttr = &syscall.SysProcAttr{Setpgid: true, Pgid: 0}
if err := sut.Start(); err != nil {
t.Fatalf("starting the SUT process: %v", err)
}
// After the child is started, we want to avoid a race condition where we send
// it a signal before it had time to setup its own signal handlers. Sleeping
// is way too flaky, instead we parse the child output until we get a line
// that we know is printed after the signal handlers are installed...
ready := false
timeout := time.Duration(time.Second)
start := time.Now()
for time.Since(start) < timeout {
if strings.Contains(out.String(), "sleepit: ready\n") {
ready = true
break
}
time.Sleep(10 * time.Millisecond)
}
if !ready {
t.Fatalf("sleepit not ready after %v\n"+
"additional information:\n"+
" output:\n%s",
timeout, out.String())
}
// When we have a running program in a shell and type CTRL-C, the tty driver
// will send a SIGINT signal to all the processes in the foreground process
// group (see https://en.wikipedia.org/wiki/Process_group).
//
// Here we want to emulate this behavior: send SIGINT to the process group of
// the test executable. Although Go for some reasons doesn't wrap the
// killpg(2) system call, what works is using syscall.Kill(-PID, SIGINT),
// where the negative PID means the corresponding process group. Note that
// this negative PID works only as long as the caller of the kill(2) system
// call has a different PID, which is the case for this test.
for i := 1; i <= tc.sendSigs; i++ {
if err := syscall.Kill(-sut.Process.Pid, syscall.SIGINT); err != nil {
t.Fatalf("sending INT signal to the process group: %v", err)
}
time.Sleep(1 * time.Millisecond)
}
err := sut.Wait()
// In case of a subprocess failing, Task always returns exit code 1, not the
// exit code returned by the subprocess. This is understandable, since Task
// supports parallel execution: if two parallel subprocess fail, each with a
// different exit code, which one should Task report? This would be a race.
var wantErr *exec.ExitError
const wantExitStatus = 1 // Task always returns exit code 1 in case of error
if errors.As(err, &wantErr) {
if wantErr.ExitCode() != wantExitStatus {
t.Errorf(
"waiting for child process: got exit status %v; want %d\n"+
"additional information:\n"+
" process state: %q",
wantErr.ExitCode(), wantExitStatus, wantErr.String())
}
} else {
t.Errorf("waiting for child process: got unexpected error type %v (%T); want (%T)",
err, err, wantErr)
}
gotLines := strings.SplitAfter(out.String(), "\n")
notFound := listDifference(tc.want, gotLines)
if len(notFound) > 0 {
t.Errorf("\nwanted but not found:\n%v", notFound)
}
found := listIntersection(tc.notWant, gotLines)
if len(found) > 0 {
t.Errorf("\nunwanted but found:\n%v", found)
}
if len(notFound) > 0 || len(found) > 0 {
t.Errorf("\noutput:\n%v", gotLines)
}
})
}
}
func getTaskPath() (string, error) {
if info, err := os.Stat("./bin/task"); err == nil {
return info.Name(), nil
}
if path, err := exec.LookPath("task"); err == nil {
return path, nil
}
return "", errors.New("task: \"task\" binary was not found!")
}
// Return the difference of the two lists: the elements that are present in the first
// list, but not in the second one. The notion of presence is not with `=` but with
// string.Contains(l2, l1).
// FIXME this does not enforce ordering. We might want to support both.
func listDifference(lines1, lines2 []string) []string {
difference := []string{}
for _, l1 := range lines1 {
found := false
for _, l2 := range lines2 {
if strings.Contains(l2, l1) {
found = true
break
}
}
if !found {
difference = append(difference, l1)
}
}
return difference
}
// Return the intersection of the two lists: the elements that are present in both lists.
// The notion of presence is not with '=' but with string.Contains(l2, l1)
// FIXME this does not enforce ordering. We might want to support both.
func listIntersection(lines1, lines2 []string) []string {
intersection := []string{}
for _, l1 := range lines1 {
for _, l2 := range lines2 {
if strings.Contains(l2, l1) {
intersection = append(intersection, l1)
break
}
}
}
return intersection
}