1
0
mirror of https://github.com/woodpecker-ci/woodpecker.git synced 2026-06-03 16:35:37 +02:00

local backend: on linux / mac start commands in own process group and kill the group on cancel (#6609)

This commit is contained in:
6543
2026-05-26 14:44:50 +02:00
committed by GitHub
parent 32a7bf9748
commit d37ab38ae1
6 changed files with 264 additions and 5 deletions
+3 -3
View File
@@ -89,13 +89,13 @@ func (e *local) execClone(ctx context.Context, step *types.Step, state *workflow
if err != nil {
return err
}
cmd = exec.CommandContext(ctx, pwsh, "-Command", fmt.Sprintf("%s ; $code=$? ; %s ; if (!$code) {[Environment]::Exit(1)}", state.pluginGitBinary, rmCmd))
cmd = newCmd(ctx, pwsh, "-Command", fmt.Sprintf("%s ; $code=$? ; %s ; if (!$code) {[Environment]::Exit(1)}", state.pluginGitBinary, rmCmd))
} else {
cmd = exec.CommandContext(ctx, "/bin/sh", "-c", fmt.Sprintf("%s ; export code=$? ; %s ; exit $code", state.pluginGitBinary, rmCmd))
cmd = newCmd(ctx, "/bin/sh", "-c", fmt.Sprintf("%s ; export code=$? ; %s ; exit $code", state.pluginGitBinary, rmCmd))
}
} else {
// if we have NO netrc, we can just exec the clone directly
cmd = exec.CommandContext(ctx, state.pluginGitBinary)
cmd = newCmd(ctx, state.pluginGitBinary)
}
cmd.Env = env
cmd.Dir = state.workspaceDir
+37
View File
@@ -0,0 +1,37 @@
// Copyright 2026 Woodpecker Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !windows
package local
import (
	"context"
	"errors"
	"os"
	"os/exec"
	"syscall"
)
// newCmd builds an *exec.Cmd bound to ctx that runs binary with args in a
// process group of its own.
//
// Giving the command its own process group means signals it sends to its group
// do not reach the agent. When ctx is canceled, SIGKILL is delivered to the
// whole group rather than just the direct child, so processes spawned by the
// command are not left behind as orphans.
func newCmd(ctx context.Context, binary string, args ...string) *exec.Cmd {
	cmd := exec.CommandContext(ctx, binary, args...)
	// Create a new process group for the command; the child becomes its leader,
	// so the group id equals the child's pid.
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	cmd.Cancel = func() error {
		// A negative pid addresses the whole process group.
		err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
		if errors.Is(err, syscall.ESRCH) {
			// No process is left in the group. Report it the same way
			// os.Process.Kill does, so os/exec does not turn a command that
			// already finished into a cancellation failure.
			return os.ErrProcessDone
		}
		return err
	}
	return cmd
}
+28
View File
@@ -0,0 +1,28 @@
// Copyright 2026 Woodpecker Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
package local
import (
"context"
"os/exec"
)
// newCmd builds an *exec.Cmd bound to ctx that runs binary with args.
// This variant applies no extra process attributes and keeps the default
// cancel behavior of exec.CommandContext.
func newCmd(ctx context.Context, binary string, args ...string) *exec.Cmd {
	return exec.CommandContext(ctx, binary, args...)
}
+1 -1
View File
@@ -45,7 +45,7 @@ func (e *local) execCommands(ctx context.Context, step *types.Step, state *workf
}
// Use "image name" as run command (indicate shell)
cmd := exec.CommandContext(ctx, step.Image, args...)
cmd := newCmd(ctx, step.Image, args...)
cmd.Env = env
cmd.Dir = state.workspaceDir
+1 -1
View File
@@ -29,7 +29,7 @@ func (e *local) execPlugin(ctx context.Context, step *types.Step, state *workflo
return fmt.Errorf("lookup plugin binary: %w", err)
}
cmd := exec.CommandContext(ctx, binary)
cmd := newCmd(ctx, binary)
cmd.Env = env
cmd.Dir = state.workspaceDir
@@ -0,0 +1,194 @@
// Copyright 2026 Woodpecker Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build linux
package local
import (
"context"
"os"
"os/exec"
"path/filepath"
"slices"
"strconv"
"strings"
"syscall"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.woodpecker-ci.org/woodpecker/v3/pipeline/backend/types"
)
// resolveTestPath locates each of bins via the current PATH and returns a
// colon-separated PATH value made of the distinct directories they live in,
// in first-seen order. The test fails immediately if a binary cannot be found.
// Tests use it to rebuild PATH after prepairEnv() clears the environment.
func resolveTestPath(t *testing.T, bins ...string) string {
	t.Helper()

	var uniqueDirs []string
	for i := range bins {
		resolved, lookupErr := exec.LookPath(bins[i])
		require.NoErrorf(t, lookupErr, "lookup %q", bins[i])
		if dir := filepath.Dir(resolved); !slices.Contains(uniqueDirs, dir) {
			uniqueDirs = append(uniqueDirs, dir)
		}
	}
	return strings.Join(uniqueDirs, ":")
}
// TestStepInOwnProcessGroup ensures a step's shell is spawned in its own
// process group, isolating it from the agent (the test process). Without this
// isolation, signals the step sends to its own process group (e.g. `make -j`
// cleaning up failed parallel jobs) would also reach the agent.
//
// Regression test for: local backend signal propagation to agent.
func TestStepInOwnProcessGroup(t *testing.T) {
	path := resolveTestPath(t, "sh", "sleep")
	prepairEnv(t)
	//nolint:usetesting // see prepairEnv
	os.Setenv("PATH", path)

	backend, ok := New().(*local)
	require.True(t, ok, "New() did not return a *local backend")
	backend.tempDir = t.TempDir()

	ctx := t.Context()
	taskUUID := "test-pgrp-isolation"

	require.NoError(t, backend.SetupWorkflow(ctx, &types.Config{}, taskUUID))
	t.Cleanup(func() {
		// t.Context() is canceled just before cleanup functions run, so tear
		// down with a fresh context instead of the already-canceled one.
		_ = backend.DestroyWorkflow(context.Background(), &types.Config{}, taskUUID)
	})

	step := &types.Step{
		UUID:     "step-pgrp",
		Name:     "pgrp",
		Type:     types.StepTypeCommands,
		Image:    "sh",
		Commands: []string{"sleep 5"},
	}
	require.NoError(t, backend.StartStep(ctx, step, taskUUID))

	stepState, err := backend.getStepState(taskUUID, step.UUID)
	require.NoError(t, err)
	require.NotNil(t, stepState.cmd)
	require.NotNil(t, stepState.cmd.Process)

	childPID := stepState.cmd.Process.Pid
	childPgid, err := syscall.Getpgid(childPID)
	require.NoError(t, err)
	agentPgid, err := syscall.Getpgid(os.Getpid())
	require.NoError(t, err)

	// The child must NOT share the agent's process group, otherwise signals
	// the child sends to its own group (e.g. via `make` or `kill -- -$$`) hit
	// the agent too.
	assert.NotEqualf(t, agentPgid, childPgid,
		"step shell shares process group with agent (pgid=%d); signals from the step would reach the agent",
		agentPgid)

	// The child should be the leader of its own group (pgid == pid).
	assert.Equalf(t, childPID, childPgid,
		"step shell is not the leader of its own process group (pid=%d, pgid=%d)",
		childPID, childPgid)

	require.NoError(t, backend.DestroyStep(ctx, step, taskUUID))
}
// TestStepCancelKillsGrandchildren verifies that canceling a step's context
// also terminates processes started by the step's shell. The plain
// exec.CommandContext cancel only signals the direct child, so without a
// group-aware cancel hook the grandchildren (e.g. `make`, `nix`, `cc1`) would
// be orphaned and keep running.
//
// Regression test for: orphan grandchildren after step cancel.
func TestStepCancelKillsGrandchildren(t *testing.T) {
	searchPath := resolveTestPath(t, "sh", "sleep")
	prepairEnv(t)
	//nolint:usetesting // see prepairEnv
	os.Setenv("PATH", searchPath)

	backend, _ := New().(*local)
	backend.tempDir = t.TempDir()

	ctx, cancel := context.WithCancelCause(t.Context())
	defer cancel(nil)

	taskUUID := "test-cancel-grandchild"
	require.NoError(t, backend.SetupWorkflow(ctx, &types.Config{}, taskUUID))
	t.Cleanup(func() {
		_ = backend.DestroyWorkflow(context.Background(), &types.Config{}, taskUUID)
	})

	pidFile := filepath.Join(t.TempDir(), "grandchild.pid")

	// The backgrounded `sleep` plays the role of the grandchild: the shell
	// records its PID and then blocks in `wait` until the context is canceled.
	script := "sleep 30 & echo $! > " + pidFile + "; wait"
	step := &types.Step{
		UUID:     "step-grandchild",
		Name:     "grandchild",
		Type:     types.StepTypeCommands,
		Image:    "sh",
		Commands: []string{script},
	}
	require.NoError(t, backend.StartStep(ctx, step, taskUUID))

	// Poll until the shell has written a usable PID for the grandchild.
	var grandchildPID int
	pidRecorded := func() bool {
		raw, readErr := os.ReadFile(pidFile)
		if readErr != nil {
			return false
		}
		parsed, convErr := strconv.Atoi(strings.TrimSpace(string(raw)))
		if convErr == nil && parsed > 0 {
			grandchildPID = parsed
			return true
		}
		return false
	}
	require.Eventually(t, pidRecorded, 3*time.Second, 20*time.Millisecond, "grandchild never wrote its pid")

	// Canceling the context should trigger the step's cancel hook, which is
	// expected to take down the whole process group including the grandchild.
	cancel(nil)
	_, _ = backend.WaitStep(context.Background(), step, taskUUID)

	grandchildGone := func() bool {
		return !pidAlive(grandchildPID)
	}
	require.Eventuallyf(t, grandchildGone, 3*time.Second, 50*time.Millisecond,
		"grandchild pid %d is still alive after step cancel; cancel did not propagate to the process group",
		grandchildPID)
}
// pidAlive reports whether pid still maps to a non-zombie process.
//
// kill(pid, 0) also succeeds for zombies, which would give false positives,
// so the "State:" line of /proc/<pid>/status is used instead. An unreadable
// status file or a file without a "State:" line counts as not alive.
func pidAlive(pid int) bool {
	raw, err := os.ReadFile("/proc/" + strconv.Itoa(pid) + "/status")
	if err != nil {
		return false
	}

	remaining := string(raw)
	for remaining != "" {
		var entry string
		entry, remaining, _ = strings.Cut(remaining, "\n")
		if !strings.HasPrefix(entry, "State:") {
			continue
		}
		// e.g. "State:\tZ (zombie)"
		return !strings.Contains(entry, "Z")
	}
	return false
}