1
0
mirror of https://github.com/woodpecker-ci/woodpecker.git synced 2026-06-03 16:35:37 +02:00
Files
woodpecker/cmd/agent/core/agent.go
T

333 lines
9.8 KiB
Go
Raw Normal View History

// Copyright 2023 Woodpecker Authors
2018-02-19 14:24:10 -08:00
// Copyright 2018 Drone.IO Inc.
2018-03-21 14:02:17 +01:00
//
2018-02-19 14:24:10 -08:00
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
2018-03-21 14:02:17 +01:00
//
2018-02-19 14:24:10 -08:00
// http://www.apache.org/licenses/LICENSE-2.0
2018-03-21 14:02:17 +01:00
//
2018-02-19 14:24:10 -08:00
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package core
2016-04-19 18:37:53 -07:00
import (
2017-03-16 18:14:02 +08:00
"context"
"crypto/tls"
"errors"
"fmt"
2017-09-12 11:25:55 -07:00
"net/http"
2017-07-19 17:46:03 -04:00
"os"
"strings"
2024-07-13 16:06:20 -07:00
"sync/atomic"
"time"
2016-04-19 18:37:53 -07:00
2021-10-12 02:25:13 -05:00
"github.com/rs/zerolog/log"
2024-07-17 16:26:35 -07:00
"github.com/urfave/cli/v3"
2024-07-13 16:06:20 -07:00
"golang.org/x/sync/errgroup"
2017-06-28 13:21:22 -04:00
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
2024-05-24 22:35:04 +02:00
grpc_credentials "google.golang.org/grpc/credentials"
2022-01-29 16:04:50 +01:00
"google.golang.org/grpc/credentials/insecure"
2018-01-08 09:28:38 -06:00
"google.golang.org/grpc/keepalive"
2017-07-19 17:46:03 -04:00
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/status"
2017-06-28 13:21:22 -04:00
"go.woodpecker-ci.org/woodpecker/v2/agent"
2024-05-24 22:35:04 +02:00
agent_rpc "go.woodpecker-ci.org/woodpecker/v2/agent/rpc"
"go.woodpecker-ci.org/woodpecker/v2/pipeline/backend"
"go.woodpecker-ci.org/woodpecker/v2/pipeline/backend/types"
"go.woodpecker-ci.org/woodpecker/v2/pipeline/rpc"
2023-12-31 23:29:56 +01:00
"go.woodpecker-ci.org/woodpecker/v2/shared/logger"
"go.woodpecker-ci.org/woodpecker/v2/shared/utils"
"go.woodpecker-ci.org/woodpecker/v2/version"
2016-04-19 18:37:53 -07:00
)
2024-07-13 16:06:20 -07:00
// Timing parameters used by the agent.
const (
	// reportHealthInterval is the pause between two health reports sent to the server.
	reportHealthInterval = 10 * time.Second

	// authInterceptorRefreshInterval is the refresh interval handed to the gRPC auth interceptor.
	authInterceptorRefreshInterval = 30 * time.Minute

	// shutdownTimeout is the timeout applied to shutdownCtx once the agent is asked to stop.
	shutdownTimeout = 5 * time.Second
)

// Package-level shutdown hooks. They start as harmless defaults and are
// replaced while run is executing.
var (
	// stopAgentFunc stops the whole agent; the default does nothing.
	stopAgentFunc context.CancelCauseFunc = func(error) {}

	// shutdownCancelFunc releases shutdownCtx; the default does nothing.
	shutdownCancelFunc context.CancelFunc = func() {}

	// shutdownCtx is the context handed to services for their graceful shutdown.
	shutdownCtx = context.Background()
)
2024-07-17 16:26:35 -07:00
func run(ctx context.Context, c *cli.Command, backends []types.Backend) error {
2024-07-13 16:06:20 -07:00
agentCtx, ctxCancel := context.WithCancelCause(ctx)
stopAgentFunc = func(err error) {
msg := "shutdown of whole agent"
if err != nil {
log.Error().Err(err).Msg(msg)
} else {
log.Info().Msg(msg)
}
stopAgentFunc = func(error) {}
shutdownCtx, shutdownCancelFunc = context.WithTimeout(shutdownCtx, shutdownTimeout)
ctxCancel(err)
}
defer stopAgentFunc(nil)
defer shutdownCancelFunc()
serviceWaitingGroup := errgroup.Group{}
2023-07-12 18:51:40 +02:00
agentConfigPath := c.String("agent-config")
2017-07-19 17:46:03 -04:00
hostname := c.String("hostname")
if len(hostname) == 0 {
hostname, _ = os.Hostname()
}
2024-07-17 16:26:35 -07:00
counter.Polling = int(c.Int("max-workflows"))
2017-09-12 13:40:24 -07:00
counter.Running = 0
2021-10-27 21:03:14 +02:00
if c.Bool("healthcheck") {
2024-07-13 16:06:20 -07:00
serviceWaitingGroup.Go(
func() error {
server := &http.Server{Addr: c.String("healthcheck-addr")}
go func() {
<-agentCtx.Done()
log.Info().Msg("shutdown healthcheck server ...")
if err := server.Shutdown(shutdownCtx); err != nil { //nolint:contextcheck
log.Error().Err(err).Msg("shutdown healthcheck server failed")
} else {
log.Info().Msg("healthcheck server stopped")
}
}()
if err := server.ListenAndServe(); err != nil {
log.Error().Err(err).Msgf("cannot listen on address %s", c.String("healthcheck-addr"))
}
return nil
})
2017-09-12 11:25:55 -07:00
}
2022-01-29 16:04:50 +01:00
var transport grpc.DialOption
2022-01-31 15:38:00 +01:00
if c.Bool("grpc-secure") {
log.Trace().Msg("use ssl for grpc")
2024-05-24 22:35:04 +02:00
transport = grpc.WithTransportCredentials(grpc_credentials.NewTLS(&tls.Config{InsecureSkipVerify: c.Bool("grpc-skip-insecure")}))
2022-01-29 16:04:50 +01:00
} else {
transport = grpc.WithTransportCredentials(insecure.NewCredentials())
}
2024-05-23 17:37:21 +02:00
authConn, err := grpc.NewClient(
2017-06-28 13:21:22 -04:00
c.String("server"),
transport,
grpc.WithKeepaliveParams(keepalive.ClientParameters{
Time: c.Duration("grpc-keepalive-time"),
Timeout: c.Duration("grpc-keepalive-timeout"),
2017-06-29 19:35:38 -04:00
}),
)
if err != nil {
return fmt.Errorf("could not create new gRPC 'channel' for authentication: %w", err)
}
defer authConn.Close()
2023-07-12 18:51:40 +02:00
agentConfig := readAgentConfig(agentConfigPath)
agentToken := c.String("grpc-token")
2024-07-13 16:06:20 -07:00
grpcClientCtx, grpcClientCtxCancel := context.WithCancelCause(context.Background())
defer grpcClientCtxCancel(nil)
2024-05-24 22:35:04 +02:00
authClient := agent_rpc.NewAuthGrpcClient(authConn, agentToken, agentConfig.AgentID)
2024-07-17 16:26:35 -07:00
authInterceptor, err := agent_rpc.NewAuthInterceptor(grpcClientCtx, authClient, authInterceptorRefreshInterval) //nolint:contextcheck
if err != nil {
return fmt.Errorf("could not create new auth interceptor: %w", err)
}
2024-05-23 17:37:21 +02:00
conn, err := grpc.NewClient(
c.String("server"),
transport,
2018-01-08 12:47:08 -06:00
grpc.WithKeepaliveParams(keepalive.ClientParameters{
2022-01-31 15:38:00 +01:00
Time: c.Duration("grpc-keepalive-time"),
Timeout: c.Duration("grpc-keepalive-timeout"),
2018-01-08 12:47:08 -06:00
}),
grpc.WithUnaryInterceptor(authInterceptor.Unary()),
grpc.WithStreamInterceptor(authInterceptor.Stream()),
2017-03-16 18:14:02 +08:00
)
if err != nil {
return fmt.Errorf("could not create new gRPC 'channel' for normal orchestration: %w", err)
2017-03-05 18:56:08 +11:00
}
2017-06-28 13:21:22 -04:00
defer conn.Close()
2024-09-18 19:29:56 +05:00
client := agent_rpc.NewGrpcClient(ctx, conn)
2024-07-13 16:06:20 -07:00
agentConfigPersisted := atomic.Bool{}
2017-03-05 18:56:08 +11:00
2024-07-13 16:06:20 -07:00
grpcCtx := metadata.NewOutgoingContext(grpcClientCtx, metadata.Pairs("hostname", hostname))
2016-09-29 17:45:13 -04:00
// check if grpc server version is compatible with agent
2024-07-17 16:26:35 -07:00
grpcServerVersion, err := client.Version(grpcCtx) //nolint:contextcheck
if err != nil {
log.Error().Err(err).Msg("could not get grpc server version")
return err
}
2024-05-24 22:35:04 +02:00
if grpcServerVersion.GrpcVersion != agent_rpc.ClientGrpcVersion {
err := errors.New("GRPC version mismatch")
2024-01-11 19:17:07 +01:00
log.Error().Err(err).Msgf("server version %s does report grpc version %d but we only understand %d",
grpcServerVersion.ServerVersion,
grpcServerVersion.GrpcVersion,
2024-05-24 22:35:04 +02:00
agent_rpc.ClientGrpcVersion)
return err
}
// new engine
2024-07-17 16:26:35 -07:00
backendCtx := context.WithValue(agentCtx, types.CliCommand, c)
backendName := c.String("backend-engine")
2024-02-09 00:04:43 +01:00
backendEngine, err := backend.FindBackend(backendCtx, backends, backendName)
2022-09-03 20:41:23 +02:00
if err != nil {
log.Error().Err(err).Msgf("cannot find backend engine '%s'", backendName)
2022-09-03 20:41:23 +02:00
return err
}
if !backendEngine.IsAvailable(backendCtx) {
log.Error().Str("engine", backendEngine.Name()).Msg("selected backend engine is unavailable")
return fmt.Errorf("selected backend engine %s is unavailable", backendEngine.Name())
}
// load engine (e.g. init api client)
2023-12-14 19:20:47 +01:00
engInfo, err := backendEngine.Load(backendCtx)
if err != nil {
log.Error().Err(err).Msg("cannot load backend engine")
return err
}
2023-12-14 19:20:47 +01:00
log.Debug().Msgf("loaded %s backend engine", backendEngine.Name())
2024-07-17 16:26:35 -07:00
maxWorkflows := int(c.Int("max-workflows"))
agentConfig.AgentID, err = client.RegisterAgent(grpcCtx, engInfo.Platform, backendEngine.Name(), version.String(), maxWorkflows) //nolint:contextcheck
if err != nil {
return err
}
2024-07-13 16:06:20 -07:00
serviceWaitingGroup.Go(func() error {
// we close grpc client context once unregister was handled
defer grpcClientCtxCancel(nil)
// we wait till agent context is done
<-agentCtx.Done()
// Remove stateless agents from server
if !agentConfigPersisted.Load() {
log.Debug().Msg("unregistering agent from server ...")
// we want to run it explicit run when context got canceled so run it in background
err := client.UnregisterAgent(grpcClientCtx)
if err != nil {
log.Err(err).Msg("failed to unregister agent from server")
} else {
log.Info().Msg("agent unregistered from server")
}
}
return nil
})
if agentConfigPath != "" {
if err := writeAgentConfig(agentConfig, agentConfigPath); err == nil {
2024-07-13 16:06:20 -07:00
agentConfigPersisted.Store(true)
}
}
2023-07-02 17:22:05 +02:00
2023-04-03 12:30:52 +02:00
labels := map[string]string{
"hostname": hostname,
"platform": engInfo.Platform,
2023-12-14 19:20:47 +01:00
"backend": backendEngine.Name(),
2023-04-03 12:30:52 +02:00
"repo": "*", // allow all repos by default
}
if err := stringSliceAddToMap(c.StringSlice("filter"), labels); err != nil {
return err
2023-04-03 12:30:52 +02:00
}
2024-10-03 09:53:41 +01:00
log.Debug().Any("labels", labels).Msgf("agent configured with labels")
2023-04-03 12:30:52 +02:00
filter := rpc.Filter{
Labels: labels,
}
2024-01-11 19:17:07 +01:00
log.Debug().Msgf("agent registered with ID %d", agentConfig.AgentID)
2024-07-13 16:06:20 -07:00
serviceWaitingGroup.Go(func() error {
for {
2024-07-13 16:06:20 -07:00
err := client.ReportHealth(grpcCtx)
if err != nil {
2024-01-11 19:17:07 +01:00
log.Err(err).Msg("failed to report health")
}
2024-07-13 16:06:20 -07:00
select {
case <-agentCtx.Done():
log.Debug().Msg("terminating health reporting")
return nil
case <-time.After(reportHealthInterval):
}
}
2024-07-13 16:06:20 -07:00
})
2024-07-13 16:06:20 -07:00
for i := 0; i < maxWorkflows; i++ {
2023-07-20 20:39:20 +02:00
i := i
2024-07-13 16:06:20 -07:00
serviceWaitingGroup.Go(func() error {
runner := agent.NewRunner(client, filter, hostname, counter, &backendEngine)
2023-07-20 20:39:20 +02:00
log.Debug().Msgf("created new runner %d", i)
for {
2024-07-13 16:06:20 -07:00
if agentCtx.Err() != nil {
return nil
2021-11-26 03:34:48 +01:00
}
2022-10-28 21:08:53 +05:30
log.Debug().Msg("polling new steps")
2024-07-13 16:06:20 -07:00
if err := runner.Run(agentCtx, shutdownCtx); err != nil {
log.Error().Err(err).Msg("runner done with error")
return err
2017-03-16 18:14:02 +08:00
}
}
2024-07-13 16:06:20 -07:00
})
2016-04-19 18:37:53 -07:00
}
2022-09-03 20:41:23 +02:00
log.Info().Msgf(
2024-01-11 19:17:07 +01:00
"starting Woodpecker agent with version '%s' and backend '%s' using platform '%s' running up to %d pipelines in parallel",
2024-07-13 16:06:20 -07:00
version.String(), backendEngine.Name(), engInfo.Platform, maxWorkflows)
2022-09-03 20:41:23 +02:00
2024-07-13 16:06:20 -07:00
return serviceWaitingGroup.Wait()
2017-03-16 18:14:02 +08:00
}
2024-07-17 16:26:35 -07:00
func runWithRetry(backendEngines []types.Backend) func(ctx context.Context, c *cli.Command) error {
return func(ctx context.Context, c *cli.Command) error {
if err := logger.SetupGlobalLogger(ctx, c, true); err != nil {
return err
}
initHealth()
2024-07-17 16:26:35 -07:00
retryCount := int(c.Int("connect-retry-count"))
2024-07-13 16:06:20 -07:00
retryDelay := c.Duration("connect-retry-delay")
var err error
for i := 0; i < retryCount; i++ {
2024-07-17 16:26:35 -07:00
if err = run(ctx, c, backendEngines); status.Code(err) == codes.Unavailable {
log.Warn().Err(err).Msg(fmt.Sprintf("cannot connect to server, retrying in %v", retryDelay))
time.Sleep(retryDelay)
} else {
break
}
}
return err
}
}
func stringSliceAddToMap(sl []string, m map[string]string) error {
if m == nil {
m = make(map[string]string)
}
for _, v := range utils.StringSliceDeleteEmpty(sl) {
2024-03-15 18:00:25 +01:00
before, after, _ := strings.Cut(v, "=")
switch {
case before != "" && after != "":
m[before] = after
case before != "":
return fmt.Errorf("key '%s' does not have a value assigned", before)
default:
return fmt.Errorf("empty string in slice")
}
}
return nil
}