mirror of
https://github.com/woodpecker-ci/woodpecker.git
synced 2025-01-11 17:18:09 +02:00
more advanced health check logic
This commit is contained in:
parent
eca91f4ec7
commit
42b60bd822
@ -1,9 +1,11 @@
|
||||
# docker build --rm -f Dockerfile.agent -t drone/agent .

# FROM must be the first instruction of the build stage; everything below
# configures the image derived from it.
FROM centurylink/ca-certs
ENV GODEBUG=netdns=go

ADD release/drone-agent /bin/

# Port 3000 is where the agent listens for health-check HTTP requests.
EXPOSE 3000

# The agent binary probes its own health endpoint via the "ping" sub-command.
HEALTHCHECK CMD ["/bin/drone-agent", "ping"]

# Declared exactly once; a second ENTRYPOINT would silently override the first.
ENTRYPOINT ["/bin/drone-agent"]
|
||||
|
@ -56,6 +56,9 @@ func loop(c *cli.Context) error {
|
||||
)
|
||||
}
|
||||
|
||||
counter.Polling = c.Int("max-procs")
|
||||
counter.Running = 0
|
||||
|
||||
if c.BoolT("healthcheck") {
|
||||
go http.ListenAndServe(":3000", nil)
|
||||
}
|
||||
@ -138,9 +141,22 @@ func run(ctx context.Context, client rpc.Peer, filter rpc.Filter) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
timeout := time.Hour
|
||||
if minutes := work.Timeout; minutes != 0 {
|
||||
timeout = time.Duration(minutes) * time.Minute
|
||||
}
|
||||
|
||||
counter.Add(
|
||||
work.ID,
|
||||
timeout,
|
||||
extractRepositoryName(work.Config), // hack
|
||||
extractBuildNumber(work.Config), // hack
|
||||
)
|
||||
defer counter.Done(work.ID)
|
||||
|
||||
logger := log.With().
|
||||
Str("repo", extractRepositoryName(work.Config)).
|
||||
Str("build", extractBuildNumber(work.Config)).
|
||||
Str("repo", extractRepositoryName(work.Config)). // hack
|
||||
Str("build", extractBuildNumber(work.Config)). // hack
|
||||
Str("id", work.ID).
|
||||
Logger()
|
||||
|
||||
@ -157,11 +173,6 @@ func run(ctx context.Context, client rpc.Peer, filter rpc.Filter) error {
|
||||
return err
|
||||
}
|
||||
|
||||
timeout := time.Hour
|
||||
if minutes := work.Timeout; minutes != 0 {
|
||||
timeout = time.Duration(minutes) * time.Minute
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctxmeta, timeout)
|
||||
defer cancel()
|
||||
|
||||
|
@ -3,7 +3,10 @@ package main
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/drone/drone/version"
|
||||
"github.com/urfave/cli"
|
||||
@ -14,12 +17,17 @@ import (
|
||||
// https://github.com/mozilla-services/Dockerflow
|
||||
|
||||
func init() {
|
||||
http.HandleFunc("/__heartbeat__", handleHeartbeat)
|
||||
http.HandleFunc("/__version__", handleVersion)
|
||||
http.HandleFunc("/varz", handleStats)
|
||||
http.HandleFunc("/healthz", handleHeartbeat)
|
||||
http.HandleFunc("/version", handleVersion)
|
||||
}
|
||||
|
||||
func handleHeartbeat(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(200)
|
||||
if counter.Healthy() {
|
||||
w.WriteHeader(200)
|
||||
} else {
|
||||
w.WriteHeader(500)
|
||||
}
|
||||
}
|
||||
|
||||
func handleVersion(w http.ResponseWriter, r *http.Request) {
|
||||
@ -31,15 +39,87 @@ func handleVersion(w http.ResponseWriter, r *http.Request) {
|
||||
})
|
||||
}
|
||||
|
||||
func handleStats(w http.ResponseWriter, r *http.Request) {
|
||||
if counter.Healthy() {
|
||||
w.WriteHeader(200)
|
||||
} else {
|
||||
w.WriteHeader(500)
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/json")
|
||||
counter.writeTo(w)
|
||||
}
|
||||
|
||||
// versionResp is a JSON-serializable pair of strings identifying a build:
// a version and a source.
type versionResp struct {
	// Version is emitted under the "version" key.
	Version string `json:"version"`
	// Source is emitted under the "source" key.
	Source string `json:"source"`
}
|
||||
|
||||
// counter is the default statistics tracker used by the agent and by the
// HTTP health handlers.
var counter = &state{
	Metadata: map[string]info{},
}

// state holds the polling/running counts plus per-build metadata for
// every build that is currently executing. The embedded mutex guards all
// fields; it is excluded from the JSON representation.
type state struct {
	sync.Mutex `json:"-"`

	Polling  int             `json:"polling_count"`
	Running  int             `json:"running_count"`
	Metadata map[string]info `json:"running"`
}

// info describes a single running build.
type info struct {
	ID      string        `json:"id"`
	Repo    string        `json:"repository"`
	Build   string        `json:"build_number"`
	Started time.Time     `json:"build_started"`
	Timeout time.Duration `json:"build_timeout"`
}

// Add records that the build identified by id has started: one polling slot
// becomes a running slot and the build's metadata is stored with the current
// UTC time as its start time.
func (s *state) Add(id string, timeout time.Duration, repo, build string) {
	s.Lock()
	defer s.Unlock()

	// Allow a zero-value state to be used directly; writing to a nil map
	// would panic.
	if s.Metadata == nil {
		s.Metadata = make(map[string]info)
	}

	s.Polling--
	s.Running++
	s.Metadata[id] = info{
		ID:      id,
		Repo:    repo,
		Build:   build,
		Timeout: timeout,
		Started: time.Now().UTC(),
	}
}

// Done records that the build identified by id has finished: the running
// slot goes back to polling and the build's metadata is dropped.
func (s *state) Done(id string) {
	s.Lock()
	defer s.Unlock()

	s.Polling++
	s.Running--
	delete(s.Metadata, id)
}

// Healthy reports whether every tracked build is still within its timeout
// plus a one hour grace period. It returns false as soon as one build has
// been running longer than that.
func (s *state) Healthy() bool {
	const grace = time.Hour // tolerated overrun on top of the build timeout

	s.Lock()
	defer s.Unlock()

	now := time.Now()
	for _, item := range s.Metadata {
		deadline := item.Started.Add(item.Timeout).Add(grace)
		if now.After(deadline) {
			return false
		}
	}
	return true
}

// writeTo serializes the state as JSON and writes it to w. It returns the
// number of bytes written and the first encoding or write error encountered.
func (s *state) writeTo(w io.Writer) (int, error) {
	// Encode under the lock, but release it before writing so a slow
	// writer cannot block Add/Done/Healthy.
	s.Lock()
	out, err := json.Marshal(s)
	s.Unlock()

	if err != nil {
		return 0, err
	}
	return w.Write(out)
}
|
||||
|
||||
// handles pinging the endpoint and returns an error if the
|
||||
// agent is in an unhealthy state.
|
||||
func pinger(c *cli.Context) error {
|
||||
resp, err := http.Get("http://localhost:3000/__heartbeat__")
|
||||
resp, err := http.Get("http://localhost:3000/healthz")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
45
cmd/drone-agent/health_test.go
Normal file
45
cmd/drone-agent/health_test.go
Normal file
@ -0,0 +1,45 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestHealthy(t *testing.T) {
|
||||
s := state{}
|
||||
s.Metadata = map[string]info{}
|
||||
|
||||
s.Add("1", time.Hour, "octocat/hello-world", "42")
|
||||
|
||||
if got, want := s.Metadata["1"].ID, "1"; got != want {
|
||||
t.Errorf("got ID %s, want %s", got, want)
|
||||
}
|
||||
if got, want := s.Metadata["1"].Timeout, time.Hour; got != want {
|
||||
t.Errorf("got duration %v, want %v", got, want)
|
||||
}
|
||||
if got, want := s.Metadata["1"].Repo, "octocat/hello-world"; got != want {
|
||||
t.Errorf("got repository name %s, want %s", got, want)
|
||||
}
|
||||
|
||||
s.Metadata["1"] = info{
|
||||
Timeout: time.Hour,
|
||||
Started: time.Now().UTC(),
|
||||
}
|
||||
if s.Healthy() == false {
|
||||
t.Error("want healthy status when timeout not exceeded, got false")
|
||||
}
|
||||
|
||||
s.Metadata["1"] = info{
|
||||
Started: time.Now().UTC().Add(-(time.Minute * 30)),
|
||||
}
|
||||
if s.Healthy() == false {
|
||||
t.Error("want healthy status when timeout+buffer not exceeded, got false")
|
||||
}
|
||||
|
||||
s.Metadata["1"] = info{
|
||||
Started: time.Now().UTC().Add(-(time.Hour + time.Minute)),
|
||||
}
|
||||
if s.Healthy() == true {
|
||||
t.Error("want unhealthy status when timeout+buffer not exceeded, got true")
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user