mirror of
https://github.com/ceph/ceph-csi.git
synced 2024-11-28 09:10:19 +00:00
309 lines
10 KiB
Go
309 lines
10 KiB
Go
|
/*
|
||
|
Copyright 2014 The Kubernetes Authors.
|
||
|
|
||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
you may not use this file except in compliance with the License.
|
||
|
You may obtain a copy of the License at
|
||
|
|
||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
||
|
Unless required by applicable law or agreed to in writing, software
|
||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
See the License for the specific language governing permissions and
|
||
|
limitations under the License.
|
||
|
*/
|
||
|
|
||
|
package healthz
|
||
|
|
||
|
import (
|
||
|
"bytes"
|
||
|
"context"
|
||
|
"fmt"
|
||
|
"net/http"
|
||
|
"reflect"
|
||
|
"strings"
|
||
|
"sync"
|
||
|
"sync/atomic"
|
||
|
"time"
|
||
|
|
||
|
"k8s.io/apimachinery/pkg/util/sets"
|
||
|
"k8s.io/apimachinery/pkg/util/wait"
|
||
|
"k8s.io/apiserver/pkg/endpoints/metrics"
|
||
|
"k8s.io/apiserver/pkg/server/httplog"
|
||
|
"k8s.io/component-base/metrics/prometheus/slis"
|
||
|
"k8s.io/klog/v2"
|
||
|
)
|
||
|
|
||
|
// HealthChecker is a named healthz checker.
|
||
|
type HealthChecker interface {
|
||
|
Name() string
|
||
|
Check(req *http.Request) error
|
||
|
}
|
||
|
|
||
|
// PingHealthz returns true automatically when checked
|
||
|
var PingHealthz HealthChecker = ping{}
|
||
|
|
||
|
// ping implements the simplest possible healthz checker.
|
||
|
type ping struct{}
|
||
|
|
||
|
func (ping) Name() string {
|
||
|
return "ping"
|
||
|
}
|
||
|
|
||
|
// PingHealthz is a health check that returns true.
|
||
|
func (ping) Check(_ *http.Request) error {
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// LogHealthz returns true if logging is not blocked
|
||
|
var LogHealthz HealthChecker = &log{}
|
||
|
|
||
|
type log struct {
|
||
|
startOnce sync.Once
|
||
|
lastVerified atomic.Value
|
||
|
}
|
||
|
|
||
|
func (l *log) Name() string {
|
||
|
return "log"
|
||
|
}
|
||
|
|
||
|
func (l *log) Check(_ *http.Request) error {
|
||
|
l.startOnce.Do(func() {
|
||
|
l.lastVerified.Store(time.Now())
|
||
|
go wait.Forever(func() {
|
||
|
klog.Flush()
|
||
|
l.lastVerified.Store(time.Now())
|
||
|
}, time.Minute)
|
||
|
})
|
||
|
|
||
|
lastVerified := l.lastVerified.Load().(time.Time)
|
||
|
if time.Since(lastVerified) < (2 * time.Minute) {
|
||
|
return nil
|
||
|
}
|
||
|
return fmt.Errorf("logging blocked")
|
||
|
}
|
||
|
|
||
|
type cacheSyncWaiter interface {
|
||
|
WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool
|
||
|
}
|
||
|
|
||
|
type informerSync struct {
|
||
|
cacheSyncWaiter cacheSyncWaiter
|
||
|
}
|
||
|
|
||
|
var _ HealthChecker = &informerSync{}
|
||
|
|
||
|
// NewInformerSyncHealthz returns a new HealthChecker that will pass only if all informers in the given cacheSyncWaiter sync.
|
||
|
func NewInformerSyncHealthz(cacheSyncWaiter cacheSyncWaiter) HealthChecker {
|
||
|
return &informerSync{
|
||
|
cacheSyncWaiter: cacheSyncWaiter,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (i *informerSync) Name() string {
|
||
|
return "informer-sync"
|
||
|
}
|
||
|
|
||
|
func (i *informerSync) Check(_ *http.Request) error {
|
||
|
stopCh := make(chan struct{})
|
||
|
// Close stopCh to force checking if informers are synced now.
|
||
|
close(stopCh)
|
||
|
|
||
|
informersByStarted := make(map[bool][]string)
|
||
|
for informerType, started := range i.cacheSyncWaiter.WaitForCacheSync(stopCh) {
|
||
|
informersByStarted[started] = append(informersByStarted[started], informerType.String())
|
||
|
}
|
||
|
|
||
|
if notStarted := informersByStarted[false]; len(notStarted) > 0 {
|
||
|
return fmt.Errorf("%d informers not started yet: %v", len(notStarted), notStarted)
|
||
|
}
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// NamedCheck returns a healthz checker for the given name and function.
|
||
|
func NamedCheck(name string, check func(r *http.Request) error) HealthChecker {
|
||
|
return &healthzCheck{name, check}
|
||
|
}
|
||
|
|
||
|
// InstallHandler registers handlers for health checking on the path
|
||
|
// "/healthz" to mux. *All handlers* for mux must be specified in
|
||
|
// exactly one call to InstallHandler. Calling InstallHandler more
|
||
|
// than once for the same mux will result in a panic.
|
||
|
func InstallHandler(mux mux, checks ...HealthChecker) {
|
||
|
InstallPathHandler(mux, "/healthz", checks...)
|
||
|
}
|
||
|
|
||
|
// InstallReadyzHandler registers handlers for health checking on the path
|
||
|
// "/readyz" to mux. *All handlers* for mux must be specified in
|
||
|
// exactly one call to InstallHandler. Calling InstallHandler more
|
||
|
// than once for the same mux will result in a panic.
|
||
|
func InstallReadyzHandler(mux mux, checks ...HealthChecker) {
|
||
|
InstallPathHandler(mux, "/readyz", checks...)
|
||
|
}
|
||
|
|
||
|
// InstallLivezHandler registers handlers for liveness checking on the path
|
||
|
// "/livez" to mux. *All handlers* for mux must be specified in
|
||
|
// exactly one call to InstallHandler. Calling InstallHandler more
|
||
|
// than once for the same mux will result in a panic.
|
||
|
func InstallLivezHandler(mux mux, checks ...HealthChecker) {
|
||
|
InstallPathHandler(mux, "/livez", checks...)
|
||
|
}
|
||
|
|
||
|
// InstallPathHandler registers handlers for health checking on
|
||
|
// a specific path to mux. *All handlers* for the path must be
|
||
|
// specified in exactly one call to InstallPathHandler. Calling
|
||
|
// InstallPathHandler more than once for the same path and mux will
|
||
|
// result in a panic.
|
||
|
func InstallPathHandler(mux mux, path string, checks ...HealthChecker) {
|
||
|
InstallPathHandlerWithHealthyFunc(mux, path, nil, checks...)
|
||
|
}
|
||
|
|
||
|
// InstallPathHandlerWithHealthyFunc is like InstallPathHandler, but calls firstTimeHealthy exactly once
|
||
|
// when the handler succeeds for the first time.
|
||
|
func InstallPathHandlerWithHealthyFunc(mux mux, path string, firstTimeHealthy func(), checks ...HealthChecker) {
|
||
|
if len(checks) == 0 {
|
||
|
klog.V(5).Info("No default health checks specified. Installing the ping handler.")
|
||
|
checks = []HealthChecker{PingHealthz}
|
||
|
}
|
||
|
|
||
|
klog.V(5).Infof("Installing health checkers for (%v): %v", path, formatQuoted(checkerNames(checks...)...))
|
||
|
|
||
|
name := strings.Split(strings.TrimPrefix(path, "/"), "/")[0]
|
||
|
mux.Handle(path,
|
||
|
metrics.InstrumentHandlerFunc("GET",
|
||
|
/* group = */ "",
|
||
|
/* version = */ "",
|
||
|
/* resource = */ "",
|
||
|
/* subresource = */ path,
|
||
|
/* scope = */ "",
|
||
|
/* component = */ "",
|
||
|
/* deprecated */ false,
|
||
|
/* removedRelease */ "",
|
||
|
handleRootHealth(name, firstTimeHealthy, checks...)))
|
||
|
for _, check := range checks {
|
||
|
mux.Handle(fmt.Sprintf("%s/%v", path, check.Name()), adaptCheckToHandler(check.Check))
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// mux is an interface describing the methods InstallHandler requires.
|
||
|
type mux interface {
|
||
|
Handle(pattern string, handler http.Handler)
|
||
|
}
|
||
|
|
||
|
// healthzCheck implements HealthChecker on an arbitrary name and check function.
|
||
|
type healthzCheck struct {
|
||
|
name string
|
||
|
check func(r *http.Request) error
|
||
|
}
|
||
|
|
||
|
var _ HealthChecker = &healthzCheck{}
|
||
|
|
||
|
func (c *healthzCheck) Name() string {
|
||
|
return c.name
|
||
|
}
|
||
|
|
||
|
func (c *healthzCheck) Check(r *http.Request) error {
|
||
|
return c.check(r)
|
||
|
}
|
||
|
|
||
|
// getExcludedChecks extracts the health check names to be excluded from the query param
|
||
|
func getExcludedChecks(r *http.Request) sets.String {
|
||
|
checks, found := r.URL.Query()["exclude"]
|
||
|
if found {
|
||
|
return sets.NewString(checks...)
|
||
|
}
|
||
|
return sets.NewString()
|
||
|
}
|
||
|
|
||
|
// handleRootHealth returns an http.HandlerFunc that serves the provided checks.
|
||
|
func handleRootHealth(name string, firstTimeHealthy func(), checks ...HealthChecker) http.HandlerFunc {
|
||
|
var notifyOnce sync.Once
|
||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||
|
excluded := getExcludedChecks(r)
|
||
|
// failedVerboseLogOutput is for output to the log. It indicates detailed failed output information for the log.
|
||
|
var failedVerboseLogOutput bytes.Buffer
|
||
|
var failedChecks []string
|
||
|
var individualCheckOutput bytes.Buffer
|
||
|
for _, check := range checks {
|
||
|
// no-op the check if we've specified we want to exclude the check
|
||
|
if excluded.Has(check.Name()) {
|
||
|
excluded.Delete(check.Name())
|
||
|
fmt.Fprintf(&individualCheckOutput, "[+]%s excluded: ok\n", check.Name())
|
||
|
continue
|
||
|
}
|
||
|
if err := check.Check(r); err != nil {
|
||
|
slis.ObserveHealthcheck(context.Background(), check.Name(), name, slis.Error)
|
||
|
// don't include the error since this endpoint is public. If someone wants more detail
|
||
|
// they should have explicit permission to the detailed checks.
|
||
|
fmt.Fprintf(&individualCheckOutput, "[-]%s failed: reason withheld\n", check.Name())
|
||
|
// but we do want detailed information for our log
|
||
|
fmt.Fprintf(&failedVerboseLogOutput, "[-]%s failed: %v\n", check.Name(), err)
|
||
|
failedChecks = append(failedChecks, check.Name())
|
||
|
} else {
|
||
|
slis.ObserveHealthcheck(context.Background(), check.Name(), name, slis.Success)
|
||
|
fmt.Fprintf(&individualCheckOutput, "[+]%s ok\n", check.Name())
|
||
|
}
|
||
|
}
|
||
|
if excluded.Len() > 0 {
|
||
|
fmt.Fprintf(&individualCheckOutput, "warn: some health checks cannot be excluded: no matches for %s\n", formatQuoted(excluded.List()...))
|
||
|
klog.V(6).Infof("cannot exclude some health checks, no health checks are installed matching %s",
|
||
|
formatQuoted(excluded.List()...))
|
||
|
}
|
||
|
// always be verbose on failure
|
||
|
if len(failedChecks) > 0 {
|
||
|
klog.V(2).Infof("%s check failed: %s\n%v", strings.Join(failedChecks, ","), name, failedVerboseLogOutput.String())
|
||
|
httplog.SetStacktracePredicate(r.Context(), func(int) bool { return false })
|
||
|
http.Error(w, fmt.Sprintf("%s%s check failed", individualCheckOutput.String(), name), http.StatusInternalServerError)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// signal first time this is healthy
|
||
|
if firstTimeHealthy != nil {
|
||
|
notifyOnce.Do(firstTimeHealthy)
|
||
|
}
|
||
|
|
||
|
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||
|
w.Header().Set("X-Content-Type-Options", "nosniff")
|
||
|
if _, found := r.URL.Query()["verbose"]; !found {
|
||
|
fmt.Fprint(w, "ok")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
individualCheckOutput.WriteTo(w)
|
||
|
fmt.Fprintf(w, "%s check passed\n", name)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// adaptCheckToHandler returns an http.HandlerFunc that serves the provided checks.
|
||
|
func adaptCheckToHandler(c func(r *http.Request) error) http.HandlerFunc {
|
||
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||
|
err := c(r)
|
||
|
if err != nil {
|
||
|
http.Error(w, fmt.Sprintf("internal server error: %v", err), http.StatusInternalServerError)
|
||
|
} else {
|
||
|
fmt.Fprint(w, "ok")
|
||
|
}
|
||
|
})
|
||
|
}
|
||
|
|
||
|
// checkerNames returns the names of the checks in the same order as passed in.
|
||
|
func checkerNames(checks ...HealthChecker) []string {
|
||
|
// accumulate the names of checks for printing them out.
|
||
|
checkerNames := make([]string, 0, len(checks))
|
||
|
for _, check := range checks {
|
||
|
checkerNames = append(checkerNames, check.Name())
|
||
|
}
|
||
|
return checkerNames
|
||
|
}
|
||
|
|
||
|
// formatQuoted returns a formatted string of the health check names,
|
||
|
// preserving the order passed in.
|
||
|
func formatQuoted(names ...string) string {
|
||
|
quoted := make([]string, 0, len(names))
|
||
|
for _, name := range names {
|
||
|
quoted = append(quoted, fmt.Sprintf("%q", name))
|
||
|
}
|
||
|
return strings.Join(quoted, ",")
|
||
|
}
|