2023-09-13 14:02:16 +00:00
|
|
|
/*
|
|
|
|
Copyright 2023 ceph-csi authors.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package healthchecker
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
)
|
|
|
|
|
|
|
|
// command is what is sent through the channel to terminate the go routine.
|
|
|
|
type command string
|
|
|
|
|
|
|
|
const (
|
|
|
|
// stopCommand is sent through the channel to stop checking.
|
|
|
|
stopCommand = command("STOP")
|
|
|
|
)
|
|
|
|
|
|
|
|
type checker struct {
|
|
|
|
// interval contains the time to sleep between health checks.
|
|
|
|
interval time.Duration
|
|
|
|
|
|
|
|
// timeout contains the delay (interval + timeout)
|
|
|
|
timeout time.Duration
|
|
|
|
|
2024-09-04 08:13:03 +00:00
|
|
|
// mutex protects against concurrent access to healthy, err and
|
2023-09-13 14:02:16 +00:00
|
|
|
// lastUpdate
|
|
|
|
mutex *sync.RWMutex
|
|
|
|
|
|
|
|
// current status
|
|
|
|
isRunning bool
|
|
|
|
healthy bool
|
|
|
|
err error
|
|
|
|
lastUpdate time.Time
|
|
|
|
|
|
|
|
// commands is the channel to read commands from; when to stop.
|
|
|
|
commands chan command
|
|
|
|
|
|
|
|
runChecker func()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *checker) initDefaults() {
|
|
|
|
c.interval = 60 * time.Second
|
|
|
|
c.timeout = 15 * time.Second
|
|
|
|
c.mutex = &sync.RWMutex{}
|
|
|
|
c.isRunning = false
|
|
|
|
c.err = nil
|
|
|
|
c.healthy = true
|
|
|
|
c.lastUpdate = time.Now()
|
|
|
|
c.commands = make(chan command)
|
|
|
|
|
|
|
|
c.runChecker = func() {
|
|
|
|
panic("BUG: implement runChecker() in the final checker struct")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *checker) start() {
|
|
|
|
if c.isRunning {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
go c.runChecker()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *checker) stop() {
|
|
|
|
c.commands <- stopCommand
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *checker) isHealthy() (bool, error) {
|
|
|
|
// check for the last update, it should be within
|
|
|
|
//
|
|
|
|
// c.lastUpdate < (c.interval + c.timeout)
|
|
|
|
//
|
|
|
|
// Without such a check, a single slow write/read could trigger actions
|
|
|
|
// to recover an unhealthy volume already.
|
|
|
|
//
|
|
|
|
// It is required to check, in case the write or read in the go routine
|
|
|
|
// is blocked.
|
|
|
|
|
|
|
|
delay := time.Since(c.lastUpdate)
|
|
|
|
if delay > (c.interval + c.timeout) {
|
|
|
|
c.mutex.Lock()
|
|
|
|
c.healthy = false
|
|
|
|
c.err = fmt.Errorf("health-check has not responded for %f seconds", delay.Seconds())
|
|
|
|
c.mutex.Unlock()
|
|
|
|
}
|
|
|
|
|
|
|
|
// read lock to get consistency between the return values
|
|
|
|
c.mutex.RLock()
|
|
|
|
defer c.mutex.RUnlock()
|
|
|
|
|
|
|
|
return c.healthy, c.err
|
|
|
|
}
|