2019-06-20 19:30:40 +00:00
|
|
|
/*
Copyright 2019 The Ceph-CSI Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
|
|
|
|
package liveness
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"time"
|
|
|
|
|
2020-04-17 09:23:49 +00:00
|
|
|
"github.com/ceph/ceph-csi/internal/util"
|
2021-08-24 15:03:25 +00:00
|
|
|
"github.com/ceph/ceph-csi/internal/util/log"
|
2020-04-15 03:38:16 +00:00
|
|
|
|
2019-06-20 19:30:40 +00:00
|
|
|
connlib "github.com/kubernetes-csi/csi-lib-utils/connection"
|
2020-04-14 07:04:33 +00:00
|
|
|
"github.com/kubernetes-csi/csi-lib-utils/metrics"
|
2019-06-20 19:30:40 +00:00
|
|
|
"github.com/kubernetes-csi/csi-lib-utils/rpc"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
2019-09-05 18:25:50 +00:00
|
|
|
"google.golang.org/grpc"
|
2019-06-20 19:30:40 +00:00
|
|
|
)
|
|
|
|
|
2021-07-13 12:21:05 +00:00
|
|
|
var liveness = prometheus.NewGauge(prometheus.GaugeOpts{
|
|
|
|
Namespace: "csi",
|
|
|
|
Name: "liveness",
|
|
|
|
Help: "Liveness Probe",
|
|
|
|
})
|
2019-06-20 19:30:40 +00:00
|
|
|
|
2019-09-05 18:25:50 +00:00
|
|
|
func getLiveness(timeout time.Duration, csiConn *grpc.ClientConn) {
|
2019-06-20 19:30:40 +00:00
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
|
|
|
defer cancel()
|
|
|
|
|
2021-08-24 15:03:25 +00:00
|
|
|
log.TraceLogMsg("Sending probe request to CSI driver")
|
2019-06-20 19:30:40 +00:00
|
|
|
ready, err := rpc.Probe(ctx, csiConn)
|
|
|
|
if err != nil {
|
|
|
|
liveness.Set(0)
|
2021-08-24 15:03:25 +00:00
|
|
|
log.ErrorLogMsg("health check failed: %v", err)
|
2021-07-22 05:45:17 +00:00
|
|
|
|
2019-06-20 19:30:40 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if !ready {
|
|
|
|
liveness.Set(0)
|
2021-08-24 15:03:25 +00:00
|
|
|
log.ErrorLogMsg("driver responded but is not ready")
|
2021-07-22 05:45:17 +00:00
|
|
|
|
2019-06-20 19:30:40 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
liveness.Set(1)
|
2021-08-24 15:03:25 +00:00
|
|
|
log.ExtendedLogMsg("Health check succeeded")
|
2019-06-20 19:30:40 +00:00
|
|
|
}
|
|
|
|
|
2020-04-14 07:04:33 +00:00
|
|
|
func recordLiveness(endpoint, drivername string, pollTime, timeout time.Duration) {
|
|
|
|
liveMetricsManager := metrics.NewCSIMetricsManager(drivername)
|
2019-06-20 19:30:40 +00:00
|
|
|
// register prometheus metrics
|
|
|
|
err := prometheus.Register(liveness)
|
|
|
|
if err != nil {
|
2021-08-24 15:03:25 +00:00
|
|
|
log.FatalLogMsg(err.Error())
|
2019-06-20 19:30:40 +00:00
|
|
|
}
|
|
|
|
|
2024-06-11 04:57:14 +00:00
|
|
|
csiConn, err := connlib.Connect(context.Background(), endpoint, liveMetricsManager)
|
2019-09-05 18:25:50 +00:00
|
|
|
if err != nil {
|
|
|
|
// connlib should retry forever so a returned error should mean
|
|
|
|
// the grpc client is misconfigured rather than an error on the network
|
2021-08-24 15:03:25 +00:00
|
|
|
log.FatalLogMsg("failed to establish connection to CSI driver: %v", err)
|
2019-09-05 18:25:50 +00:00
|
|
|
}
|
|
|
|
|
2019-06-20 19:30:40 +00:00
|
|
|
// get liveness periodically
|
|
|
|
ticker := time.NewTicker(pollTime)
|
|
|
|
defer ticker.Stop()
|
|
|
|
for range ticker.C {
|
2019-09-05 18:25:50 +00:00
|
|
|
getLiveness(timeout, csiConn)
|
2019-06-20 19:30:40 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-19 12:21:03 +00:00
|
|
|
// Run starts liveness collection and prometheus endpoint.
|
2019-08-14 05:57:45 +00:00
|
|
|
func Run(conf *util.Config) {
|
2021-08-24 15:03:25 +00:00
|
|
|
log.ExtendedLogMsg("Liveness Running")
|
2019-06-20 19:30:40 +00:00
|
|
|
|
|
|
|
// start liveness collection
|
2020-04-14 07:04:33 +00:00
|
|
|
go recordLiveness(conf.Endpoint, conf.DriverName, conf.PollTime, conf.PoolTimeout)
|
2019-06-20 19:30:40 +00:00
|
|
|
|
|
|
|
// start up prometheus endpoint
|
2019-08-21 09:28:02 +00:00
|
|
|
util.StartMetricsServer(conf)
|
2019-06-20 19:30:40 +00:00
|
|
|
}
|