mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-06-13 10:33:35 +00:00
prometheus liveness probe sidecar
Signed-off-by: Daniel-Pivonka dpivonka@redhat.com
This commit is contained in:
committed by
mergify[bot]
parent
2ca575b99d
commit
d621a58207
310
vendor/github.com/kubernetes-csi/csi-lib-utils/connection/connection.go
generated
vendored
Normal file
310
vendor/github.com/kubernetes-csi/csi-lib-utils/connection/connection.go
generated
vendored
Normal file
@ -0,0 +1,310 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package connection
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
|
||||
"github.com/container-storage-interface/spec/lib/go/csi"
|
||||
"github.com/kubernetes-csi/csi-lib-utils/protosanitizer"
|
||||
"google.golang.org/grpc"
|
||||
"k8s.io/klog"
|
||||
)
|
||||
|
||||
const (
|
||||
// Interval of logging connection errors
|
||||
connectionLoggingInterval = 10 * time.Second
|
||||
|
||||
// Interval of trying to call Probe() until it succeeds
|
||||
probeInterval = 1 * time.Second
|
||||
)
|
||||
|
||||
const terminationLogPath = "/dev/termination-log"
|
||||
|
||||
// Connect opens insecure gRPC connection to a CSI driver. Address must be either absolute path to UNIX domain socket
|
||||
// file or have format '<protocol>://', following gRPC name resolution mechanism at
|
||||
// https://github.com/grpc/grpc/blob/master/doc/naming.md.
|
||||
//
|
||||
// The function tries to connect indefinitely every second until it connects. The function automatically disables TLS
|
||||
// and adds interceptor for logging of all gRPC messages at level 5.
|
||||
//
|
||||
// For a connection to a Unix Domain socket, the behavior after
|
||||
// loosing the connection is configurable. The default is to
|
||||
// log the connection loss and reestablish a connection. Applications
|
||||
// which need to know about a connection loss can be notified by
|
||||
// passing a callback with OnConnectionLoss and in that callback
|
||||
// can decide what to do:
|
||||
// - exit the application with os.Exit
|
||||
// - invalidate cached information
|
||||
// - disable the reconnect, which will cause all gRPC method calls to fail with status.Unavailable
|
||||
//
|
||||
// For other connections, the default behavior from gRPC is used and
|
||||
// loss of connection is not detected reliably.
|
||||
func Connect(address string, options ...Option) (*grpc.ClientConn, error) {
|
||||
return connect(address, []grpc.DialOption{}, options)
|
||||
}
|
||||
|
||||
// Option is the type of all optional parameters for Connect.
|
||||
type Option func(o *options)
|
||||
|
||||
// OnConnectionLoss registers a callback that will be invoked when the
|
||||
// connection got lost. If that callback returns true, the connection
|
||||
// is reestablished. Otherwise the connection is left as it is and
|
||||
// all future gRPC calls using it will fail with status.Unavailable.
|
||||
func OnConnectionLoss(reconnect func() bool) Option {
|
||||
return func(o *options) {
|
||||
o.reconnect = reconnect
|
||||
}
|
||||
}
|
||||
|
||||
// ExitOnConnectionLoss returns callback for OnConnectionLoss() that writes
|
||||
// an error to /dev/termination-log and exits.
|
||||
func ExitOnConnectionLoss() func() bool {
|
||||
return func() bool {
|
||||
terminationMsg := "Lost connection to CSI driver, exiting"
|
||||
if err := ioutil.WriteFile(terminationLogPath, []byte(terminationMsg), 0644); err != nil {
|
||||
klog.Errorf("%s: %s", terminationLogPath, err)
|
||||
}
|
||||
klog.Fatalf(terminationMsg)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
type options struct {
|
||||
reconnect func() bool
|
||||
}
|
||||
|
||||
// connect is the internal implementation of Connect. It has more options to enable testing.
|
||||
func connect(address string, dialOptions []grpc.DialOption, connectOptions []Option) (*grpc.ClientConn, error) {
|
||||
var o options
|
||||
for _, option := range connectOptions {
|
||||
option(&o)
|
||||
}
|
||||
|
||||
dialOptions = append(dialOptions,
|
||||
grpc.WithInsecure(), // Don't use TLS, it's usually local Unix domain socket in a container.
|
||||
grpc.WithBackoffMaxDelay(time.Second), // Retry every second after failure.
|
||||
grpc.WithBlock(), // Block until connection succeeds.
|
||||
grpc.WithUnaryInterceptor(LogGRPC), // Log all messages.
|
||||
)
|
||||
unixPrefix := "unix://"
|
||||
if strings.HasPrefix(address, "/") {
|
||||
// It looks like filesystem path.
|
||||
address = unixPrefix + address
|
||||
}
|
||||
|
||||
if strings.HasPrefix(address, unixPrefix) {
|
||||
// state variables for the custom dialer
|
||||
haveConnected := false
|
||||
lostConnection := false
|
||||
reconnect := true
|
||||
|
||||
dialOptions = append(dialOptions, grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
|
||||
if haveConnected && !lostConnection {
|
||||
// We have detected a loss of connection for the first time. Decide what to do...
|
||||
// Record this once. TODO (?): log at regular time intervals.
|
||||
klog.Errorf("Lost connection to %s.", address)
|
||||
// Inform caller and let it decide? Default is to reconnect.
|
||||
if o.reconnect != nil {
|
||||
reconnect = o.reconnect()
|
||||
}
|
||||
lostConnection = true
|
||||
}
|
||||
if !reconnect {
|
||||
return nil, errors.New("connection lost, reconnecting disabled")
|
||||
}
|
||||
conn, err := net.DialTimeout("unix", address[len(unixPrefix):], timeout)
|
||||
if err == nil {
|
||||
// Connection restablished.
|
||||
haveConnected = true
|
||||
lostConnection = false
|
||||
}
|
||||
return conn, err
|
||||
}))
|
||||
} else if o.reconnect != nil {
|
||||
return nil, errors.New("OnConnectionLoss callback only supported for unix:// addresses")
|
||||
}
|
||||
|
||||
klog.Infof("Connecting to %s", address)
|
||||
|
||||
// Connect in background.
|
||||
var conn *grpc.ClientConn
|
||||
var err error
|
||||
ready := make(chan bool)
|
||||
go func() {
|
||||
conn, err = grpc.Dial(address, dialOptions...)
|
||||
close(ready)
|
||||
}()
|
||||
|
||||
// Log error every connectionLoggingInterval
|
||||
ticker := time.NewTicker(connectionLoggingInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
// Wait until Dial() succeeds.
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
klog.Warningf("Still connecting to %s", address)
|
||||
|
||||
case <-ready:
|
||||
return conn, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// LogGRPC is gPRC unary interceptor for logging of CSI messages at level 5. It removes any secrets from the message.
|
||||
func LogGRPC(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
|
||||
klog.V(5).Infof("GRPC call: %s", method)
|
||||
klog.V(5).Infof("GRPC request: %s", protosanitizer.StripSecrets(req))
|
||||
err := invoker(ctx, method, req, reply, cc, opts...)
|
||||
klog.V(5).Infof("GRPC response: %s", protosanitizer.StripSecrets(reply))
|
||||
klog.V(5).Infof("GRPC error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// GetDriverName returns name of CSI driver.
|
||||
func GetDriverName(ctx context.Context, conn *grpc.ClientConn) (string, error) {
|
||||
client := csi.NewIdentityClient(conn)
|
||||
|
||||
req := csi.GetPluginInfoRequest{}
|
||||
rsp, err := client.GetPluginInfo(ctx, &req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
name := rsp.GetName()
|
||||
if name == "" {
|
||||
return "", fmt.Errorf("driver name is empty")
|
||||
}
|
||||
return name, nil
|
||||
}
|
||||
|
||||
// PluginCapabilitySet is set of CSI plugin capabilities. Only supported capabilities are in the map.
|
||||
type PluginCapabilitySet map[csi.PluginCapability_Service_Type]bool
|
||||
|
||||
// GetPluginCapabilities returns set of supported capabilities of CSI driver.
|
||||
func GetPluginCapabilities(ctx context.Context, conn *grpc.ClientConn) (PluginCapabilitySet, error) {
|
||||
client := csi.NewIdentityClient(conn)
|
||||
req := csi.GetPluginCapabilitiesRequest{}
|
||||
rsp, err := client.GetPluginCapabilities(ctx, &req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
caps := PluginCapabilitySet{}
|
||||
for _, cap := range rsp.GetCapabilities() {
|
||||
if cap == nil {
|
||||
continue
|
||||
}
|
||||
srv := cap.GetService()
|
||||
if srv == nil {
|
||||
continue
|
||||
}
|
||||
t := srv.GetType()
|
||||
caps[t] = true
|
||||
}
|
||||
return caps, nil
|
||||
}
|
||||
|
||||
// ControllerCapabilitySet is set of CSI controller capabilities. Only supported capabilities are in the map.
|
||||
type ControllerCapabilitySet map[csi.ControllerServiceCapability_RPC_Type]bool
|
||||
|
||||
// GetControllerCapabilities returns set of supported controller capabilities of CSI driver.
|
||||
func GetControllerCapabilities(ctx context.Context, conn *grpc.ClientConn) (ControllerCapabilitySet, error) {
|
||||
client := csi.NewControllerClient(conn)
|
||||
req := csi.ControllerGetCapabilitiesRequest{}
|
||||
rsp, err := client.ControllerGetCapabilities(ctx, &req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
caps := ControllerCapabilitySet{}
|
||||
for _, cap := range rsp.GetCapabilities() {
|
||||
if cap == nil {
|
||||
continue
|
||||
}
|
||||
rpc := cap.GetRpc()
|
||||
if rpc == nil {
|
||||
continue
|
||||
}
|
||||
t := rpc.GetType()
|
||||
caps[t] = true
|
||||
}
|
||||
return caps, nil
|
||||
}
|
||||
|
||||
// ProbeForever calls Probe() of a CSI driver and waits until the driver becomes ready.
|
||||
// Any error other than timeout is returned.
|
||||
func ProbeForever(conn *grpc.ClientConn, singleProbeTimeout time.Duration) error {
|
||||
for {
|
||||
klog.Info("Probing CSI driver for readiness")
|
||||
ready, err := probeOnce(conn, singleProbeTimeout)
|
||||
if err != nil {
|
||||
st, ok := status.FromError(err)
|
||||
if !ok {
|
||||
// This is not gRPC error. The probe must have failed before gRPC
|
||||
// method was called, otherwise we would get gRPC error.
|
||||
return fmt.Errorf("CSI driver probe failed: %s", err)
|
||||
}
|
||||
if st.Code() != codes.DeadlineExceeded {
|
||||
return fmt.Errorf("CSI driver probe failed: %s", err)
|
||||
}
|
||||
// Timeout -> driver is not ready. Fall through to sleep() below.
|
||||
klog.Warning("CSI driver probe timed out")
|
||||
} else {
|
||||
if ready {
|
||||
return nil
|
||||
}
|
||||
klog.Warning("CSI driver is not ready")
|
||||
}
|
||||
// Timeout was returned or driver is not ready.
|
||||
time.Sleep(probeInterval)
|
||||
}
|
||||
}
|
||||
|
||||
// probeOnce is a helper to simplify defer cancel()
|
||||
func probeOnce(conn *grpc.ClientConn, timeout time.Duration) (bool, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
return Probe(ctx, conn)
|
||||
}
|
||||
|
||||
// Probe calls driver Probe() just once and returns its result without any processing.
|
||||
func Probe(ctx context.Context, conn *grpc.ClientConn) (ready bool, err error) {
|
||||
client := csi.NewIdentityClient(conn)
|
||||
|
||||
req := csi.ProbeRequest{}
|
||||
rsp, err := client.Probe(ctx, &req)
|
||||
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
r := rsp.GetReady()
|
||||
if r == nil {
|
||||
// "If not present, the caller SHALL assume that the plugin is in a ready state"
|
||||
return true, nil
|
||||
}
|
||||
return r.GetValue(), nil
|
||||
}
|
Reference in New Issue
Block a user