mirror of
https://github.com/ceph/ceph-csi.git
synced 2024-11-10 00:10:20 +00:00
cephfs: enable VolumeCondition with new health-checker
The HealthChecker is configured to use the Staging path pf the volume, with a `.csi/` subdirectory. In the future this directory could be a directory that is not under the Published directory. Fixes: #4219 Signed-off-by: Niels de Vos <ndevos@ibm.com>
This commit is contained in:
parent
54fc65a561
commit
7d96cafad7
@ -25,6 +25,7 @@ import (
|
||||
casceph "github.com/ceph/ceph-csi/internal/csi-addons/cephfs"
|
||||
csiaddons "github.com/ceph/ceph-csi/internal/csi-addons/server"
|
||||
csicommon "github.com/ceph/ceph-csi/internal/csi-common"
|
||||
hc "github.com/ceph/ceph-csi/internal/health-checker"
|
||||
"github.com/ceph/ceph-csi/internal/journal"
|
||||
"github.com/ceph/ceph-csi/internal/util"
|
||||
"github.com/ceph/ceph-csi/internal/util/log"
|
||||
@ -82,6 +83,7 @@ func NewNodeServer(
|
||||
VolumeLocks: util.NewVolumeLocks(),
|
||||
kernelMountOptions: kernelMountOptions,
|
||||
fuseMountOptions: fuseMountOptions,
|
||||
healthChecker: hc.NewHealthCheckManager(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -29,6 +29,7 @@ import (
|
||||
"github.com/ceph/ceph-csi/internal/cephfs/store"
|
||||
fsutil "github.com/ceph/ceph-csi/internal/cephfs/util"
|
||||
csicommon "github.com/ceph/ceph-csi/internal/csi-common"
|
||||
hc "github.com/ceph/ceph-csi/internal/health-checker"
|
||||
"github.com/ceph/ceph-csi/internal/util"
|
||||
"github.com/ceph/ceph-csi/internal/util/fscrypt"
|
||||
"github.com/ceph/ceph-csi/internal/util/log"
|
||||
@ -47,6 +48,7 @@ type NodeServer struct {
|
||||
VolumeLocks *util.VolumeLocks
|
||||
kernelMountOptions string
|
||||
fuseMountOptions string
|
||||
healthChecker hc.Manager
|
||||
}
|
||||
|
||||
func getCredentialsForVolume(
|
||||
@ -228,6 +230,8 @@ func (ns *NodeServer) NodeStageVolume(
|
||||
return nil, status.Error(codes.Internal, err.Error())
|
||||
}
|
||||
|
||||
ns.startSharedHealthChecker(ctx, req.GetVolumeId(), stagingTargetPath)
|
||||
|
||||
return &csi.NodeStageVolumeResponse{}, nil
|
||||
}
|
||||
|
||||
@ -270,9 +274,24 @@ func (ns *NodeServer) NodeStageVolume(
|
||||
}
|
||||
}
|
||||
|
||||
ns.startSharedHealthChecker(ctx, req.GetVolumeId(), stagingTargetPath)
|
||||
|
||||
return &csi.NodeStageVolumeResponse{}, nil
|
||||
}
|
||||
|
||||
// startSharedHealthChecker starts a health-checker on the stagingTargetPath.
|
||||
// This checker can be shared between multiple containers.
|
||||
//
|
||||
// TODO: start a FileChecker for read-writable volumes that have an app-data subdir.
|
||||
func (ns *NodeServer) startSharedHealthChecker(ctx context.Context, volumeID, dir string) {
|
||||
// The StatChecker works for volumes that do not have a dedicated app-data
|
||||
// subdirectory, or are read-only.
|
||||
err := ns.healthChecker.StartSharedChecker(volumeID, dir, hc.StatCheckerType)
|
||||
if err != nil {
|
||||
log.WarningLog(ctx, "failed to start healthchecker: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func (ns *NodeServer) mount(
|
||||
ctx context.Context,
|
||||
mnt mounter.VolumeMounter,
|
||||
@ -479,7 +498,8 @@ func (ns *NodeServer) NodePublishVolume(
|
||||
|
||||
// Ensure staging target path is a mountpoint.
|
||||
|
||||
if isMnt, err := util.IsMountPoint(ns.Mounter, stagingTargetPath); err != nil {
|
||||
isMnt, err := util.IsMountPoint(ns.Mounter, stagingTargetPath)
|
||||
if err != nil {
|
||||
log.ErrorLog(ctx, "stat failed: %v", err)
|
||||
|
||||
return nil, status.Error(codes.Internal, err.Error())
|
||||
@ -491,7 +511,7 @@ func (ns *NodeServer) NodePublishVolume(
|
||||
|
||||
// Check if the volume is already mounted
|
||||
|
||||
isMnt, err := util.IsMountPoint(ns.Mounter, targetPath)
|
||||
isMnt, err = util.IsMountPoint(ns.Mounter, targetPath)
|
||||
if err != nil {
|
||||
log.ErrorLog(ctx, "stat failed: %v", err)
|
||||
|
||||
@ -545,6 +565,10 @@ func (ns *NodeServer) NodeUnpublishVolume(
|
||||
// considering kubelet make sure node operations like unpublish/unstage...etc can not be called
|
||||
// at same time, an explicit locking at time of nodeunpublish is not required.
|
||||
targetPath := req.GetTargetPath()
|
||||
|
||||
// stop the health-checker that may have been started in NodeGetVolumeStats()
|
||||
ns.healthChecker.StopChecker(req.GetVolumeId(), targetPath)
|
||||
|
||||
isMnt, err := util.IsMountPoint(ns.Mounter, targetPath)
|
||||
if err != nil {
|
||||
log.ErrorLog(ctx, "stat failed: %v", err)
|
||||
@ -599,6 +623,9 @@ func (ns *NodeServer) NodeUnstageVolume(
|
||||
}
|
||||
|
||||
volID := req.GetVolumeId()
|
||||
|
||||
ns.healthChecker.StopSharedChecker(volID)
|
||||
|
||||
if acquired := ns.VolumeLocks.TryAcquire(volID); !acquired {
|
||||
log.ErrorLog(ctx, util.VolumeOperationAlreadyExistsFmt, volID)
|
||||
|
||||
@ -670,6 +697,13 @@ func (ns *NodeServer) NodeGetCapabilities(
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: &csi.NodeServiceCapability_Rpc{
|
||||
Rpc: &csi.NodeServiceCapability_RPC{
|
||||
Type: csi.NodeServiceCapability_RPC_VOLUME_CONDITION,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: &csi.NodeServiceCapability_Rpc{
|
||||
Rpc: &csi.NodeServiceCapability_RPC{
|
||||
@ -694,6 +728,35 @@ func (ns *NodeServer) NodeGetVolumeStats(
|
||||
return nil, status.Error(codes.InvalidArgument, err.Error())
|
||||
}
|
||||
|
||||
// health check first, return without stats if unhealthy
|
||||
healthy, msg := ns.healthChecker.IsHealthy(req.GetVolumeId(), targetPath)
|
||||
|
||||
// If healthy and an error is returned, it means that the checker was not
|
||||
// started. This could happen when the node-plugin was restarted and the
|
||||
// volume is already staged and published.
|
||||
if healthy && msg != nil {
|
||||
// Start a StatChecker for the mounted targetPath, this prevents
|
||||
// writing a file in the user-visible location. Ideally a (shared)
|
||||
// FileChecker is started with the stagingTargetPath, but we can't
|
||||
// get the stagingPath from the request easily.
|
||||
// TODO: resolve the stagingPath like rbd.getStagingPath() does
|
||||
err = ns.healthChecker.StartChecker(req.GetVolumeId(), targetPath, hc.StatCheckerType)
|
||||
if err != nil {
|
||||
log.WarningLog(ctx, "failed to start healthchecker: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// !healthy indicates a problem with the volume
|
||||
if !healthy {
|
||||
return &csi.NodeGetVolumeStatsResponse{
|
||||
VolumeCondition: &csi.VolumeCondition{
|
||||
Abnormal: true,
|
||||
Message: msg.Error(),
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// warning: stat() may hang on an unhealthy volume
|
||||
stat, err := os.Stat(targetPath)
|
||||
if err != nil {
|
||||
if util.IsCorruptedMountError(err) {
|
||||
|
Loading…
Reference in New Issue
Block a user