Merge pull request #429 from yati1998/DFBUGS-1011

DFBUGS-1011: Prevent data loss due to concurrent RPC calls (occurrence is very low)
commit edb16f257e by openshift-merge-bot[bot], 2025-02-25 17:21:01 +00:00 (committed by GitHub)
8 changed files with 42 additions and 20 deletions
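
The change replaces the removed assumption that kubelet serializes NodePublishVolume/NodeUnpublishVolume with an explicit per-target-path lock in both the CephFS and RBD node servers; a concurrent call on the same path now fails fast with codes.Aborted instead of racing. For orientation, here is a minimal sketch of the try-lock pattern the diff relies on (ceph-csi's real VolumeLocks lives in its util package; this standalone version is an assumption for illustration):

```go
package main

import "sync"

// VolumeLocks tracks keys (volume IDs or target paths) that currently
// have an operation in flight.
type VolumeLocks struct {
	mu    sync.Mutex
	locks map[string]struct{}
}

func NewVolumeLocks() *VolumeLocks {
	return &VolumeLocks{locks: map[string]struct{}{}}
}

// TryAcquire returns false if an operation on key is already running,
// letting the RPC bail out with codes.Aborted instead of racing.
func (vl *VolumeLocks) TryAcquire(key string) bool {
	vl.mu.Lock()
	defer vl.mu.Unlock()
	if _, busy := vl.locks[key]; busy {
		return false
	}
	vl.locks[key] = struct{}{}
	return true
}

// Release frees the key so the next RPC on it can proceed.
func (vl *VolumeLocks) Release(key string) {
	vl.mu.Lock()
	defer vl.mu.Unlock()
	delete(vl.locks, key)
}
```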


@@ -22,8 +22,8 @@ curl -X GET http://10.109.65.142:8080/metrics 2>/dev/null | grep csi
 csi_liveness 1
 ```
-Promethues can be deployed through the promethues operator described [here](https://coreos.com/operators/prometheus/docs/latest/user-guides/getting-started.html).
-The [service-monitor](../deploy/service-monitor.yaml) will tell promethues how
+Prometheus can be deployed through the prometheus operator described [here](https://coreos.com/operators/prometheus/docs/latest/user-guides/getting-started.html).
+The [service-monitor](../deploy/service-monitor.yaml) will tell prometheus how
 to pull metrics out of CSI.
 Each CSI pod has a service to expose the endpoint to prometheus. By default, rbd


@@ -125,7 +125,7 @@ parameters:
 # "file": Enable file encryption on the mounted filesystem
 # "block": Encrypt RBD block device
 # When unspecified assume type "block". "file" and "block" are
-# mutally exclusive.
+# mutually exclusive.
 # encryptionType: "block"
 # (optional) Use external key management system for encryption passphrases by


@@ -450,6 +450,13 @@ func (ns *NodeServer) NodePublishVolume(
 	targetPath := req.GetTargetPath()
 	volID := fsutil.VolumeID(req.GetVolumeId())

+	if acquired := ns.VolumeLocks.TryAcquire(targetPath); !acquired {
+		log.ErrorLog(ctx, util.TargetPathOperationAlreadyExistsFmt, targetPath)
+
+		return nil, status.Errorf(codes.Aborted, util.TargetPathOperationAlreadyExistsFmt, targetPath)
+	}
+	defer ns.VolumeLocks.Release(targetPath)
+
 	volOptions := &store.VolumeOptions{}
 	defer volOptions.Destroy()

@@ -462,9 +469,6 @@ func (ns *NodeServer) NodePublishVolume(
 		return nil, status.Errorf(codes.Internal, "failed to create mounter for volume %s: %v", volID, err.Error())
 	}

-	// Considering kubelet make sure the stage and publish operations
-	// are serialized, we dont need any extra locking in nodePublish
 	if err = util.CreateMountPoint(targetPath); err != nil {
 		log.ErrorLog(ctx, "failed to create mount point at %s: %v", targetPath, err)
@@ -555,12 +559,17 @@ func (ns *NodeServer) NodeUnpublishVolume(
 		return nil, err
 	}

-	// considering kubelet make sure node operations like unpublish/unstage...etc can not be called
-	// at same time, an explicit locking at time of nodeunpublish is not required.
 	targetPath := req.GetTargetPath()
+	volID := req.GetVolumeId()
+	if acquired := ns.VolumeLocks.TryAcquire(targetPath); !acquired {
+		log.ErrorLog(ctx, util.TargetPathOperationAlreadyExistsFmt, targetPath)
+
+		return nil, status.Errorf(codes.Aborted, util.TargetPathOperationAlreadyExistsFmt, targetPath)
+	}
+	defer ns.VolumeLocks.Release(targetPath)

 	// stop the health-checker that may have been started in NodeGetVolumeStats()
-	ns.healthChecker.StopChecker(req.GetVolumeId(), targetPath)
+	ns.healthChecker.StopChecker(volID, targetPath)

 	isMnt, err := util.IsMountPoint(ns.Mounter, targetPath)
 	if err != nil {

@@ -583,7 +592,7 @@ func (ns *NodeServer) NodeUnpublishVolume(
 		isMnt = true
 	}

 	if !isMnt {
-		if err = os.RemoveAll(targetPath); err != nil {
+		if err = os.Remove(targetPath); err != nil {
 			return nil, status.Error(codes.Internal, err.Error())
 		}
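
Alongside the new locking, the cleanup switches from os.RemoveAll to os.Remove: os.Remove deletes only an empty directory and returns an error (ENOTEMPTY on Linux) otherwise, while os.RemoveAll would recursively wipe whatever a racing mount left under the target path, which is the data-loss scenario this PR closes. A self-contained illustration of the difference (temporary paths, not ceph-csi code):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	// Stand-in for a target path that unexpectedly still holds data.
	dir, _ := os.MkdirTemp("", "target")
	_ = os.WriteFile(filepath.Join(dir, "data"), []byte("payload"), 0o600)

	// os.Remove refuses to delete the non-empty directory, so the
	// operation fails instead of destroying the contents.
	if err := os.Remove(dir); err != nil {
		fmt.Println("os.Remove failed safely:", err)
	}

	// os.RemoveAll would have deleted "data" along with the directory.
	_ = os.RemoveAll(dir)
}
```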


@@ -151,8 +151,8 @@ func validateMounter(m string) error {
 	return nil
 }

-func (v *VolumeOptions) DetectMounter(options map[string]string) error {
-	return extractMounter(&v.Mounter, options)
+func (vo *VolumeOptions) DetectMounter(options map[string]string) error {
+	return extractMounter(&vo.Mounter, options)
 }

 func extractMounter(dest *string, options map[string]string) error {


@@ -37,7 +37,7 @@ type checker struct {
 	// timeout contains the delay (interval + timeout)
 	timeout time.Duration

-	// mutex protects against concurrent access to healty, err and
+	// mutex protects against concurrent access to healthy, err and
 	// lastUpdate
 	mutex *sync.RWMutex


@@ -709,8 +709,12 @@ func (ns *NodeServer) NodePublishVolume(
 	volID := req.GetVolumeId()
 	stagingPath += "/" + volID

-	// Considering kubelet make sure the stage and publish operations
-	// are serialized, we dont need any extra locking in nodePublish
+	if acquired := ns.VolumeLocks.TryAcquire(targetPath); !acquired {
+		log.ErrorLog(ctx, util.TargetPathOperationAlreadyExistsFmt, targetPath)
+
+		return nil, status.Errorf(codes.Aborted, util.TargetPathOperationAlreadyExistsFmt, targetPath)
+	}
+	defer ns.VolumeLocks.Release(targetPath)

 	// Check if that target path exists properly
 	notMnt, err := ns.createTargetMountPath(ctx, targetPath, isBlock)
@@ -913,8 +917,14 @@ func (ns *NodeServer) NodeUnpublishVolume(
 	}
 	targetPath := req.GetTargetPath()

-	// considering kubelet make sure node operations like unpublish/unstage...etc can not be called
-	// at same time, an explicit locking at time of nodeunpublish is not required.
+	if acquired := ns.VolumeLocks.TryAcquire(targetPath); !acquired {
+		log.ErrorLog(ctx, util.TargetPathOperationAlreadyExistsFmt, targetPath)
+
+		return nil, status.Errorf(codes.Aborted, util.TargetPathOperationAlreadyExistsFmt, targetPath)
+	}
+	defer ns.VolumeLocks.Release(targetPath)

 	isMnt, err := ns.Mounter.IsMountPoint(targetPath)
 	if err != nil {
 		if os.IsNotExist(err) {

@@ -927,7 +937,7 @@ func (ns *NodeServer) NodeUnpublishVolume(
 		return nil, status.Error(codes.NotFound, err.Error())
 	}
 	if !isMnt {
-		if err = os.RemoveAll(targetPath); err != nil {
+		if err = os.Remove(targetPath); err != nil {
 			return nil, status.Error(codes.Internal, err.Error())
 		}

@@ -938,7 +948,7 @@ func (ns *NodeServer) NodeUnpublishVolume(
 		return nil, status.Error(codes.Internal, err.Error())
 	}

-	if err = os.RemoveAll(targetPath); err != nil {
+	if err = os.Remove(targetPath); err != nil {
 		return nil, status.Error(codes.Internal, err.Error())
 	}
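
The same lock-and-Remove pattern is applied to the RBD node server above. Returning codes.Aborted rather than codes.Internal tells the CSI sidecars that the RPC merely collided with an in-flight operation on the same target path and can simply be retried; a caller-side check might look like this (helper name is hypothetical):

```go
package main

import (
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

// retryableCollision reports whether an error from NodePublishVolume or
// NodeUnpublishVolume came from the per-target-path lock and is safe to
// retry as-is. status.Code returns codes.OK for a nil error.
func retryableCollision(err error) bool {
	return status.Code(err) == codes.Aborted
}
```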


@@ -60,7 +60,7 @@ func Test_getCrushLocationMap(t *testing.T) {
 			want: map[string]string{"zone": "zone1"},
 		},
 		{
-			name: "multuple matching crushlocation and node labels",
+			name: "multiple matching crushlocation and node labels",
 			args: input{
 				crushLocationLabels: "topology.io/zone,topology.io/rack",
 				nodeLabels: map[string]string{


@@ -28,6 +28,9 @@ const (
 	// SnapshotOperationAlreadyExistsFmt string format to return for concurrent operation.
 	SnapshotOperationAlreadyExistsFmt = "an operation with the given Snapshot ID %s already exists"
+
+	// TargetPathOperationAlreadyExistsFmt string format to return for concurrent operation on target path.
+	TargetPathOperationAlreadyExistsFmt = "an operation with the given target path %s already exists"
 )

 // VolumeLocks implements a map with atomic operations. It stores a set of all volume IDs
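
The new TargetPathOperationAlreadyExistsFmt message is emitted whenever TryAcquire on a target path fails. Against the VolumeLocks sketch near the top of this page, the intended semantics look like this (path is hypothetical):

```go
package main

import "fmt"

func main() {
	vl := NewVolumeLocks() // VolumeLocks sketch from the top of this page
	target := "/var/lib/kubelet/pods/uid0/volumes/kubernetes.io~csi/pvc/mount"

	fmt.Println(vl.TryAcquire(target)) // true: first caller wins the key
	fmt.Println(vl.TryAcquire(target)) // false: concurrent RPC is rejected
	vl.Release(target)
	fmt.Println(vl.TryAcquire(target)) // true: key is free again
}
```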