Rework multi-node-multi-writer feature

This commit reverts the initial implementation of the multi-node-multi-writer feature: commit: b5b8e4646094d0ec1f3dfe96060a183c863d7d1a It replaces that implementation with a more restrictive version that only allows multi-node-multi-writer for volumes of type `block` With this change there are no volume parameters required in the stoarge class, we also fail any attempt to create a file based device with multi-node-multi-write being specified, this way a user doesn't have to wait until they try and do the publish before realizing it doesn't work.
2025-06-03 04:16:42 +00:00 · 2019-03-13 18:18:04 -06:00 · 2019-03-13 18:18:04 -06:00 · 6ec1196f47
commit 6ec1196f47
parent a164169fd3
7 changed files with 152 additions and 5 deletions
--- a/2
+++ b/2
@ -31,7 +31,7 @@ go-test:
 	./scripts/test-go.sh
 static-check:
-	./scripts/lint-go.sh  
+	./scripts/lint-go.sh
 	./scripts/lint-text.sh
 rbdplugin:
--- a/examples/README.md
+++ b/examples/README.md
@ -114,3 +114,102 @@ To restore the snapshot to a new PVC, deploy
 kubectl create -f pvc-restore.yaml
 kubectl create -f pod-restore.yaml
 ```
 ## How to test RBD MULTI_NODE_MULTI_WRITER BLOCK feature
 Requires feature-gates: `BlockVolume=true` `CSIBlockVolume=true`
 *NOTE* The MULTI_NODE_MULTI_WRITER capability is only available for
 Volumes that are of access_type `block`
 *WARNING*  This feature is strictly for workloads that know how to deal
 with concurrent access to the Volume (eg Active/Passive applications).
 Using RWX modes on non clustered file systems with applications trying
 to simultaneously access the Volume will likely result in data corruption!
 Following are examples for issuing a request for a `Block`
 `ReadWriteMany` Claim, and using the resultant Claim for a POD
 ```yaml
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  name: block-pvc
 spec:
  accessModes:
  - ReadWriteMany
  volumeMode: Block
  resources:
    requests:
      storage: 1Gi
  storageClassName: csi-rbd
 ```
 Create a POD that uses this PVC:
 ```yaml
 apiVersion: v1
 kind: Pod
 metadata:
  name: my-pod
 spec:
  containers:
    - name: my-container
      image: debian
      command: ["/bin/bash", "-c"]
      args: [ "tail -f /dev/null" ]
      volumeDevices:
        - devicePath: /dev/rbdblock
          name: my-volume
      imagePullPolicy: IfNotPresent
  volumes:
    - name: my-volume
      persistentVolumeClaim:
        claimName: block-pvc
 ```
 Now, we can create a second POD (ensure the POD is scheduled on a different
 node; multiwriter single node works without this feature) that also uses this
 PVC at the same time, again wait for the pod to enter running state, and verify
 the block device is available.
 ```yaml
 apiVersion: v1
 kind: Pod
 metadata:
  name: another-pod
 spec:
  containers:
    - name: my-container
      image: debian
      command: ["/bin/bash", "-c"]
      args: [ "tail -f /dev/null" ]
      volumeDevices:
        - devicePath: /dev/rbdblock
          name: my-volume
      imagePullPolicy: IfNotPresent
  volumes:
    - name: my-volume
      persistentVolumeClaim:
        claimName: block-pvc
 ```
 Wait for the PODs to enter Running state, check that our block device
 is available in the container at `/dev/rdbblock` in both containers:
 ```bash
 $ kubectl exec -it my-pod -- fdisk -l /dev/rbdblock
 Disk /dev/rbdblock: 1 GiB, 1073741824 bytes, 2097152 sectors
 Units: sectors of 1 * 512 = 512 bytes
 Sector size (logical/physical): 512 bytes / 512 bytes
 I/O size (minimum/optimal): 4194304 bytes / 4194304 bytes
 ```
 ```bash
 $ kubectl exec -it another-pod -- fdisk -l /dev/rbdblock
 Disk /dev/rbdblock: 1 GiB, 1073741824 bytes, 2097152 sectors
 Units: sectors of 1 * 512 = 512 bytes
 Sector size (logical/physical): 512 bytes / 512 bytes
 I/O size (minimum/optimal): 4194304 bytes / 4194304 bytes
 ```
--- a/pkg/rbd/controllerserver.go
+++ b/pkg/rbd/controllerserver.go
@ -93,7 +93,25 @@ func (cs *ControllerServer) validateVolumeReq(req *csi.CreateVolumeRequest) erro
 func parseVolCreateRequest(req *csi.CreateVolumeRequest) (*rbdVolume, error) {
 	// TODO (sbezverk) Last check for not exceeding total storage capacity
-	rbdVol, err := getRBDVolumeOptions(req.GetParameters())
+	isMultiNode := false
 	isBlock := false
 	for _, cap := range req.VolumeCapabilities {
 		// Only checking SINGLE_NODE_SINGLE_WRITER here because regardless of the other types (MULTI READER) we need to implement the same logic to ignore the in-use response
 		if cap.GetAccessMode().GetMode() != csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER {
 			isMultiNode = true
 		}
 		if cap.GetBlock() != nil {
 			isBlock = true
 		}
 	}
 	// We want to fail early if the user is trying to create a RWX on a non-block type device
 	if isMultiNode && !isBlock {
 		return nil, status.Error(codes.InvalidArgument, "multi node access modes are only supported on rbd `block` type volumes")
 	}
 	// if it's NOT SINGLE_NODE_WRITER and it's BLOCK we'll set the parameter to ignore the in-use checks
 	rbdVol, err := getRBDVolumeOptions(req.GetParameters(), (isMultiNode && isBlock))
 	if err != nil {
 		return nil, status.Error(codes.InvalidArgument, err.Error())
 	}
--- a/pkg/rbd/nodeserver.go
+++ b/pkg/rbd/nodeserver.go
@ -48,6 +48,7 @@ type NodeServer struct {
 func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) {
 	targetPath := req.GetTargetPath()
 	targetPathMutex.LockKey(targetPath)
 	disableInUseChecks := false
 	defer func() {
 		if err := targetPathMutex.UnlockKey(targetPath); err != nil {
@ -70,7 +71,19 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
 	if !notMnt {
 		return &csi.NodePublishVolumeResponse{}, nil
 	}
-	volOptions, err := getRBDVolumeOptions(req.GetVolumeContext())
+
 	// MULTI_NODE_MULTI_WRITER is supported by default for Block access type volumes
 	if req.VolumeCapability.AccessMode.Mode == csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER {
 		if isBlock {
 			disableInUseChecks = true
 		} else {
 			klog.Warningf("MULTI_NODE_MULTI_WRITER currently only supported with volumes of access type `block`, invalid AccessMode for volume: %v", req.GetVolumeId())
 			e := fmt.Errorf("rbd: MULTI_NODE_MULTI_WRITER access mode only allowed with BLOCK access type")
 			return nil, status.Error(codes.InvalidArgument, e.Error())
 		}
 	}
 	volOptions, err := getRBDVolumeOptions(req.GetVolumeContext(), disableInUseChecks)
 	if err != nil {
 		return nil, err
 	}
--- a/pkg/rbd/rbd.go
+++ b/pkg/rbd/rbd.go
@ -102,7 +102,14 @@ func (r *Driver) Run(driverName, nodeID, endpoint string, containerized bool, ca
 		csi.ControllerServiceCapability_RPC_LIST_SNAPSHOTS,
 		csi.ControllerServiceCapability_RPC_CLONE_VOLUME,
 	})
-	r.cd.AddVolumeCapabilityAccessModes([]csi.VolumeCapability_AccessMode_Mode{csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER})
+
 	// We only support the multi-writer option when using block, but it's a supported capability for the plugin in general
 	// In addition, we want to add the remaining modes like MULTI_NODE_READER_ONLY,
 	// MULTI_NODE_SINGLE_WRITER etc, but need to do some verification of RO modes first
 	// will work those as follow up features
 	r.cd.AddVolumeCapabilityAccessModes(
 		[]csi.VolumeCapability_AccessMode_Mode{csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
 			csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER})
 	// Create GRPC servers
 	r.ids = NewIdentityServer(r.cd)
--- a/pkg/rbd/rbd_attach.go
+++ b/pkg/rbd/rbd_attach.go
@ -258,6 +258,7 @@ func attachRBDImage(volOptions *rbdVolume, userID string, credentials map[string
 			Factor:   rbdImageWatcherFactor,
 			Steps:    rbdImageWatcherSteps,
 		}
 		err = waitForrbdImage(backoff, volOptions, userID, credentials)
 		if err != nil {
@ -313,6 +314,10 @@ func waitForrbdImage(backoff wait.Backoff, volOptions *rbdVolume, userID string,
 		if err != nil {
 			return false, fmt.Errorf("fail to check rbd image status with: (%v), rbd output: (%s)", err, rbdOutput)
 		}
 		if (volOptions.DisableInUseChecks) && (used) {
 			klog.V(2).Info("valid multi-node attach requested, ignoring watcher in-use result")
 			return used, nil
 		}
 		return !used, nil
 	})
 	// return error if rbd image has not become available for the specified timeout
--- a/pkg/rbd/rbd_util.go
+++ b/pkg/rbd/rbd_util.go
@ -51,6 +51,7 @@ type rbdVolume struct {
 	AdminID            string `json:"adminId"`
 	UserID             string `json:"userId"`
 	Mounter            string `json:"mounter"`
 	DisableInUseChecks bool   `json:"disableInUseChecks"`
 }
 type rbdSnapshot struct {
@ -226,7 +227,7 @@ func execCommand(command string, args []string) ([]byte, error) {
 	return cmd.CombinedOutput()
 }
-func getRBDVolumeOptions(volOptions map[string]string) (*rbdVolume, error) {
+func getRBDVolumeOptions(volOptions map[string]string, disableInUseChecks bool) (*rbdVolume, error) {
 	var ok bool
 	rbdVol := &rbdVolume{}
 	rbdVol.Pool, ok = volOptions["pool"]
@ -259,6 +260,10 @@ func getRBDVolumeOptions(volOptions map[string]string) (*rbdVolume, error) {
 		}
 	}
 	klog.V(3).Infof("setting disableInUseChecks on rbd volume to: %v", disableInUseChecks)
 	rbdVol.DisableInUseChecks = disableInUseChecks
 	getCredsFromVol(rbdVol, volOptions)
 	return rbdVol, nil
 }