Rework multi-node-multi-writer feature

This commit reverts the initial implementation of the
multi-node-multi-writer feature:
  commit: b5b8e46460

It replaces that implementation with a more restrictive version that
only allows multi-node-multi-writer for volumes of type `block`

With this change no volume parameters are required in the storage
class. We also fail any attempt to create a file-based device with
multi-node-multi-writer specified, so a user doesn't have to wait
until publish time to find out that it doesn't work.
j-griffith 2019-03-13 18:18:04 -06:00
parent a164169fd3
commit 6ec1196f47
7 changed files with 152 additions and 5 deletions

View File

@@ -31,7 +31,7 @@ go-test:
 	./scripts/test-go.sh

 static-check:
 	./scripts/lint-go.sh
 	./scripts/lint-text.sh

 rbdplugin:

View File

@@ -114,3 +114,102 @@ To restore the snapshot to a new PVC, deploy
kubectl create -f pvc-restore.yaml
kubectl create -f pod-restore.yaml
```
## How to test RBD MULTI_NODE_MULTI_WRITER BLOCK feature

Requires feature-gates: `BlockVolume=true` `CSIBlockVolume=true`

*NOTE* The MULTI_NODE_MULTI_WRITER capability is only available for
volumes of access_type `block`.

*WARNING* This feature is strictly for workloads that know how to deal
with concurrent access to the volume (e.g. active/passive applications).
Using RWX modes on non-clustered file systems with applications trying
to simultaneously access the volume will likely result in data corruption!
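Because of the fail-early check added in this commit, a claim that
combines `ReadWriteMany` with `volumeMode: Filesystem` is rejected at
create time instead of at publish time. A hypothetical example of such a
rejected claim (the PVC name is made up for illustration):
```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: fs-rwx-pvc           # hypothetical name, for illustration
spec:
  accessModes:
    - ReadWriteMany          # multi-node access mode on a file-based
  volumeMode: Filesystem     # volume: CreateVolume fails with
  resources:                 # InvalidArgument
    requests:
      storage: 1Gi
  storageClassName: csi-rbd
```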
The following examples issue a request for a `Block` `ReadWriteMany`
claim and use the resulting claim in a Pod.
```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: block-pvc
spec:
  accessModes:
    - ReadWriteMany
  volumeMode: Block
  resources:
    requests:
      storage: 1Gi
  storageClassName: csi-rbd
```
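The claim above references a `csi-rbd` StorageClass. A minimal sketch of
one follows; the provisioner name and all parameter values are
assumptions that must match your deployment (the canonical version lives
in this repo's examples). Note that, per this commit, no extra
storage-class parameter is needed for multi-writer:
```yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: csi-rbd
provisioner: rbd.csi.ceph.com      # assumed driver name; check your deployment
parameters:
  monitors: 192.168.1.1:6789       # placeholder Ceph monitor address(es)
  pool: rbd                        # placeholder pool name
  imageFormat: "2"
  imageFeatures: layering
  # credential/secret parameters omitted; copy them from the repo's examples
reclaimPolicy: Delete
```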
Create a Pod that uses this PVC:
```yaml
apiVersion: v1
kind: Pod
metadata:
  name: my-pod
spec:
  containers:
    - name: my-container
      image: debian
      command: ["/bin/bash", "-c"]
      args: ["tail -f /dev/null"]
      volumeDevices:
        - devicePath: /dev/rbdblock
          name: my-volume
      imagePullPolicy: IfNotPresent
  volumes:
    - name: my-volume
      persistentVolumeClaim:
        claimName: block-pvc
```
Now we can create a second Pod that uses this PVC at the same time
(make sure it is scheduled on a different node; multi-writer on a single
node works without this feature):
```yaml
apiVersion: v1
kind: Pod
metadata:
  name: another-pod
spec:
  containers:
    - name: my-container
      image: debian
      command: ["/bin/bash", "-c"]
      args: ["tail -f /dev/null"]
      volumeDevices:
        - devicePath: /dev/rbdblock
          name: my-volume
      imagePullPolicy: IfNotPresent
  volumes:
    - name: my-volume
      persistentVolumeClaim:
        claimName: block-pvc
```
Wait for both Pods to enter the Running state, then check that the block
device is available at `/dev/rbdblock` in both containers:
```bash
$ kubectl exec -it my-pod -- fdisk -l /dev/rbdblock
Disk /dev/rbdblock: 1 GiB, 1073741824 bytes, 2097152 sectors
Units: sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 4194304 bytes / 4194304 bytes
```
```bash
$ kubectl exec -it another-pod -- fdisk -l /dev/rbdblock
Disk /dev/rbdblock: 1 GiB, 1073741824 bytes, 2097152 sectors
Units: sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 4194304 bytes / 4194304 bytes
```
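To confirm the two Pods really share the device, you can write a marker
from one Pod and read it back from the other. A rough sketch (the
`direct` flags bypass each node's page cache; note this scribbles over
the start of the device):
```bash
# Write a 512-byte block containing a marker string from the first pod
$ kubectl exec my-pod -- sh -c \
    'echo multiwriter-test | dd of=/dev/rbdblock bs=512 count=1 conv=sync oflag=direct'

# Read the marker back from the second pod on the other node
$ kubectl exec another-pod -- sh -c \
    'dd if=/dev/rbdblock bs=512 count=1 iflag=direct 2>/dev/null | head -c 16'
multiwriter-test
```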

View File

@@ -93,7 +93,25 @@ func (cs *ControllerServer) validateVolumeReq(req *csi.CreateVolumeRequest) error {
 func parseVolCreateRequest(req *csi.CreateVolumeRequest) (*rbdVolume, error) {
 	// TODO (sbezverk) Last check for not exceeding total storage capacity
-	rbdVol, err := getRBDVolumeOptions(req.GetParameters())
+	isMultiNode := false
+	isBlock := false
+	for _, cap := range req.VolumeCapabilities {
+		// Only checking SINGLE_NODE_WRITER here because regardless of the other types (MULTI READER) we need to implement the same logic to ignore the in-use response
+		if cap.GetAccessMode().GetMode() != csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER {
+			isMultiNode = true
+		}
+		if cap.GetBlock() != nil {
+			isBlock = true
+		}
+	}
+
+	// We want to fail early if the user is trying to create a RWX on a non-block type device
+	if isMultiNode && !isBlock {
+		return nil, status.Error(codes.InvalidArgument, "multi node access modes are only supported on rbd `block` type volumes")
+	}
+
+	// if it's NOT SINGLE_NODE_WRITER and it's BLOCK we'll set the parameter to ignore the in-use checks
+	rbdVol, err := getRBDVolumeOptions(req.GetParameters(), (isMultiNode && isBlock))
 	if err != nil {
 		return nil, status.Error(codes.InvalidArgument, err.Error())
 	}
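To make the fail-early path concrete, here is a small standalone sketch
(not part of the commit) that builds the kind of request the new check
rejects and re-runs the same logic; names like `demo-vol` are made up:
```go
package main

import (
	"fmt"

	"github.com/container-storage-interface/spec/lib/go/csi"
)

func main() {
	// A mount (file-based) capability asking for multi-node-multi-writer:
	// exactly the combination parseVolCreateRequest now refuses.
	req := &csi.CreateVolumeRequest{
		Name: "demo-vol",
		VolumeCapabilities: []*csi.VolumeCapability{{
			AccessType: &csi.VolumeCapability_Mount{
				Mount: &csi.VolumeCapability_MountVolume{},
			},
			AccessMode: &csi.VolumeCapability_AccessMode{
				Mode: csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER,
			},
		}},
	}

	// Re-implementation of the check above, for illustration only.
	isMultiNode, isBlock := false, false
	for _, cap := range req.VolumeCapabilities {
		if cap.GetAccessMode().GetMode() != csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER {
			isMultiNode = true
		}
		if cap.GetBlock() != nil {
			isBlock = true
		}
	}
	fmt.Println(isMultiNode && !isBlock) // true: request would be rejected
}
```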

View File

@@ -48,6 +48,7 @@ type NodeServer struct {
 func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) {
 	targetPath := req.GetTargetPath()
 	targetPathMutex.LockKey(targetPath)
+	disableInUseChecks := false

 	defer func() {
 		if err := targetPathMutex.UnlockKey(targetPath); err != nil {
@@ -70,7 +71,19 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) {
 	if !notMnt {
 		return &csi.NodePublishVolumeResponse{}, nil
 	}
-	volOptions, err := getRBDVolumeOptions(req.GetVolumeContext())
+
+	// MULTI_NODE_MULTI_WRITER is supported by default for Block access type volumes
+	if req.VolumeCapability.AccessMode.Mode == csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER {
+		if isBlock {
+			disableInUseChecks = true
+		} else {
+			klog.Warningf("MULTI_NODE_MULTI_WRITER currently only supported with volumes of access type `block`, invalid AccessMode for volume: %v", req.GetVolumeId())
+			e := fmt.Errorf("rbd: MULTI_NODE_MULTI_WRITER access mode only allowed with BLOCK access type")
+			return nil, status.Error(codes.InvalidArgument, e.Error())
+		}
+	}
+
+	volOptions, err := getRBDVolumeOptions(req.GetVolumeContext(), disableInUseChecks)
 	if err != nil {
 		return nil, err
 	}
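The `isBlock` flag used in this hunk is derived earlier in
`NodePublishVolume`, outside the lines shown; presumably something along
the lines of this one-liner (the CSI getters shown do exist on
`NodePublishVolumeRequest`):
```go
// Assumed derivation (not visible in the hunk): the volume is being
// published as raw block iff the capability carries a Block access type.
isBlock := req.GetVolumeCapability().GetBlock() != nil
```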

View File

@@ -102,7 +102,14 @@ func (r *Driver) Run(driverName, nodeID, endpoint string, containerized bool, ca
 		csi.ControllerServiceCapability_RPC_LIST_SNAPSHOTS,
 		csi.ControllerServiceCapability_RPC_CLONE_VOLUME,
 	})
-	r.cd.AddVolumeCapabilityAccessModes([]csi.VolumeCapability_AccessMode_Mode{csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER})
+
+	// We only support the multi-writer option when using block, but it's a supported capability for the plugin in general
+	// In addition, we want to add the remaining modes like MULTI_NODE_READER_ONLY,
+	// MULTI_NODE_SINGLE_WRITER etc, but need to do some verification of RO modes first
+	// will work those as follow up features
+	r.cd.AddVolumeCapabilityAccessModes(
+		[]csi.VolumeCapability_AccessMode_Mode{csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
+			csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER})

 	// Create GRPC servers
 	r.ids = NewIdentityServer(r.cd)
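For reference, the two advertised modes correspond to values of the CSI
spec's Go enum; a tiny standalone sketch (not part of the commit) that
prints them:
```go
package main

import (
	"fmt"

	"github.com/container-storage-interface/spec/lib/go/csi"
)

func main() {
	// The two access modes the rbd driver now advertises.
	modes := []csi.VolumeCapability_AccessMode_Mode{
		csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
		csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER,
	}
	for _, m := range modes {
		// Protobuf enums stringify to their declared names.
		fmt.Printf("%s = %d\n", m, int32(m))
	}
}
```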

View File

@@ -258,6 +258,7 @@ func attachRBDImage(volOptions *rbdVolume, userID string, credentials map[string
 			Factor: rbdImageWatcherFactor,
 			Steps:  rbdImageWatcherSteps,
 		}
+
 		err = waitForrbdImage(backoff, volOptions, userID, credentials)

 		if err != nil {
@@ -313,6 +314,10 @@ func waitForrbdImage(backoff wait.Backoff, volOptions *rbdVolume, userID string,
 		if err != nil {
 			return false, fmt.Errorf("fail to check rbd image status with: (%v), rbd output: (%s)", err, rbdOutput)
 		}
+		if (volOptions.DisableInUseChecks) && (used) {
+			klog.V(2).Info("valid multi-node attach requested, ignoring watcher in-use result")
+			return used, nil
+		}
 		return !used, nil
 	})
 	// return error if rbd image has not become available for the specified timeout
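For context, the `used` flag comes from ceph-csi's watcher check, which
shells out to `rbd status` and looks for watchers on the image.
Illustrative output for an image that is attached somewhere (image name,
addresses, and IDs are placeholders):
```bash
$ rbd status rbd/pvc-de1a1d0f
Watchers:
	watcher=192.168.1.10:0/1234567890 client.4567 cookie=18446462598732840961
```
With this change, a watcher on the image no longer fails the wait loop
when `DisableInUseChecks` is set; it only does so for single-writer
volumes.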

View File

@@ -51,6 +51,7 @@ type rbdVolume struct {
 	AdminID string `json:"adminId"`
 	UserID  string `json:"userId"`
 	Mounter string `json:"mounter"`
+	DisableInUseChecks bool `json:"disableInUseChecks"`
 }

 type rbdSnapshot struct {
@@ -226,7 +227,7 @@ func execCommand(command string, args []string) ([]byte, error) {
 	return cmd.CombinedOutput()
 }

-func getRBDVolumeOptions(volOptions map[string]string) (*rbdVolume, error) {
+func getRBDVolumeOptions(volOptions map[string]string, disableInUseChecks bool) (*rbdVolume, error) {
 	var ok bool
 	rbdVol := &rbdVolume{}
 	rbdVol.Pool, ok = volOptions["pool"]
@@ -259,6 +260,10 @@ func getRBDVolumeOptions(volOptions map[string]string) (*rbdVolume, error) {
 		}
 	}
+
+	klog.V(3).Infof("setting disableInUseChecks on rbd volume to: %v", disableInUseChecks)
+	rbdVol.DisableInUseChecks = disableInUseChecks
+
 	getCredsFromVol(rbdVol, volOptions)
 	return rbdVol, nil
 }