mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-01-17 10:19:30 +00:00
Rework multi-node-multi-writer feature
This commit reverts the initial implementation of the
multi-node-multi-writer feature:
commit: b5b8e46460
It replaces that implementation with a more restrictive version that
only allows multi-node-multi-writer for volumes of type `block`
With this change there are no volume parameters required in the stoarge
class, we also fail any attempt to create a file based device with
multi-node-multi-write being specified, this way a user doesn't have to
wait until they try and do the publish before realizing it doesn't work.
This commit is contained in:
parent
a164169fd3
commit
6ec1196f47
2
Makefile
2
Makefile
@ -31,7 +31,7 @@ go-test:
|
||||
./scripts/test-go.sh
|
||||
|
||||
static-check:
|
||||
./scripts/lint-go.sh
|
||||
./scripts/lint-go.sh
|
||||
./scripts/lint-text.sh
|
||||
|
||||
rbdplugin:
|
||||
|
@ -114,3 +114,102 @@ To restore the snapshot to a new PVC, deploy
|
||||
kubectl create -f pvc-restore.yaml
|
||||
kubectl create -f pod-restore.yaml
|
||||
```
|
||||
|
||||
## How to test RBD MULTI_NODE_MULTI_WRITER BLOCK feature
|
||||
|
||||
Requires feature-gates: `BlockVolume=true` `CSIBlockVolume=true`
|
||||
|
||||
*NOTE* The MULTI_NODE_MULTI_WRITER capability is only available for
|
||||
Volumes that are of access_type `block`
|
||||
|
||||
*WARNING* This feature is strictly for workloads that know how to deal
|
||||
with concurrent access to the Volume (eg Active/Passive applications).
|
||||
Using RWX modes on non clustered file systems with applications trying
|
||||
to simultaneously access the Volume will likely result in data corruption!
|
||||
|
||||
Following are examples for issuing a request for a `Block`
|
||||
`ReadWriteMany` Claim, and using the resultant Claim for a POD
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: block-pvc
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
volumeMode: Block
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
||||
storageClassName: csi-rbd
|
||||
```
|
||||
|
||||
Create a POD that uses this PVC:
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: my-pod
|
||||
spec:
|
||||
containers:
|
||||
- name: my-container
|
||||
image: debian
|
||||
command: ["/bin/bash", "-c"]
|
||||
args: [ "tail -f /dev/null" ]
|
||||
volumeDevices:
|
||||
- devicePath: /dev/rbdblock
|
||||
name: my-volume
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumes:
|
||||
- name: my-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: block-pvc
|
||||
|
||||
```
|
||||
|
||||
Now, we can create a second POD (ensure the POD is scheduled on a different
|
||||
node; multiwriter single node works without this feature) that also uses this
|
||||
PVC at the same time, again wait for the pod to enter running state, and verify
|
||||
the block device is available.
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: another-pod
|
||||
spec:
|
||||
containers:
|
||||
- name: my-container
|
||||
image: debian
|
||||
command: ["/bin/bash", "-c"]
|
||||
args: [ "tail -f /dev/null" ]
|
||||
volumeDevices:
|
||||
- devicePath: /dev/rbdblock
|
||||
name: my-volume
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumes:
|
||||
- name: my-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: block-pvc
|
||||
```
|
||||
|
||||
Wait for the PODs to enter Running state, check that our block device
|
||||
is available in the container at `/dev/rdbblock` in both containers:
|
||||
|
||||
```bash
|
||||
$ kubectl exec -it my-pod -- fdisk -l /dev/rbdblock
|
||||
Disk /dev/rbdblock: 1 GiB, 1073741824 bytes, 2097152 sectors
|
||||
Units: sectors of 1 * 512 = 512 bytes
|
||||
Sector size (logical/physical): 512 bytes / 512 bytes
|
||||
I/O size (minimum/optimal): 4194304 bytes / 4194304 bytes
|
||||
```
|
||||
|
||||
```bash
|
||||
$ kubectl exec -it another-pod -- fdisk -l /dev/rbdblock
|
||||
Disk /dev/rbdblock: 1 GiB, 1073741824 bytes, 2097152 sectors
|
||||
Units: sectors of 1 * 512 = 512 bytes
|
||||
Sector size (logical/physical): 512 bytes / 512 bytes
|
||||
I/O size (minimum/optimal): 4194304 bytes / 4194304 bytes
|
||||
```
|
||||
|
@ -93,7 +93,25 @@ func (cs *ControllerServer) validateVolumeReq(req *csi.CreateVolumeRequest) erro
|
||||
func parseVolCreateRequest(req *csi.CreateVolumeRequest) (*rbdVolume, error) {
|
||||
// TODO (sbezverk) Last check for not exceeding total storage capacity
|
||||
|
||||
rbdVol, err := getRBDVolumeOptions(req.GetParameters())
|
||||
isMultiNode := false
|
||||
isBlock := false
|
||||
for _, cap := range req.VolumeCapabilities {
|
||||
// Only checking SINGLE_NODE_SINGLE_WRITER here because regardless of the other types (MULTI READER) we need to implement the same logic to ignore the in-use response
|
||||
if cap.GetAccessMode().GetMode() != csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER {
|
||||
isMultiNode = true
|
||||
}
|
||||
if cap.GetBlock() != nil {
|
||||
isBlock = true
|
||||
}
|
||||
}
|
||||
|
||||
// We want to fail early if the user is trying to create a RWX on a non-block type device
|
||||
if isMultiNode && !isBlock {
|
||||
return nil, status.Error(codes.InvalidArgument, "multi node access modes are only supported on rbd `block` type volumes")
|
||||
}
|
||||
|
||||
// if it's NOT SINGLE_NODE_WRITER and it's BLOCK we'll set the parameter to ignore the in-use checks
|
||||
rbdVol, err := getRBDVolumeOptions(req.GetParameters(), (isMultiNode && isBlock))
|
||||
if err != nil {
|
||||
return nil, status.Error(codes.InvalidArgument, err.Error())
|
||||
}
|
||||
|
@ -48,6 +48,7 @@ type NodeServer struct {
|
||||
func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) {
|
||||
targetPath := req.GetTargetPath()
|
||||
targetPathMutex.LockKey(targetPath)
|
||||
disableInUseChecks := false
|
||||
|
||||
defer func() {
|
||||
if err := targetPathMutex.UnlockKey(targetPath); err != nil {
|
||||
@ -70,7 +71,19 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
|
||||
if !notMnt {
|
||||
return &csi.NodePublishVolumeResponse{}, nil
|
||||
}
|
||||
volOptions, err := getRBDVolumeOptions(req.GetVolumeContext())
|
||||
|
||||
// MULTI_NODE_MULTI_WRITER is supported by default for Block access type volumes
|
||||
if req.VolumeCapability.AccessMode.Mode == csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER {
|
||||
if isBlock {
|
||||
disableInUseChecks = true
|
||||
} else {
|
||||
klog.Warningf("MULTI_NODE_MULTI_WRITER currently only supported with volumes of access type `block`, invalid AccessMode for volume: %v", req.GetVolumeId())
|
||||
e := fmt.Errorf("rbd: MULTI_NODE_MULTI_WRITER access mode only allowed with BLOCK access type")
|
||||
return nil, status.Error(codes.InvalidArgument, e.Error())
|
||||
}
|
||||
}
|
||||
|
||||
volOptions, err := getRBDVolumeOptions(req.GetVolumeContext(), disableInUseChecks)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -102,7 +102,14 @@ func (r *Driver) Run(driverName, nodeID, endpoint string, containerized bool, ca
|
||||
csi.ControllerServiceCapability_RPC_LIST_SNAPSHOTS,
|
||||
csi.ControllerServiceCapability_RPC_CLONE_VOLUME,
|
||||
})
|
||||
r.cd.AddVolumeCapabilityAccessModes([]csi.VolumeCapability_AccessMode_Mode{csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER})
|
||||
|
||||
// We only support the multi-writer option when using block, but it's a supported capability for the plugin in general
|
||||
// In addition, we want to add the remaining modes like MULTI_NODE_READER_ONLY,
|
||||
// MULTI_NODE_SINGLE_WRITER etc, but need to do some verification of RO modes first
|
||||
// will work those as follow up features
|
||||
r.cd.AddVolumeCapabilityAccessModes(
|
||||
[]csi.VolumeCapability_AccessMode_Mode{csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
|
||||
csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER})
|
||||
|
||||
// Create GRPC servers
|
||||
r.ids = NewIdentityServer(r.cd)
|
||||
|
@ -258,6 +258,7 @@ func attachRBDImage(volOptions *rbdVolume, userID string, credentials map[string
|
||||
Factor: rbdImageWatcherFactor,
|
||||
Steps: rbdImageWatcherSteps,
|
||||
}
|
||||
|
||||
err = waitForrbdImage(backoff, volOptions, userID, credentials)
|
||||
|
||||
if err != nil {
|
||||
@ -313,6 +314,10 @@ func waitForrbdImage(backoff wait.Backoff, volOptions *rbdVolume, userID string,
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("fail to check rbd image status with: (%v), rbd output: (%s)", err, rbdOutput)
|
||||
}
|
||||
if (volOptions.DisableInUseChecks) && (used) {
|
||||
klog.V(2).Info("valid multi-node attach requested, ignoring watcher in-use result")
|
||||
return used, nil
|
||||
}
|
||||
return !used, nil
|
||||
})
|
||||
// return error if rbd image has not become available for the specified timeout
|
||||
|
@ -51,6 +51,7 @@ type rbdVolume struct {
|
||||
AdminID string `json:"adminId"`
|
||||
UserID string `json:"userId"`
|
||||
Mounter string `json:"mounter"`
|
||||
DisableInUseChecks bool `json:"disableInUseChecks"`
|
||||
}
|
||||
|
||||
type rbdSnapshot struct {
|
||||
@ -226,7 +227,7 @@ func execCommand(command string, args []string) ([]byte, error) {
|
||||
return cmd.CombinedOutput()
|
||||
}
|
||||
|
||||
func getRBDVolumeOptions(volOptions map[string]string) (*rbdVolume, error) {
|
||||
func getRBDVolumeOptions(volOptions map[string]string, disableInUseChecks bool) (*rbdVolume, error) {
|
||||
var ok bool
|
||||
rbdVol := &rbdVolume{}
|
||||
rbdVol.Pool, ok = volOptions["pool"]
|
||||
@ -259,6 +260,10 @@ func getRBDVolumeOptions(volOptions map[string]string) (*rbdVolume, error) {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
klog.V(3).Infof("setting disableInUseChecks on rbd volume to: %v", disableInUseChecks)
|
||||
rbdVol.DisableInUseChecks = disableInUseChecks
|
||||
|
||||
getCredsFromVol(rbdVol, volOptions)
|
||||
return rbdVol, nil
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user