Rework multi-node-multi-writer feature

This commit reverts the initial implementation of the
multi-node-multi-writer feature (commit b5b8e46460) and replaces it
with a more restrictive version that only allows multi-node-multi-writer
for volumes of type `block`.

With this change no volume parameters are required in the storage
class. We also fail any attempt to create a file-based device with
multi-node-multi-writer specified, so a user doesn't have to wait
until the publish step to find out that it doesn't work.
parent a164169fd3
commit 6ec1196f47

@@ -114,3 +114,102 @@ To restore the snapshot to a new PVC, deploy
kubectl create -f pvc-restore.yaml
kubectl create -f pod-restore.yaml

## How to test RBD MULTI_NODE_MULTI_WRITER BLOCK feature

Requires feature-gates: `BlockVolume=true` `CSIBlockVolume=true`

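How these gates get enabled depends on how the cluster is deployed; the
flag placement below is only an illustrative sketch for clusters where you
control the component command lines, not a definitive recipe:

```bash
# The gates must be enabled on the kubelet of every node that will map the volume
kubelet --feature-gates=BlockVolume=true,CSIBlockVolume=true ...

# ...and on the API server (e.g. in the kube-apiserver static pod manifest)
kube-apiserver --feature-gates=BlockVolume=true,CSIBlockVolume=true ...
```
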
*NOTE* The MULTI_NODE_MULTI_WRITER capability is only available for
volumes that are of access_type `block`.

*WARNING* This feature is strictly for workloads that know how to deal
with concurrent access to the volume (e.g. active/passive applications).
Using RWX modes on non-clustered file systems with applications trying
to simultaneously access the volume will likely result in data corruption!

Following are examples of issuing a request for a `Block` `ReadWriteMany`
claim, and using the resulting claim for a POD:

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: block-pvc
spec:
  accessModes:
    - ReadWriteMany
  volumeMode: Block
  resources:
    requests:
      storage: 1Gi
  storageClassName: csi-rbd
```

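Assuming the manifest above is saved as `block-pvc.yaml` (an illustrative
file name), create the claim and confirm it binds:

```bash
kubectl create -f block-pvc.yaml
kubectl get pvc block-pvc   # STATUS should reach Bound
```
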
Create a POD that uses this PVC:

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: my-pod
spec:
  containers:
    - name: my-container
      image: debian
      command: ["/bin/bash", "-c"]
      args: ["tail -f /dev/null"]
      volumeDevices:
        - devicePath: /dev/rbdblock
          name: my-volume
      imagePullPolicy: IfNotPresent
  volumes:
    - name: my-volume
      persistentVolumeClaim:
        claimName: block-pvc
```

Now we can create a second POD that uses this PVC at the same time (ensure
the POD is scheduled on a different node; multi-writer on a single node
works without this feature). Again, wait for the POD to enter the Running
state, and verify the block device is available.

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: another-pod
spec:
  containers:
    - name: my-container
      image: debian
      command: ["/bin/bash", "-c"]
      args: ["tail -f /dev/null"]
      volumeDevices:
        - devicePath: /dev/rbdblock
          name: my-volume
      imagePullPolicy: IfNotPresent
  volumes:
    - name: my-volume
      persistentVolumeClaim:
        claimName: block-pvc
```

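To confirm the two PODs really did land on different nodes (the case this
feature exists for), check the NODE column:

```bash
kubectl get pods my-pod another-pod -o wide
```
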
Wait for the PODs to enter the Running state, then check that our block
device is available in both containers at `/dev/rbdblock`:

```bash
$ kubectl exec -it my-pod -- fdisk -l /dev/rbdblock
Disk /dev/rbdblock: 1 GiB, 1073741824 bytes, 2097152 sectors
Units: sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 4194304 bytes / 4194304 bytes
```

```bash
$ kubectl exec -it another-pod -- fdisk -l /dev/rbdblock
Disk /dev/rbdblock: 1 GiB, 1073741824 bytes, 2097152 sectors
Units: sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 4194304 bytes / 4194304 bytes
```

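Going a step beyond `fdisk`, a quick smoke test of shared write access is
to write a marker from one POD and read it back from the other. This sketch
is destructive (it overwrites the start of the device), so use it on test
volumes only; the read bypasses the page cache so the second node sees
fresh data:

```bash
# Scribbles over the first bytes of the shared device -- test volumes only!
kubectl exec -it my-pod -- sh -c 'echo "hello from my-pod" > /dev/rbdblock && sync'

# Read the first sector back from the other POD, skipping the page cache
kubectl exec -it another-pod -- sh -c \
  'dd if=/dev/rbdblock iflag=direct bs=512 count=1 2>/dev/null | tr -d "\0"'
```
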
```diff
@@ -93,7 +93,25 @@ func (cs *ControllerServer) validateVolumeReq(req *csi.CreateVolumeRequest) erro
 func parseVolCreateRequest(req *csi.CreateVolumeRequest) (*rbdVolume, error) {
 	// TODO (sbezverk) Last check for not exceeding total storage capacity
-	rbdVol, err := getRBDVolumeOptions(req.GetParameters())
+	isMultiNode := false
+	isBlock := false
+	for _, cap := range req.VolumeCapabilities {
+		// Only checking SINGLE_NODE_SINGLE_WRITER here because regardless of the other types (MULTI READER) we need to implement the same logic to ignore the in-use response
+		if cap.GetAccessMode().GetMode() != csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER {
+			isMultiNode = true
+		}
+		if cap.GetBlock() != nil {
+			isBlock = true
+		}
+	}
+
+	// We want to fail early if the user is trying to create a RWX on a non-block type device
+	if isMultiNode && !isBlock {
+		return nil, status.Error(codes.InvalidArgument, "multi node access modes are only supported on rbd `block` type volumes")
+	}
+
+	// if it's NOT SINGLE_NODE_WRITER and it's BLOCK we'll set the parameter to ignore the in-use checks
+	rbdVol, err := getRBDVolumeOptions(req.GetParameters(), (isMultiNode && isBlock))
 	if err != nil {
 		return nil, status.Error(codes.InvalidArgument, err.Error())
 	}
```

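With this check in place, a filesystem-mode RWX claim is rejected at
provision time rather than at publish. Illustratively (the claim name
`fs-rwx-pvc` is hypothetical), such a claim should stay Pending, and the
InvalidArgument error returned by CreateVolume can be looked for in the
claim's events:

```bash
kubectl describe pvc fs-rwx-pvc   # events should show the provisioning failure
```
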
```diff
@@ -48,6 +48,7 @@ type NodeServer struct {
 func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) {
 	targetPath := req.GetTargetPath()
 	targetPathMutex.LockKey(targetPath)
+	disableInUseChecks := false

 	defer func() {
 		if err := targetPathMutex.UnlockKey(targetPath); err != nil {
@@ -70,7 +71,19 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
 	if !notMnt {
 		return &csi.NodePublishVolumeResponse{}, nil
 	}
-	volOptions, err := getRBDVolumeOptions(req.GetVolumeContext())
+
+	// MULTI_NODE_MULTI_WRITER is supported by default for Block access type volumes
+	if req.VolumeCapability.AccessMode.Mode == csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER {
+		if isBlock {
+			disableInUseChecks = true
+		} else {
+			klog.Warningf("MULTI_NODE_MULTI_WRITER currently only supported with volumes of access type `block`, invalid AccessMode for volume: %v", req.GetVolumeId())
+			e := fmt.Errorf("rbd: MULTI_NODE_MULTI_WRITER access mode only allowed with BLOCK access type")
+			return nil, status.Error(codes.InvalidArgument, e.Error())
+		}
+	}
+
+	volOptions, err := getRBDVolumeOptions(req.GetVolumeContext(), disableInUseChecks)
 	if err != nil {
 		return nil, err
 	}
```

```diff
@@ -102,7 +102,14 @@ func (r *Driver) Run(driverName, nodeID, endpoint string, containerized bool, ca
 		csi.ControllerServiceCapability_RPC_LIST_SNAPSHOTS,
 		csi.ControllerServiceCapability_RPC_CLONE_VOLUME,
 	})
-	r.cd.AddVolumeCapabilityAccessModes([]csi.VolumeCapability_AccessMode_Mode{csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER})
+
+	// We only support the multi-writer option when using block, but it's a supported capability for the plugin in general
+	// In addition, we want to add the remaining modes like MULTI_NODE_READER_ONLY,
+	// MULTI_NODE_SINGLE_WRITER etc, but need to do some verification of RO modes first
+	// will work those as follow up features
+	r.cd.AddVolumeCapabilityAccessModes(
+		[]csi.VolumeCapability_AccessMode_Mode{csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
+			csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER})
 	// Create GRPC servers
 	r.ids = NewIdentityServer(r.cd)
```

```diff
@@ -258,6 +258,7 @@ func attachRBDImage(volOptions *rbdVolume, userID string, credentials map[string
 		Factor: rbdImageWatcherFactor,
 		Steps:  rbdImageWatcherSteps,
 	}
+
 	err = waitForrbdImage(backoff, volOptions, userID, credentials)

 	if err != nil {
@@ -313,6 +314,10 @@ func waitForrbdImage(backoff wait.Backoff, volOptions *rbdVolume, userID string,
 		if err != nil {
 			return false, fmt.Errorf("fail to check rbd image status with: (%v), rbd output: (%s)", err, rbdOutput)
 		}
+		if (volOptions.DisableInUseChecks) && (used) {
+			klog.V(2).Info("valid multi-node attach requested, ignoring watcher in-use result")
+			return used, nil
+		}
 		return !used, nil
 	})
 	// return error if rbd image has not become available for the specified timeout
```

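The in-use check that `DisableInUseChecks` bypasses is built on RBD
watchers: each node that has the image mapped holds a watch on it. The same
state can be inspected by hand from a host with Ceph client access (the
pool and image names below are illustrative; the real image backing
`block-pvc` will have a generated `pvc-...` name):

```bash
# With both PODs running, two watchers should be listed
rbd status rbd/pvc-xxxxxxxx
```
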
```diff
@@ -51,6 +51,7 @@ type rbdVolume struct {
 	AdminID string `json:"adminId"`
 	UserID  string `json:"userId"`
 	Mounter string `json:"mounter"`
+	DisableInUseChecks bool `json:"disableInUseChecks"`
 }

 type rbdSnapshot struct {
@@ -226,7 +227,7 @@ func execCommand(command string, args []string) ([]byte, error) {
 	return cmd.CombinedOutput()
 }

-func getRBDVolumeOptions(volOptions map[string]string) (*rbdVolume, error) {
+func getRBDVolumeOptions(volOptions map[string]string, disableInUseChecks bool) (*rbdVolume, error) {
 	var ok bool
 	rbdVol := &rbdVolume{}
 	rbdVol.Pool, ok = volOptions["pool"]
@@ -259,6 +260,10 @@ func getRBDVolumeOptions(volOptions map[string]string) (*rbdVolume, error) {
 		}
 	}
+
+	klog.V(3).Infof("setting disableInUseChecks on rbd volume to: %v", disableInUseChecks)
+	rbdVol.DisableInUseChecks = disableInUseChecks
+
 	getCredsFromVol(rbdVol, volOptions)
 	return rbdVol, nil
 }
```