Revert "Add multiNodeWritable option for RBD Volumes"

This reverts commit b5b8e46460.
This commit is contained in:
j-griffith 2019-03-13 12:19:14 -06:00
parent 2a25666109
commit a164169fd3
9 changed files with 8 additions and 238 deletions

View File

@ -58,21 +58,6 @@ Parameter | Required | Description
`csi.storage.k8s.io/provisioner-secret-name`, `csi.storage.k8s.io/node-publish-secret-name` | for Kubernetes | name of the Kubernetes Secret object containing Ceph client credentials. Both parameters should have the same value
`csi.storage.k8s.io/provisioner-secret-namespace`, `csi.storage.k8s.io/node-publish-secret-namespace` | for Kubernetes | namespaces of the above Secret objects
`mounter`| no | if set to `rbd-nbd`, use `rbd-nbd` on nodes that have `rbd-nbd` and `nbd` kernel modules to map rbd images
`fsType` | no | allows setting to `ext3 | ext4 | xfs`, default is `ext4`
`multiNodeWritable` | no | if set to `enabled` allows RBD volumes with MultiNode Access Modes to bypass watcher checks. By default multiple attachments of an RBD volume are NOT allowed. Even if this option is set in the StorageClass, it's ignored if a standard SingleNodeWriter Access Mode is requested
**Warning for multiNodeWritable:**
*NOTE* the `multiNodeWritable` setting is NOT safe for use by workloads
that are not designed to coordinate access. This does NOT add any sort
of clustered filesystem or write synchronization; it's specifically for
special workloads that handle access coordination on their own
(i.e. Active/Passive scenarios).
Using this mode for general purposes *WILL RESULT IN DATA CORRUPTION*.
We attempt to limit exposure to trouble here by ignoring the Storage Class
setting unless your Volume explicitly asks for multi-node access, in which
case we assume you know what you're doing.
**Required secrets:**

View File

@ -114,167 +114,3 @@ To restore the snapshot to a new PVC, deploy
kubectl create -f pvc-restore.yaml
kubectl create -f pod-restore.yaml
```
## How to enable multi node attach support for RBD
*WARNING* This feature is strictly for workloads that know how to deal
with concurrent access to the Volume (e.g. Active/Passive applications).
Using RWX modes on non-clustered file systems with applications trying
to simultaneously access the Volume will likely result in data corruption!
### Example process to test the multiNodeWritable feature
Modify your current storage class, or create a new storage class specifically
for multi node writers by adding the `multiNodeWritable: "enabled"` entry to
your parameters. Here's an example:
```yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: csi-rbd
provisioner: csi-rbdplugin
parameters:
monitors: rook-ceph-mon-b.rook-ceph.svc.cluster.local:6789
pool: rbd
imageFormat: "2"
imageFeatures: layering
csiProvisionerSecretName: csi-rbd-secret
csiProvisionerSecretNamespace: default
csiNodePublishSecretName: csi-rbd-secret
csiNodePublishSecretNamespace: default
adminid: admin
userid: admin
fsType: xfs
multiNodeWritable: "enabled"
reclaimPolicy: Delete
```
Now, you can request Claims from the configured storage class that include
the `ReadWriteMany` access mode:
```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: pvc-1
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: 1Gi
storageClassName: csi-rbd
```
Create a POD that uses this PVC:
```yaml
apiVersion: v1
kind: Pod
metadata:
name: test-1
spec:
containers:
- name: web-server
image: nginx
volumeMounts:
- name: mypvc
mountPath: /var/lib/www/html
volumes:
- name: mypvc
persistentVolumeClaim:
claimName: pvc-1
readOnly: false
```
Wait for the POD to enter the Running state, then write some data to
`/var/lib/www/html`.
Now, we can create a second POD (ensure the POD is scheduled on a different
node; multiwriter single node works without this feature) that also uses this
PVC at the same time:
```yaml
apiVersion: v1
kind: Pod
metadata:
name: test-2
spec:
containers:
- name: web-server
image: nginx
volumeMounts:
- name: mypvc
mountPath: /var/lib/www/html
volumes:
- name: mypvc
persistentVolumeClaim:
claimName: pvc-1
readOnly: false
```
If you access the pod you can check that your data is available at
`/var/lib/www/html`.
## Testing Raw Block feature in kubernetes with RBD volumes
CSI block volume support is feature-gated and turned off by default. To run CSI
with block volume support enabled, a cluster administrator must enable the
feature for each Kubernetes component using the following feature gate flags:
--feature-gates=BlockVolume=true,CSIBlockVolume=true
These feature gates must be enabled on both the api-server and the kubelet.
### create a raw-block PVC
```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: raw-block-pvc
spec:
accessModes:
- ReadWriteOnce
volumeMode: Block
resources:
requests:
storage: 1Gi
storageClassName: csi-rbd
```
create raw block pvc
```console
kubectl create -f raw-block-pvc.yaml
```
### create a pod to mount raw-block PVC
```yaml
---
apiVersion: v1
kind: Pod
metadata:
name: pod-with-raw-block-volume
spec:
containers:
- name: fc-container
image: fedora:26
command: ["/bin/sh", "-c"]
args: [ "tail -f /dev/null" ]
volumeDevices:
- name: data
devicePath: /dev/xvda
volumes:
- name: data
persistentVolumeClaim:
claimName: raw-block-pvc
```
Create a POD that uses raw block PVC
```console
kubectl create -f raw-block-pod.yaml
```

View File

@ -35,7 +35,4 @@ parameters:
userid: kubernetes
# uncomment the following to use rbd-nbd as mounter on supported nodes
# mounter: rbd-nbd
# fsType: xfs
# uncomment the following line to enable multi-attach on RBD volumes
# multiNodeWritable: enabled
reclaimPolicy: Delete

View File

@ -21,7 +21,6 @@ import (
"os/exec"
"sort"
"strconv"
"strings"
"syscall"
csicommon "github.com/ceph/ceph-csi/pkg/csi-common"
@ -94,16 +93,7 @@ func (cs *ControllerServer) validateVolumeReq(req *csi.CreateVolumeRequest) erro
func parseVolCreateRequest(req *csi.CreateVolumeRequest) (*rbdVolume, error) {
// TODO (sbezverk) Last check for not exceeding total storage capacity
// MultiNodeWriters are accepted but they're only for special cases, and we skip the watcher checks for them which isn't the greatest
// let's make sure we ONLY skip that if the user is requesting a MULTI Node accessible mode
disableMultiWriter := true
for _, am := range req.VolumeCapabilities {
if am.GetAccessMode().GetMode() != csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER {
disableMultiWriter = false
}
}
rbdVol, err := getRBDVolumeOptions(req.GetParameters(), disableMultiWriter)
rbdVol, err := getRBDVolumeOptions(req.GetParameters())
if err != nil {
return nil, status.Error(codes.InvalidArgument, err.Error())
}
@ -344,20 +334,11 @@ func (cs *ControllerServer) ListVolumes(ctx context.Context, req *csi.ListVolume
// ValidateVolumeCapabilities checks whether the volume capabilities requested
// are supported.
func (cs *ControllerServer) ValidateVolumeCapabilities(ctx context.Context, req *csi.ValidateVolumeCapabilitiesRequest) (*csi.ValidateVolumeCapabilitiesResponse, error) {
params := req.GetParameters()
multiWriter := params["multiNodeWritable"]
if strings.ToLower(multiWriter) == "enabled" {
klog.V(3).Info("detected multiNodeWritable parameter in Storage Class, allowing multi-node access modes")
} else {
for _, cap := range req.VolumeCapabilities {
if cap.GetAccessMode().GetMode() != csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER {
return &csi.ValidateVolumeCapabilitiesResponse{Message: ""}, nil
}
}
}
return &csi.ValidateVolumeCapabilitiesResponse{
Confirmed: &csi.ValidateVolumeCapabilitiesResponse_Confirmed{
VolumeCapabilities: req.VolumeCapabilities,

View File

@ -70,19 +70,10 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
if !notMnt {
return &csi.NodePublishVolumeResponse{}, nil
}
// if our access mode is a simple SINGLE_NODE_WRITER we're going to ignore the SC directive and use the
// watcher still
ignoreMultiWriterEnabled := true
if req.VolumeCapability.AccessMode.Mode != csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER {
ignoreMultiWriterEnabled = false
}
volOptions, err := getRBDVolumeOptions(req.GetVolumeContext(), ignoreMultiWriterEnabled)
volOptions, err := getRBDVolumeOptions(req.GetVolumeContext())
if err != nil {
return nil, err
}
volOptions.VolName = volName
// Mapping RBD image
devicePath, err := attachRBDImage(volOptions, volOptions.UserID, req.GetSecrets())

View File

@ -102,12 +102,7 @@ func (r *Driver) Run(driverName, nodeID, endpoint string, containerized bool, ca
csi.ControllerServiceCapability_RPC_LIST_SNAPSHOTS,
csi.ControllerServiceCapability_RPC_CLONE_VOLUME,
})
// TODO: JDG Should also look at remaining modes like MULT_NODE_READER (SINGLE_READER)
r.cd.AddVolumeCapabilityAccessModes(
[]csi.VolumeCapability_AccessMode_Mode{
csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER})
r.cd.AddVolumeCapabilityAccessModes([]csi.VolumeCapability_AccessMode_Mode{csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER})
// Create GRPC servers
r.ids = NewIdentityServer(r.cd)

View File

@ -313,16 +313,8 @@ func waitForrbdImage(backoff wait.Backoff, volOptions *rbdVolume, userID string,
if err != nil {
return false, fmt.Errorf("fail to check rbd image status with: (%v), rbd output: (%s)", err, rbdOutput)
}
// In the case of multiattach we want to short circuit the retries when used (so `if used; return used`)
// otherwise we're setting this to false which translates to !ok, which means backoff and try again
// NOTE: we ONLY do this if an multi-node access mode is requested for this volume
if (strings.ToLower(volOptions.MultiNodeWritable) == "enabled") && (used) {
klog.V(2).Info("detected MultiNodeWritable enabled, ignoring watcher in-use result")
return used, nil
}
return !used, nil
})
// return error if rbd image has not become available for the specified timeout
if err == wait.ErrWaitTimeout {
return fmt.Errorf("rbd image %s is still being used", imagePath)

View File

@ -51,7 +51,6 @@ type rbdVolume struct {
AdminID string `json:"adminId"`
UserID string `json:"userId"`
Mounter string `json:"mounter"`
MultiNodeWritable string `json:"multiNodeWritable"`
}
type rbdSnapshot struct {
@ -227,7 +226,7 @@ func execCommand(command string, args []string) ([]byte, error) {
return cmd.CombinedOutput()
}
func getRBDVolumeOptions(volOptions map[string]string, ignoreMultiNodeWritable bool) (*rbdVolume, error) {
func getRBDVolumeOptions(volOptions map[string]string) (*rbdVolume, error) {
var ok bool
rbdVol := &rbdVolume{}
rbdVol.Pool, ok = volOptions["pool"]
@ -261,12 +260,6 @@ func getRBDVolumeOptions(volOptions map[string]string, ignoreMultiNodeWritable b
}
getCredsFromVol(rbdVol, volOptions)
klog.V(3).Infof("ignoreMultiNodeWritable flag in parse getRBDVolumeOptions is: %v", ignoreMultiNodeWritable)
// If the volume we're working with is NOT requesting multi-node attach then don't treat it special, ignore the setting in the SC and just keep our watcher checks
if !ignoreMultiNodeWritable {
rbdVol.MultiNodeWritable = volOptions["multiNodeWritable"]
}
return rbdVol, nil
}