mirror of
https://github.com/ceph/ceph-csi.git
synced 2024-11-22 14:20:19 +00:00
d02e50aa9b
Existing config maps are now replaced with rados omaps that help store information regarding the requested volume names and the rbd image names backing the same. Further to detect cluster, pool and which image a volume ID refers to, changes to volume ID encoding has been done as per provided design specification in the stateless ceph-csi proposal. Additional changes and updates, - Updated documentation - Updated manifests - Updated Helm chart - Addressed a few csi-test failures Signed-off-by: ShyamsundarR <srangana@redhat.com>
237 lines
9.7 KiB
Go
237 lines
9.7 KiB
Go
/*
|
|
Copyright 2018 The Ceph-CSI Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package rbd
|
|
|
|
import (
|
|
csicommon "github.com/ceph/ceph-csi/pkg/csi-common"
|
|
"github.com/ceph/ceph-csi/pkg/util"
|
|
|
|
"github.com/container-storage-interface/spec/lib/go/csi"
|
|
"k8s.io/klog"
|
|
"k8s.io/kubernetes/pkg/util/mount"
|
|
"k8s.io/utils/exec"
|
|
"k8s.io/utils/nsenter"
|
|
)
|
|
|
|
/*
|
|
RADOS omaps usage:
|
|
|
|
This note details how we preserve the idempotent nature of create requests and retain the relationship
|
|
between orchestrator (CO) generated Names and plugin generated names for images and snapshots
|
|
|
|
The implementation uses Ceph RADOS omaps to preserve the relationship between request name and
|
|
generated image (or snapshot) name. There are 4 types of omaps in use,
|
|
- A "csi.volumes.[csi-id]" (or "csi.volumes"+.+CSIInstanceID), we call this the csiVolsDirectory
|
|
- stores keys named using the CO generated names for volume requests
|
|
- keys are named "csi.volume."+[CO generated VolName]
|
|
- Key value contains the RBD image uuid that is created or will be created, for the CO provided
|
|
name
|
|
|
|
- A "csi.snaps.[csi-id]" (or "csi.snaps"+.+CSIInstanceID), we refer to this as the csiSnapsDirectory
|
|
- stores keys named using the CO generated names for snapshot requests
|
|
- keys are named "csi.snap."+[CO generated SnapName]
|
|
- Key value contains the RBD snapshot uuid that is created or will be created, for the CO
|
|
provided name
|
|
|
|
- A per image omap named "csi.volume."+[RBD image uuid], we refer to this as the rbdImageOMap
|
|
- stores a single key named "csi.volname", that has the value of the CO generated VolName that
|
|
this image refers to
|
|
|
|
- A per snapshot omap named "csi.snap."+[RBD snapshot uuid], we refer to this as the snapOMap
|
|
- stores a key named "csi.snapname", that has the value of the CO generated SnapName that this
|
|
snapshot refers to
|
|
- also stores another key named "csi.source", that has the value of the image name that is the
|
|
source of the snapshot
|
|
|
|
Creation of omaps:
|
|
When a volume create request is received (or a snapshot create, the snapshot is not detailed in this
|
|
comment further as the process is similar),
|
|
- The csiVolsDirectory is consulted to find if there is already a key with the CO VolName, and if present,
|
|
it is used to read its references to reach the RBD image that backs this VolName, to check if the
|
|
RBD image can satisfy the requirements for the request
|
|
- If during the process of checking the same, it is found that some linking information is stale
|
|
or missing, the corresponding keys up to the key in the csiVolsDirectory are cleaned up, to start afresh
|
|
- If the key with the CO VolName is not found, or was cleaned up, the request is treated as a
|
|
new create request, and an rbdImageOMap is created first with a generated uuid, this ensures that we
|
|
do not use a uuid that is already in use
|
|
- Next, a key with the VolName is created in the csiVolsDirectory, and its value is updated to store the
|
|
generated uuid
|
|
- This is followed by updating the rbdImageOMap with the VolName in the rbdImageCSIVolNameKey
|
|
- Finally, the image is created (or promoted from a snapshot, if content source was provided) using
|
|
the uuid and a corresponding image name prefix (rbdImgNamePrefix or rbdSnapNamePrefix)
|
|
|
|
The entire operation is locked based on VolName hash, to ensure there is only ever a single entity
|
|
modifying the related omaps for a given VolName.
|
|
|
|
This ensures idempotent nature of creates, as the same CO generated VolName would attempt to use
|
|
the same RBD image name to serve the request, as the relations are saved in the respective omaps.
|
|
|
|
Deletion of omaps:
|
|
Delete requests would not contain the VolName, hence deletion uses the volume ID, which is encoded
|
|
with the image name in it, to find the image and the rbdImageOMap. The rbdImageOMap is read to get
|
|
the VolName that this image points to. This VolName can be further used to read and delete the key
|
|
from the csiVolsDirectory.
|
|
|
|
As we trace back and find the VolName, we also take a hash based lock on the VolName before
|
|
proceeding with deleting the image and the related omap entries, to ensure there is only ever a
|
|
single entity modifying the related omaps for a given VolName.
|
|
*/
|
|
|
|
const (
	// volIDVersion is the version number of the volume ID encoding scheme.
	volIDVersion = uint16(1)

	// Default ceph IDs; the default user ID is the same as the default admin ID.
	rbdDefaultAdminID = "admin"
	rbdDefaultUserID  = rbdDefaultAdminID

	// csiConfigFile is the location of the CSI config file.
	csiConfigFile = "/etc/ceph-csi-config/config.json"

	// Volume related omap names and keys.

	// csiVolNameKeyPrefix prefixes the keys stored in csiVolsDirectory; the suffix is the
	// volume name passed in by CSI.
	csiVolNameKeyPrefix = "csi.volume."
	// rbdImageOMapPrefix prefixes the name of the per RBD image object map; the suffix is the
	// RBD image uuid. It is the same literal as csiVolNameKeyPrefix, but it names an object
	// map rather than a key.
	rbdImageOMapPrefix = "csi.volume."
	// rbdImageCSIVolNameKey is the key, in the per RBD image object map, that holds the CSI
	// volume name for which the image was created.
	rbdImageCSIVolNameKey = "csi.volname"
	// rbdImgNamePrefix prefixes RBD image names; the suffix is a uuid generated per image.
	rbdImgNamePrefix = "csi-vol-"

	// Snapshot related omap names and keys.

	// csiSnapNameKeyPrefix prefixes the keys stored in csiSnapsDirectory; the suffix is the
	// snapshot name passed in by CSI.
	csiSnapNameKeyPrefix = "csi.snap."
	// rbdSnapOMapPrefix prefixes the name of the per RBD snapshot object map; the suffix is
	// the RBD snapshot uuid. It is the same literal as csiSnapNameKeyPrefix, but it names an
	// object map rather than a key.
	rbdSnapOMapPrefix = "csi.snap."
	// rbdSnapCSISnapNameKey is the key, in the per RBD snapshot object map, that holds the
	// CSI snapshot name for which the snapshot was created.
	rbdSnapCSISnapNameKey = "csi.snapname"
	// rbdSnapSourceImageKey is the key, in the per RBD snapshot object map, that holds the
	// name of the RBD source image for which the snapshot was created.
	rbdSnapSourceImageKey = "csi.source"
	// rbdSnapNamePrefix prefixes RBD snapshot names; the suffix is a uuid generated per
	// snapshot.
	rbdSnapNamePrefix = "csi-snap-"
)
|
|
|
|
var (
	// PluginFolder is the location of the ceph plugin. It is declared as a variable, not a
	// constant, and starts out as the kubelet plugins directory.
	PluginFolder = "/var/lib/kubelet/plugins/"
)
|
|
|
|
// Driver contains the default identity,node and controller struct
|
|
type Driver struct {
|
|
cd *csicommon.CSIDriver
|
|
|
|
ids *IdentityServer
|
|
ns *NodeServer
|
|
cs *ControllerServer
|
|
}
|
|
|
|
var (
	// version is the driver version that Run logs and passes to the common CSI driver.
	version = "1.0.0"

	// CSIInstanceID is the instance ID that is unique to an instance of CSI. It is used when
	// ceph clusters are shared across CSI instances, to differentiate omap names per CSI
	// instance. Run overrides it when an instance ID is passed in.
	CSIInstanceID = "default"

	// csiVolsDirectory is the name of the CSI volumes object map that contains CSI
	// volume-name based keys. Run appends "." and CSIInstanceID to it.
	csiVolsDirectory = "csi.volumes"

	// csiSnapsDirectory is the name of the CSI snapshots object map that contains CSI
	// snapshot-name based keys. Run appends "." and CSIInstanceID to it.
	csiSnapsDirectory = "csi.snaps"
)
|
|
|
|
// NewDriver returns new rbd driver
|
|
func NewDriver() *Driver {
|
|
return &Driver{}
|
|
}
|
|
|
|
// NewIdentityServer initialize a identity server for rbd CSI driver
|
|
func NewIdentityServer(d *csicommon.CSIDriver) *IdentityServer {
|
|
return &IdentityServer{
|
|
DefaultIdentityServer: csicommon.NewDefaultIdentityServer(d),
|
|
}
|
|
}
|
|
|
|
// NewControllerServer initialize a controller server for rbd CSI driver
|
|
func NewControllerServer(d *csicommon.CSIDriver) *ControllerServer {
|
|
return &ControllerServer{
|
|
DefaultControllerServer: csicommon.NewDefaultControllerServer(d),
|
|
}
|
|
}
|
|
|
|
// NewNodeServer initialize a node server for rbd CSI driver.
|
|
func NewNodeServer(d *csicommon.CSIDriver, containerized bool) (*NodeServer, error) {
|
|
mounter := mount.New("")
|
|
if containerized {
|
|
ne, err := nsenter.NewNsenter(nsenter.DefaultHostRootFsPath, exec.New())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
mounter = mount.NewNsenterMounter("", ne)
|
|
}
|
|
return &NodeServer{
|
|
DefaultNodeServer: csicommon.NewDefaultNodeServer(d),
|
|
mounter: mounter,
|
|
}, nil
|
|
}
|
|
|
|
// Run start a non-blocking grpc controller,node and identityserver for
|
|
// rbd CSI driver which can serve multiple parallel requests
|
|
func (r *Driver) Run(driverName, nodeID, endpoint, instanceID string, containerized bool) {
|
|
var err error
|
|
|
|
klog.Infof("Driver: %v version: %v", driverName, version)
|
|
|
|
// Create ceph.conf for use with CLI commands
|
|
if err = util.WriteCephConfig(); err != nil {
|
|
klog.Fatalf("failed to write ceph configuration file (%v)", err)
|
|
}
|
|
|
|
// Use passed in instance ID, if provided for omap suffix naming
|
|
if instanceID != "" {
|
|
CSIInstanceID = instanceID
|
|
}
|
|
csiVolsDirectory = csiVolsDirectory + "." + CSIInstanceID
|
|
csiSnapsDirectory = csiSnapsDirectory + "." + CSIInstanceID
|
|
|
|
// Initialize default library driver
|
|
r.cd = csicommon.NewCSIDriver(driverName, version, nodeID)
|
|
if r.cd == nil {
|
|
klog.Fatalln("Failed to initialize CSI Driver.")
|
|
}
|
|
r.cd.AddControllerServiceCapabilities([]csi.ControllerServiceCapability_RPC_Type{
|
|
csi.ControllerServiceCapability_RPC_CREATE_DELETE_VOLUME,
|
|
csi.ControllerServiceCapability_RPC_CREATE_DELETE_SNAPSHOT,
|
|
csi.ControllerServiceCapability_RPC_CLONE_VOLUME,
|
|
})
|
|
|
|
// We only support the multi-writer option when using block, but it's a supported capability for the plugin in general
|
|
// In addition, we want to add the remaining modes like MULTI_NODE_READER_ONLY,
|
|
// MULTI_NODE_SINGLE_WRITER etc, but need to do some verification of RO modes first
|
|
// will work those as follow up features
|
|
r.cd.AddVolumeCapabilityAccessModes(
|
|
[]csi.VolumeCapability_AccessMode_Mode{csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
|
|
csi.VolumeCapability_AccessMode_MULTI_NODE_MULTI_WRITER})
|
|
|
|
// Create GRPC servers
|
|
r.ids = NewIdentityServer(r.cd)
|
|
r.ns, err = NewNodeServer(r.cd, containerized)
|
|
if err != nil {
|
|
klog.Fatalf("failed to start node server, err %v\n", err)
|
|
}
|
|
|
|
r.cs = NewControllerServer(r.cd)
|
|
|
|
s := csicommon.NewNonBlockingGRPCServer()
|
|
s.Start(endpoint, r.ids, r.cs, r.ns)
|
|
s.Wait()
|
|
}
|