Mirror of https://github.com/ceph/ceph-csi.git (synced 2024-11-22 22:30:23 +00:00)
Refactor voljournal to aid reuse with CephFS

and to also improve code reuse in rbd itself.

Signed-off-by: ShyamsundarR <srangana@redhat.com>

parent b6b7cf2c3d
commit 1406f29dcd
@@ -146,7 +146,7 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
 	}
 	defer func() {
 		if err != nil {
-			errDefer := unreserveVol(rbdVol, req.GetSecrets())
+			errDefer := undoVolReservation(rbdVol, req.GetSecrets())
 			if errDefer != nil {
 				klog.Warningf("failed undoing reservation of volume: %s (%s)", req.GetName(), errDefer)
 			}
@@ -257,7 +257,7 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
 		}
 	}()

-	if err := unreserveVol(rbdVol, req.GetSecrets()); err != nil {
+	if err := undoVolReservation(rbdVol, req.GetSecrets()); err != nil {
 		return nil, status.Error(codes.Internal, err.Error())
 	}
 	return &csi.DeleteVolumeResponse{}, nil
@@ -347,7 +347,7 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS
 	// check for the requested source volume id and already allocated source volume id
 	found, err := checkSnapExists(rbdSnap, req.GetSecrets())
 	if err != nil {
-		if _, ok := err.(ErrSnapNameConflict); ok {
+		if _, ok := err.(util.ErrSnapNameConflict); ok {
 			return nil, status.Error(codes.AlreadyExists, err.Error())
 		}

@@ -371,7 +371,7 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS
 	}
 	defer func() {
 		if err != nil {
-			errDefer := unreserveSnap(rbdSnap, req.GetSecrets())
+			errDefer := undoSnapReservation(rbdSnap, req.GetSecrets())
 			if errDefer != nil {
 				klog.Warningf("failed undoing reservation of snapshot: %s %v", req.GetName(), errDefer)
 			}
@@ -483,7 +483,7 @@ func (cs *ControllerServer) DeleteSnapshot(ctx context.Context, req *csi.DeleteS
 	if _, ok := err.(ErrSnapNotFound); !ok {
 		return nil, status.Error(codes.Internal, err.Error())
 	}
-	if err := unreserveSnap(rbdSnap, req.GetSecrets()); err != nil {
+	if err := undoSnapReservation(rbdSnap, req.GetSecrets()); err != nil {
 		return nil, status.Error(codes.Internal, err.Error())
 	}
 	return &csi.DeleteSnapshotResponse{}, nil
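Note: the hunk above asserts the conflict error type from pkg/util, since this commit moves ErrSnapNameConflict there (see the error-type hunks further down). A minimal sketch of the intended mapping from journal error to CSI status, using the same gRPC codes as the hunks above:

    // A name that already exists with different properties maps to
    // ALREADY_EXISTS; anything else stays an internal error.
    if _, ok := err.(util.ErrSnapNameConflict); ok {
        return nil, status.Error(codes.AlreadyExists, err.Error())
    }
    return nil, status.Error(codes.Internal, err.Error())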
@@ -37,17 +37,6 @@ func (e ErrSnapNotFound) Error() string {
 	return e.err.Error()
 }

-// ErrSnapNameConflict is generated when a requested CSI snap name already exists on RBD but with
-// different properties, and hence is in conflict with the passed in CSI volume name
-type ErrSnapNameConflict struct {
-	requestName string
-	err         error
-}
-
-func (e ErrSnapNameConflict) Error() string {
-	return e.err.Error()
-}
-
 // ErrVolNameConflict is generated when a requested CSI volume name already exists on RBD but with
 // different properties, and hence is in conflict with the passed in CSI volume name
 type ErrVolNameConflict struct {
@@ -123,7 +123,7 @@ func (ns *NodeServer) getVolumeName(req *csi.NodePublishVolumeRequest) (string,
 		return "", status.Error(codes.InvalidArgument, err.Error())
 	}

-	return rbdImgNamePrefix + vi.ObjectUUID, nil
+	return volJournal.NamingPrefix() + vi.ObjectUUID, nil
 }

 func (ns *NodeServer) mountVolume(req *csi.NodePublishVolumeRequest, devicePath string) error {
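Note: assuming util.CSIJournal's NamingPrefix() returns the same "csi-vol-" string as the rbdImgNamePrefix constant it replaces (that constant is deleted from pkg/rbd/rbd.go below), the node server keeps producing identical image names; only the source of the prefix moves into the journal:

    // before: rbdImgNamePrefix + vi.ObjectUUID          -> "csi-vol-<uuid>"
    // after:  volJournal.NamingPrefix() + vi.ObjectUUID -> "csi-vol-<uuid>" (assumed unchanged)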
 pkg/rbd/rbd.go | 114
@@ -27,70 +27,6 @@ import (
 	"k8s.io/utils/nsenter"
 )

-/*
-RADOS omaps usage:
-
-This note details how we preserve idempotent nature of create requests and retain the relationship
-between orchestrator (CO) generated Names and plugin generated names for images and snapshots
-
-The implementation uses Ceph RADOS omaps to preserve the relationship between request name and
-generated image (or snapshot) name. There are 4 types of omaps in use,
-- A "csi.volumes.[csi-id]" (or "csi.volumes"+.+CSIInstanceID), we call this the csiVolsDirectory
-  - stores keys named using the CO generated names for volume requests
-  - keys are named "csi.volume."+[CO generated VolName]
-  - Key value contains the RBD image uuid that is created or will be created, for the CO provided
-    name
-
-- A "csi.snaps.[csi-id]" (or "csi.snaps"+.+CSIInstanceID), we refer to this as the csiSnapsDirectory
-  - stores keys named using the CO generated names for snapshot requests
-  - keys are named "csi.snap."+[CO generated SnapName]
-  - Key value contains the RBD snapshot uuid that is created or will be created, for the CO
-    provided name
-
-- A per image omap named "rbd.csi.volume."+[RBD image uuid], we refer to this as the rbdImageOMap
-  - stores a single key named "csi.volname", that has the value of the CO generated VolName that
-    this image refers to
-
-- A per snapshot omap named "rbd.csi.snap."+[RBD snapshot uuid], we refer to this as the snapOMap
-  - stores a key named "csi.snapname", that has the value of the CO generated SnapName that this
-    snapshot refers to
-  - also stores another key named "csi.source", that has the value of the image name that is the
-    source of the snapshot
-
-Creation of omaps:
-When a volume create request is received (or a snapshot create, the snapshot is not detailed in this
-comment further as the process is similar),
-- The csiVolsDirectory is consulted to find if there is already a key with the CO VolName, and if present,
-  it is used to read its references to reach the RBD image that backs this VolName, to check if the
-  RBD image can satisfy the requirements for the request
-- If during the process of checking the same, it is found that some linking information is stale
-  or missing, the corresponding keys upto the key in the csiVolsDirectory is cleaned up, to start afresh
-- If the key with the CO VolName is not found, or was cleaned up, the request is treated as a
-  new create request, and an rbdImageOMap is created first with a generated uuid, this ensures that we
-  do not use a uuid that is already in use
-- Next, a key with the VolName is created in the csiVolsDirectory, and its value is updated to store the
-  generated uuid
-- This is followed by updating the rbdImageOMap with the VolName in the rbdImageCSIVolNameKey
-- Finally, the image is created (or promoted from a snapshot, if content source was provided) using
-  the uuid and a corresponding image name prefix (rbdImgNamePrefix or rbdSnapNamePrefix)
-
-The entire operation is locked based on VolName hash, to ensure there is only ever a single entity
-modifying the related omaps for a given VolName.
-
-This ensures idempotent nature of creates, as the same CO generated VolName would attempt to use
-the same RBD image name to serve the request, as the relations are saved in the respective omaps.
-
-Deletion of omaps:
-Delete requests would not contain the VolName, hence deletion uses the volume ID, which is encoded
-with the image name in it, to find the image and the rbdImageOMap. The rbdImageOMap is read to get
-the VolName that this image points to. This VolName can be further used to read and delete the key
-from the csiVolsDirectory.
-
-As we trace back and find the VolName, we also take a hash based lock on the VolName before
-proceeding with deleting the image and the related omap entries, to ensure there is only ever a
-single entity modifying the related omaps for a given VolName.
-*/
-
 const (
 	// volIDVersion is the version number of volume ID encoding scheme
 	volIDVersion uint16 = 1
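To make the note removed above concrete, here is the set of objects it describes for a single volume, using the key and prefix constants this same commit deletes from the const block below (the request name "pvc-1234" and uuid "abcd-ef" are made up; note the prose names the per-image omap "rbd.csi.volume." while the constant the code actually uses is "csi.volume."):

    // csi.volumes.default            csiVolsDirectory + "." + CSIInstanceID
    //     csi.volume.pvc-1234 -> "abcd-ef"
    // csi.volume.abcd-ef             rbdImageOMapPrefix + uuid (the per-image omap)
    //     csi.volname -> "pvc-1234"
    // csi-vol-abcd-ef                rbdImgNamePrefix + uuid (the RBD image itself)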
@@ -99,34 +35,8 @@ const (

 	// csiConfigFile is the location of the CSI config file
 	csiConfigFile = "/etc/ceph-csi-config/config.json"

-	// CSI volume-name keyname prefix, for key in csiVolsDirectory, suffix is the CSI passed volume name
-	csiVolNameKeyPrefix = "csi.volume."
-	// Per RBD image object map name prefix, suffix is the RBD image uuid
-	rbdImageOMapPrefix = "csi.volume."
-	// CSI volume-name key in per RBD image object map, containing CSI volume-name for which the
-	// image was created
-	rbdImageCSIVolNameKey = "csi.volname"
-	// RBD image name prefix, suffix is a uuid generated per image
-	rbdImgNamePrefix = "csi-vol-"
-
-	//CSI snap-name keyname prefix, for key in csiSnapsDirectory, suffix is the CSI passed snapshot name
-	csiSnapNameKeyPrefix = "csi.snap."
-	// Per RBD snapshot object map name prefix, suffix is the RBD image uuid
-	rbdSnapOMapPrefix = "csi.snap."
-	// CSI snap-name key in per RBD snapshot object map, containing CSI snapshot-name for which the
-	// snapshot was created
-	rbdSnapCSISnapNameKey = "csi.snapname"
-	// source image name key in per RBD snapshot object map, containing RBD source image name for
-	// which the snapshot was created
-	rbdSnapSourceImageKey = "csi.source"
-	// RBD snapshot name prefix, suffix is a uuid generated per snapshot
-	rbdSnapNamePrefix = "csi-snap-"
 )

-// PluginFolder defines the location of ceph plugin
-var PluginFolder = "/var/lib/kubelet/plugins/"
-
 // Driver contains the default identity,node and controller struct
 type Driver struct {
 	cd *csicommon.CSIDriver
@@ -138,14 +48,18 @@ type Driver struct {

 var (
 	version = "1.0.0"

+	// PluginFolder defines the location of ceph plugin
+	PluginFolder = "/var/lib/kubelet/plugins/"
+
 	// CSIInstanceID is the instance ID that is unique to an instance of CSI, used when sharing
 	// ceph clusters across CSI instances, to differentiate omap names per CSI instance
 	CSIInstanceID = "default"
-	// csiVolsDirectory is the name of the CSI volumes object map that contains CSI volume-name
-	// based keys
-	csiVolsDirectory = "csi.volumes"
-	// csiSnapsDirectory is the name of the CSI snapshots object map that contains CSI snapshot-name based keys
-	csiSnapsDirectory = "csi.snaps"
+	// volJournal and snapJournal are used to maintain RADOS based journals for CO generated
+	// VolumeName to backing RBD images
+	volJournal  *util.CSIJournal
+	snapJournal *util.CSIJournal
 )

 // NewDriver returns new rbd driver
@@ -199,8 +113,14 @@ func (r *Driver) Run(driverName, nodeID, endpoint, instanceID string, containeri
 	if instanceID != "" {
 		CSIInstanceID = instanceID
 	}
-	csiVolsDirectory = csiVolsDirectory + "." + CSIInstanceID
-	csiSnapsDirectory = csiSnapsDirectory + "." + CSIInstanceID
+	// Get an instance of the volume and snapshot journal keys
+	volJournal = util.NewCSIVolumeJournal()
+	snapJournal = util.NewCSISnapshotJournal()
+
+	// Update keys with CSI instance suffix
+	volJournal.SetCSIDirectorySuffix(CSIInstanceID)
+	snapJournal.SetCSIDirectorySuffix(CSIInstanceID)

 	// Initialize default library driver
 	r.cd = csicommon.NewCSIDriver(driverName, version, nodeID)
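A short sketch of what the instance suffix does to the journal's omap names, assuming SetCSIDirectorySuffix simply appends "." plus the suffix to the base directory names "csi.volumes"/"csi.snaps" (which matches the string concatenation it replaces above):

    vj := util.NewCSIVolumeJournal()    // journal directory starts as "csi.volumes"
    vj.SetCSIDirectorySuffix("default") // -> "csi.volumes.default"
    // Two CSI instances sharing one Ceph cluster therefore keep separate
    // journals, e.g. "csi.volumes.instanceA" vs "csi.volumes.instanceB".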
 pkg/rbd/rbd_journal.go | 307 (new file)
@@ -0,0 +1,307 @@
+/*
+Copyright 2019 The Ceph-CSI Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package rbd
+
+import (
+    "fmt"
+
+    "github.com/ceph/ceph-csi/pkg/util"
+
+    "github.com/pkg/errors"
+    "k8s.io/klog"
+)
+
+func validateNonEmptyField(field, fieldName, structName string) error {
+    if field == "" {
+        return fmt.Errorf("value '%s' in '%s' structure cannot be empty", fieldName, structName)
+    }
+
+    return nil
+}
+
+func validateRbdSnap(rbdSnap *rbdSnapshot) error {
+    var err error
+
+    if err = validateNonEmptyField(rbdSnap.RequestName, "RequestName", "rbdSnapshot"); err != nil {
+        return err
+    }
+
+    if err = validateNonEmptyField(rbdSnap.Monitors, "Monitors", "rbdSnapshot"); err != nil {
+        return err
+    }
+
+    if err = validateNonEmptyField(rbdSnap.AdminID, "AdminID", "rbdSnapshot"); err != nil {
+        return err
+    }
+
+    if err = validateNonEmptyField(rbdSnap.Pool, "Pool", "rbdSnapshot"); err != nil {
+        return err
+    }
+
+    if err = validateNonEmptyField(rbdSnap.RbdImageName, "RbdImageName", "rbdSnapshot"); err != nil {
+        return err
+    }
+
+    if err = validateNonEmptyField(rbdSnap.ClusterID, "ClusterID", "rbdSnapshot"); err != nil {
+        return err
+    }
+
+    return err
+}
+
+func validateRbdVol(rbdVol *rbdVolume) error {
+    var err error
+
+    if err = validateNonEmptyField(rbdVol.RequestName, "RequestName", "rbdVolume"); err != nil {
+        return err
+    }
+
+    if err = validateNonEmptyField(rbdVol.Monitors, "Monitors", "rbdVolume"); err != nil {
+        return err
+    }
+
+    if err = validateNonEmptyField(rbdVol.AdminID, "AdminID", "rbdVolume"); err != nil {
+        return err
+    }
+
+    if err = validateNonEmptyField(rbdVol.Pool, "Pool", "rbdVolume"); err != nil {
+        return err
+    }
+
+    if err = validateNonEmptyField(rbdVol.ClusterID, "ClusterID", "rbdVolume"); err != nil {
+        return err
+    }
+
+    if rbdVol.VolSize == 0 {
+        return errors.New("value 'VolSize' in 'rbdVolume' structure cannot be 0")
+    }
+
+    return err
+}
+
+/*
+checkSnapExists, and its counterpart checkVolExists, function checks if the passed in rbdSnapshot
+or rbdVolume exists on the backend.
+
+**NOTE:** These functions manipulate the rados omaps that hold information regarding
+volume names as requested by the CSI drivers. Hence, these need to be invoked only when the
+respective CSI driver generated snapshot or volume name based locks are held, as otherwise racy
+access to these omaps may end up leaving them in an inconsistent state.
+
+These functions need enough information about cluster and pool (ie, Monitors, Pool, IDs filled in)
+to operate. They further require that the RequestName element of the structure have a valid value
+to operate on and determine if the said RequestName already exists on the backend.
+
+These functions populate the snapshot or the image name, its attributes and the CSI snapshot/volume
+ID for the same when successful.
+
+These functions also cleanup omap reservations that are stale. I.e when omap entries exist and
+backing images or snapshots are missing, or one of the omaps exist and the next is missing. This is
+because, the order of omap creation and deletion are inverse of each other, and protected by the
+request name lock, and hence any stale omaps are leftovers from incomplete transactions and are
+hence safe to garbage collect.
+*/
+func checkSnapExists(rbdSnap *rbdSnapshot, credentials map[string]string) (bool, error) {
+    err := validateRbdSnap(rbdSnap)
+    if err != nil {
+        return false, err
+    }
+
+    key, err := getKey(rbdSnap.AdminID, credentials)
+    if err != nil {
+        return false, err
+    }
+
+    snapUUID, err := snapJournal.CheckReservation(rbdSnap.Monitors, rbdSnap.AdminID, key, rbdSnap.Pool,
+        rbdSnap.RequestName, rbdSnap.RbdImageName)
+    if err != nil {
+        return false, err
+    }
+    if snapUUID == "" {
+        return false, nil
+    }
+    rbdSnap.RbdSnapName = snapJournal.NamingPrefix() + snapUUID
+
+    // Fetch on-disk image attributes
+    err = updateSnapWithImageInfo(rbdSnap, credentials)
+    if err != nil {
+        if _, ok := err.(ErrSnapNotFound); ok {
+            err = snapJournal.UndoReservation(rbdSnap.Monitors, rbdSnap.AdminID, key, rbdSnap.Pool,
+                rbdSnap.RbdSnapName, rbdSnap.RequestName)
+            return false, err
+        }
+        return false, err
+    }
+
+    // found a snapshot already available, process and return its information
+    rbdSnap.SnapID, err = util.GenerateVolID(rbdSnap.Monitors, rbdSnap.AdminID, key, rbdSnap.Pool,
+        rbdSnap.ClusterID, snapUUID, volIDVersion)
+    if err != nil {
+        return false, err
+    }
+
+    klog.V(4).Infof("found existing snap (%s) with snap name (%s) for request (%s)",
+        rbdSnap.SnapID, rbdSnap.RbdSnapName, rbdSnap.RequestName)
+
+    return true, nil
+}
+
+/*
+Check comment on checkSnapExists, to understand how this function behaves
+
+**NOTE:** These functions manipulate the rados omaps that hold information regarding
+volume names as requested by the CSI drivers. Hence, these need to be invoked only when the
+respective CSI snapshot or volume name based locks are held, as otherwise racy access to these
+omaps may end up leaving the omaps in an inconsistent state.
+*/
+func checkVolExists(rbdVol *rbdVolume, credentials map[string]string) (bool, error) {
+    err := validateRbdVol(rbdVol)
+    if err != nil {
+        return false, err
+    }
+
+    key, err := getKey(rbdVol.AdminID, credentials)
+    if err != nil {
+        return false, err
+    }
+
+    imageUUID, err := volJournal.CheckReservation(rbdVol.Monitors, rbdVol.AdminID, key, rbdVol.Pool,
+        rbdVol.RequestName, "")
+    if err != nil {
+        return false, err
+    }
+    if imageUUID == "" {
+        return false, nil
+    }
+    rbdVol.RbdImageName = volJournal.NamingPrefix() + imageUUID
+
+    // NOTE: Return volsize should be on-disk volsize, not request vol size, so
+    // save it for size checks before fetching image data
+    requestSize := rbdVol.VolSize
+    // Fetch on-disk image attributes and compare against request
+    err = updateVolWithImageInfo(rbdVol, credentials)
+    if err != nil {
+        if _, ok := err.(ErrImageNotFound); ok {
+            err = volJournal.UndoReservation(rbdVol.Monitors, rbdVol.AdminID, key, rbdVol.Pool,
+                rbdVol.RbdImageName, rbdVol.RequestName)
+            return false, err
+        }
+        return false, err
+    }
+
+    // size checks
+    if rbdVol.VolSize < requestSize {
+        err = fmt.Errorf("image with the same name (%s) but with different size already exists",
+            rbdVol.RbdImageName)
+        return false, ErrVolNameConflict{rbdVol.RbdImageName, err}
+    }
+    // TODO: We should also ensure image features and format is the same
+
+    // found a volume already available, process and return it!
+    rbdVol.VolID, err = util.GenerateVolID(rbdVol.Monitors, rbdVol.AdminID, key, rbdVol.Pool,
+        rbdVol.ClusterID, imageUUID, volIDVersion)
+    if err != nil {
+        return false, err
+    }
+
+    klog.V(4).Infof("found existng volume (%s) with image name (%s) for request (%s)",
+        rbdVol.VolID, rbdVol.RbdImageName, rbdVol.RequestName)
+
+    return true, nil
+}
+
+// reserveSnap is a helper routine to request a rbdSnapshot name reservation and generate the
+// volume ID for the generated name
+func reserveSnap(rbdSnap *rbdSnapshot, credentials map[string]string) error {
+    key, err := getKey(rbdSnap.AdminID, credentials)
+    if err != nil {
+        return err
+    }
+
+    snapUUID, err := snapJournal.ReserveName(rbdSnap.Monitors, rbdSnap.AdminID, key, rbdSnap.Pool,
+        rbdSnap.RequestName, rbdSnap.RbdImageName)
+    if err != nil {
+        return err
+    }
+
+    rbdSnap.SnapID, err = util.GenerateVolID(rbdSnap.Monitors, rbdSnap.AdminID, key, rbdSnap.Pool,
+        rbdSnap.ClusterID, snapUUID, volIDVersion)
+    if err != nil {
+        return err
+    }
+
+    rbdSnap.RbdSnapName = snapJournal.NamingPrefix() + snapUUID
+
+    klog.V(4).Infof("generated Volume ID (%s) and image name (%s) for request name (%s)",
+        rbdSnap.SnapID, rbdSnap.RbdImageName, rbdSnap.RequestName)
+
+    return nil
+}
+
+// reserveVol is a helper routine to request a rbdVolume name reservation and generate the
+// volume ID for the generated name
+func reserveVol(rbdVol *rbdVolume, credentials map[string]string) error {
+    key, err := getKey(rbdVol.AdminID, credentials)
+    if err != nil {
+        return err
+    }
+
+    imageUUID, err := volJournal.ReserveName(rbdVol.Monitors, rbdVol.AdminID, key, rbdVol.Pool,
+        rbdVol.RequestName, "")
+    if err != nil {
+        return err
+    }
+
+    rbdVol.VolID, err = util.GenerateVolID(rbdVol.Monitors, rbdVol.AdminID, key, rbdVol.Pool,
+        rbdVol.ClusterID, imageUUID, volIDVersion)
+    if err != nil {
+        return err
+    }
+
+    rbdVol.RbdImageName = volJournal.NamingPrefix() + imageUUID
+
+    klog.V(4).Infof("generated Volume ID (%s) and image name (%s) for request name (%s)",
+        rbdVol.VolID, rbdVol.RbdImageName, rbdVol.RequestName)
+
+    return nil
+}
+
+// undoSnapReservation is a helper routine to undo a name reservation for rbdSnapshot
+func undoSnapReservation(rbdSnap *rbdSnapshot, credentials map[string]string) error {
+    key, err := getKey(rbdSnap.AdminID, credentials)
+    if err != nil {
+        return err
+    }
+
+    err = snapJournal.UndoReservation(rbdSnap.Monitors, rbdSnap.AdminID, key, rbdSnap.Pool,
+        rbdSnap.RbdSnapName, rbdSnap.RequestName)
+
+    return err
+}
+
+// undoVolReservation is a helper routine to undo a name reservation for rbdVolume
+func undoVolReservation(rbdVol *rbdVolume, credentials map[string]string) error {
+    key, err := getKey(rbdVol.AdminID, credentials)
+    if err != nil {
+        return err
+    }
+
+    err = volJournal.UndoReservation(rbdVol.Monitors, rbdVol.AdminID, key, rbdVol.Pool,
+        rbdVol.RbdImageName, rbdVol.RequestName)
+
+    return err
+}
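Taken together, the journal helpers above are designed to be called in this order from the controller paths, under the same request-name lock the doc comment requires. A condensed sketch (error handling trimmed; secrets, volSizeGB and createImage are stand-ins for the actual call-site names and image-create step, not definitive signatures):

    found, err := checkVolExists(rbdVol, secrets) // reuses a prior reservation if intact
    if err != nil {
        return err
    }
    if !found {
        if err = reserveVol(rbdVol, secrets); err != nil { // journals the name, fills VolID
            return err
        }
        defer func() {
            if err != nil {
                // inverse of reserveVol, so a failed create leaves no stale omaps
                if errDefer := undoVolReservation(rbdVol, secrets); errDefer != nil {
                    klog.Warningf("failed undoing reservation of volume: %s (%v)",
                        rbdVol.RequestName, errDefer)
                }
            }
        }()
        err = createImage(rbdVol, volSizeGB, rbdVol.AdminID, secrets) // hypothetical call
    }
    return err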
@@ -211,7 +211,7 @@ func deleteImage(pOpts *rbdVolume, adminID string, credentials map[string]string
 		return err
 	}

-	err = unreserveVol(pOpts, credentials)
+	err = undoVolReservation(pOpts, credentials)
 	if err != nil {
 		klog.Errorf("failed to remove reservation for volume (%s) with backing image (%s) (%s)",
 			pOpts.RequestName, pOpts.RbdImageName, err)
@@ -292,7 +292,7 @@ func genSnapFromSnapID(rbdSnap *rbdSnapshot, snapshotID string, credentials map[

 	rbdSnap.ClusterID = vi.ClusterID
 	options["clusterID"] = rbdSnap.ClusterID
-	rbdSnap.RbdSnapName = rbdSnapNamePrefix + vi.ObjectUUID
+	rbdSnap.RbdSnapName = snapJournal.NamingPrefix() + vi.ObjectUUID

 	rbdSnap.Monitors, _, err = getMonsAndClusterID(options)
 	if err != nil {
@@ -311,16 +311,8 @@ func genSnapFromSnapID(rbdSnap *rbdSnapshot, snapshotID string, credentials map[
 		return err
 	}

-	// TODO: fetch all omap vals in one call, than make multiple listomapvals
-	snapUUID := strings.TrimPrefix(rbdSnap.RbdSnapName, rbdSnapNamePrefix)
-	rbdSnap.RequestName, err = util.GetOMapValue(rbdSnap.Monitors, rbdSnap.AdminID,
-		key, rbdSnap.Pool, rbdSnapOMapPrefix+snapUUID, rbdSnapCSISnapNameKey)
-	if err != nil {
-		return err
-	}
-
-	rbdSnap.RbdImageName, err = util.GetOMapValue(rbdSnap.Monitors, rbdSnap.AdminID,
-		key, rbdSnap.Pool, rbdSnapOMapPrefix+snapUUID, rbdSnapSourceImageKey)
+	rbdSnap.RequestName, rbdSnap.RbdImageName, err = snapJournal.GetObjectUUIDData(rbdSnap.Monitors,
+		rbdSnap.AdminID, key, rbdSnap.Pool, vi.ObjectUUID, true)
 	if err != nil {
 		return err
 	}
@@ -352,7 +344,7 @@ func genVolFromVolID(rbdVol *rbdVolume, volumeID string, credentials map[string]

 	rbdVol.ClusterID = vi.ClusterID
 	options["clusterID"] = rbdVol.ClusterID
-	rbdVol.RbdImageName = rbdImgNamePrefix + vi.ObjectUUID
+	rbdVol.RbdImageName = volJournal.NamingPrefix() + vi.ObjectUUID

 	rbdVol.Monitors, _, err = getMonsAndClusterID(options)
 	if err != nil {
@@ -372,9 +364,8 @@ func genVolFromVolID(rbdVol *rbdVolume, volumeID string, credentials map[string]
 		return err
 	}

-	imageUUID := strings.TrimPrefix(rbdVol.RbdImageName, rbdImgNamePrefix)
-	rbdVol.RequestName, err = util.GetOMapValue(rbdVol.Monitors, rbdVol.AdminID,
-		key, rbdVol.Pool, rbdImageOMapPrefix+imageUUID, rbdImageCSIVolNameKey)
+	rbdVol.RequestName, _, err = volJournal.GetObjectUUIDData(rbdVol.Monitors,
+		rbdVol.AdminID, key, rbdVol.Pool, vi.ObjectUUID, false)
 	if err != nil {
 		return err
 	}
@@ -608,7 +599,7 @@ func deleteSnapshot(pOpts *rbdSnapshot, adminID string, credentials map[string]s
 		return errors.Wrapf(err, "failed to delete snapshot, command output: %s", string(output))
 	}

-	if err := unreserveSnap(pOpts, credentials); err != nil {
+	if err := undoSnapReservation(pOpts, credentials); err != nil {
 		klog.Errorf("failed to remove reservation for snapname (%s) with backing snap (%s) on image (%s) (%s)",
 			pOpts.RequestName, pOpts.RbdSnapName, pOpts.RbdImageName, err)
 	}
@@ -1,554 +0,0 @@
-/*
-Copyright 2019 The Ceph-CSI Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package rbd
-
-import (
-    "fmt"
-    "strings"
-
-    "github.com/ceph/ceph-csi/pkg/util"
-
-    "github.com/pborman/uuid"
-    "github.com/pkg/errors"
-    "k8s.io/klog"
-)
-
-func validateNonEmptyField(field, fieldName, structName string) error {
-    if field == "" {
-        return fmt.Errorf("value '%s' in '%s' structure cannot be empty", fieldName, structName)
-    }
-
-    return nil
-}
-
-func validateRbdSnap(rbdSnap *rbdSnapshot) error {
-    if err := validateNonEmptyField(rbdSnap.RequestName, "RequestName", "rbdSnapshot"); err != nil {
-        return err
-    }
-
-    if err := validateNonEmptyField(rbdSnap.Monitors, "Monitors", "rbdSnapshot"); err != nil {
-        return err
-    }
-
-    if err := validateNonEmptyField(rbdSnap.AdminID, "AdminID", "rbdSnapshot"); err != nil {
-        return err
-    }
-
-    if err := validateNonEmptyField(rbdSnap.Pool, "Pool", "rbdSnapshot"); err != nil {
-        return err
-    }
-
-    if err := validateNonEmptyField(rbdSnap.RbdImageName, "RbdImageName", "rbdSnapshot"); err != nil {
-        return err
-    }
-
-    if err := validateNonEmptyField(rbdSnap.ClusterID, "ClusterID", "rbdSnapshot"); err != nil {
-        return err
-    }
-
-    return nil
-}
-
-func validateRbdVol(rbdVol *rbdVolume) error {
-    if err := validateNonEmptyField(rbdVol.RequestName, "RequestName", "rbdVolume"); err != nil {
-        return err
-    }
-
-    if err := validateNonEmptyField(rbdVol.Monitors, "Monitors", "rbdVolume"); err != nil {
-        return err
-    }
-
-    if err := validateNonEmptyField(rbdVol.AdminID, "AdminID", "rbdVolume"); err != nil {
-        return err
-    }
-
-    if err := validateNonEmptyField(rbdVol.Pool, "Pool", "rbdVolume"); err != nil {
-        return err
-    }
-
-    if err := validateNonEmptyField(rbdVol.ClusterID, "ClusterID", "rbdVolume"); err != nil {
-        return err
-    }
-
-    if rbdVol.VolSize == 0 {
-        return errors.New("value 'VolSize' in 'rbdVolume' structure cannot be 0")
-    }
-
-    return nil
-}
-
-/*
-checkSnapExists, and its counterpart checkVolExists, function as checks to determine if passed
-in rbdSnapshot or rbdVolume exists on the backend.
-
-**NOTE:** These functions manipulate the rados omaps that hold information regarding
-volume names as requested by the CSI drivers. Hence, these need to be invoked only when the
-respective CSI driver generated snapshot or volume name based locks are held, as otherwise racy
-access to these omaps may end up leaving them in an inconsistent state.
-
-These functions need enough information about cluster and pool (ie, Monitors, Pool, IDs filled in)
-to operate. They further require that the RequestName element of the structure have a valid value
-to operate on and determine if the said RequestName already exists on the backend.
-
-These functions populate the snapshot or the image name, its attributes and the CSI snapshot/volume
-ID for the same when succesful.
-
-These functions also cleanup omap reservations that are stale. I.e when omap entries exist and
-backing images or snapshots are missing, or one of the omaps exist and the next is missing. This is
-because, the order of omap creation and deletion are inverse of each other, and protected by the
-request name lock, and hence any stale omaps are leftovers from incomplete transactions and are
-hence safe to garbage collect.
-*/
-func checkSnapExists(rbdSnap *rbdSnapshot, credentials map[string]string) (found bool, err error) {
-    if err = validateRbdSnap(rbdSnap); err != nil {
-        return false, err
-    }
-
-    key, err := getKey(rbdSnap.AdminID, credentials)
-    if err != nil {
-        return false, err
-    }
-
-    // check if request name is already part of the snaps omap
-    snapUUID, err := util.GetOMapValue(rbdSnap.Monitors, rbdSnap.AdminID,
-        key, rbdSnap.Pool, csiSnapsDirectory, csiSnapNameKeyPrefix+rbdSnap.RequestName)
-    if err != nil {
-        // error should specifically be not found, for image to be absent, any other error
-        // is not conclusive, and we should not proceed
-        if _, ok := err.(util.ErrKeyNotFound); ok {
-            return false, nil
-        }
-
-        return false, err
-    }
-
-    rbdSnap.RbdSnapName = rbdSnapNamePrefix + snapUUID
-
-    // TODO: use listomapvals to dump all keys instead of reading them one-by-one
-    // check if the snapshot image omap is present
-    savedSnapName, err := util.GetOMapValue(rbdSnap.Monitors, rbdSnap.AdminID,
-        key, rbdSnap.Pool, rbdSnapOMapPrefix+snapUUID, rbdSnapCSISnapNameKey)
-    if err != nil {
-        if _, ok := err.(util.ErrKeyNotFound); ok {
-            err = unreserveSnap(rbdSnap, credentials)
-        }
-        return false, err
-    }
-
-    // check if snapshot image omap points back to the request name
-    if savedSnapName != rbdSnap.RequestName {
-        // NOTE: This should never be possible, hence no cleanup, but log error
-        // and return, as cleanup may need to occur manually!
-        return false, fmt.Errorf("internal state inconsistent, omap snap"+
-            " names disagree, request name (%s) snap name (%s) image omap"+
-            " snap name (%s)", rbdSnap.RequestName, rbdSnap.RbdSnapName, savedSnapName)
-    }
-
-    // check if the snapshot source image omap is present
-    savedVolName, err := util.GetOMapValue(rbdSnap.Monitors, rbdSnap.AdminID,
-        key, rbdSnap.Pool, rbdSnapOMapPrefix+snapUUID, rbdSnapSourceImageKey)
-    if err != nil {
-        if _, ok := err.(util.ErrKeyNotFound); ok {
-            err = unreserveSnap(rbdSnap, credentials)
-        }
-        return false, err
-    }
-
-    // check if snapshot source image omap points back to the source volume passed in
-    if savedVolName != rbdSnap.RbdImageName {
-        // NOTE: This can happen if there is a snapname conflict, and we alerady have a snapshot
-        // with the same name pointing to a different RBD image as the source
-        err = fmt.Errorf("snapname points to different image, request name (%s)"+
-            " image name (%s) image omap"+" volume name (%s)",
-            rbdSnap.RequestName, rbdSnap.RbdImageName, savedVolName)
-        return false, ErrSnapNameConflict{rbdSnap.RequestName, err}
-    }
-
-    // Fetch on-disk image attributes
-    err = updateSnapWithImageInfo(rbdSnap, credentials)
-    if err != nil {
-        if _, ok := err.(ErrSnapNotFound); ok {
-            err = unreserveSnap(rbdSnap, credentials)
-            return false, err
-        }
-
-        return false, err
-    }
-
-    // found a snapshot already available, process and return its information
-    poolID, err := util.GetPoolID(rbdSnap.Monitors, rbdSnap.AdminID, key, rbdSnap.Pool)
-    if err != nil {
-        return false, err
-    }
-
-    vi := util.CSIIdentifier{
-        PoolID:          poolID,
-        EncodingVersion: volIDVersion,
-        ClusterID:       rbdSnap.ClusterID,
-        ObjectUUID:      snapUUID,
-    }
-    rbdSnap.SnapID, err = vi.ComposeCSIID()
-    if err != nil {
-        return false, err
-    }
-
-    klog.V(4).Infof("Found existing snap (%s) with snap name (%s) for request (%s)",
-        rbdSnap.SnapID, rbdSnap.RbdSnapName, rbdSnap.RequestName)
-
-    return true, nil
-}
-
-/*
-Check comment on checkSnapExists, to understand how this function behaves
-
-**NOTE:** These functions manipulate the rados omaps that hold information regarding
-volume names as requested by the CSI drivers. Hence, these need to be invoked only when the
-respective CSI snapshot or volume name based locks are held, as otherwise racy access to these
-omaps may end up leaving the omaps in an inconsistent state.
-*/
-func checkVolExists(rbdVol *rbdVolume, credentials map[string]string) (found bool, err error) {
-    var vi util.CSIIdentifier
-
-    if err = validateRbdVol(rbdVol); err != nil {
-        return false, err
-    }
-
-    key, err := getKey(rbdVol.AdminID, credentials)
-    if err != nil {
-        return false, err
-    }
-
-    // check if request name is already part of the volumes omap
-    imageUUID, err := util.GetOMapValue(rbdVol.Monitors, rbdVol.AdminID,
-        key, rbdVol.Pool, csiVolsDirectory, csiVolNameKeyPrefix+rbdVol.RequestName)
-    if err != nil {
-        // error should specifically be not found, for image to be absent, any other error
-        // is not conclusive, and we should not proceed
-        if _, ok := err.(util.ErrKeyNotFound); ok {
-            return false, nil
-        }
-
-        return false, err
-    }
-
-    rbdVol.RbdImageName = rbdImgNamePrefix + imageUUID
-
-    // check if the image omap is present
-    savedVolName, err := util.GetOMapValue(rbdVol.Monitors, rbdVol.AdminID,
-        key, rbdVol.Pool, rbdImageOMapPrefix+imageUUID, rbdImageCSIVolNameKey)
-    if err != nil {
-        if _, ok := err.(util.ErrKeyNotFound); ok {
-            err = unreserveVol(rbdVol, credentials)
-        }
-        return false, err
-    }
-
-    // check if image omap points back to the request name
-    if savedVolName != rbdVol.RequestName {
-        // NOTE: This should never be possible, hence no cleanup, but log error
-        // and return, as cleanup may need to occur manually!
-        return false, fmt.Errorf("internal state inconsistent, omap volume"+
-            " names disagree, request name (%s) image name (%s) image omap"+
-            " volume name (%s)", rbdVol.RequestName, rbdVol.RbdImageName, savedVolName)
-    }
-
-    // NOTE: Return volsize should be on-disk volsize, not request vol size, so
-    // save it for size checks before fetching image data
-    requestSize := rbdVol.VolSize
-    // Fetch on-disk image attributes and compare against request
-    err = updateVolWithImageInfo(rbdVol, credentials)
-    if err != nil {
-        if _, ok := err.(ErrImageNotFound); ok {
-            err = unreserveVol(rbdVol, credentials)
-            return false, err
-        }
-
-        return false, err
-    }
-
-    // size checks
-    if rbdVol.VolSize < requestSize {
-        err = fmt.Errorf("image with the same name (%s) but with different size already exists",
-            rbdVol.RbdImageName)
-        return false, ErrVolNameConflict{rbdVol.RbdImageName, err}
-    }
-    // TODO: We should also ensure image features and format is the same
-
-    // found a volume already available, process and return it!
-    poolID, err := util.GetPoolID(rbdVol.Monitors, rbdVol.AdminID, key, rbdVol.Pool)
-    if err != nil {
-        return false, err
-    }
-
-    vi = util.CSIIdentifier{
-        PoolID:          poolID,
-        EncodingVersion: volIDVersion,
-        ClusterID:       rbdVol.ClusterID,
-        ObjectUUID:      imageUUID,
-    }
-    rbdVol.VolID, err = vi.ComposeCSIID()
-    if err != nil {
-        return false, err
-    }
-
-    klog.V(4).Infof("Found existng volume (%s) with image name (%s) for request (%s)",
-        rbdVol.VolID, rbdVol.RbdImageName, rbdVol.RequestName)
-
-    return true, nil
-}
-
-/*
-unreserveSnap and unreserveVol remove omaps associated with the snapshot and the image name,
-and also remove the corresponding request name key in the snaps or volumes omaps respectively.
-
-This is performed within the request name lock, to ensure that requests with the same name do not
-manipulate the omap entries concurrently.
-*/
-func unreserveSnap(rbdSnap *rbdSnapshot, credentials map[string]string) error {
-    key, err := getKey(rbdSnap.AdminID, credentials)
-    if err != nil {
-        return err
-    }
-
-    // delete snap image omap (first, inverse of create order)
-    snapUUID := strings.TrimPrefix(rbdSnap.RbdSnapName, rbdSnapNamePrefix)
-    err = util.RemoveObject(rbdSnap.Monitors, rbdSnap.AdminID, key, rbdSnap.Pool, rbdSnapOMapPrefix+snapUUID)
-    if err != nil {
-        if _, ok := err.(util.ErrObjectNotFound); !ok {
-            klog.Errorf("failed removing oMap %s (%s)", rbdSnapOMapPrefix+snapUUID, err)
-            return err
-        }
-    }
-
-    // delete the request name omap key (last, inverse of create order)
-    err = util.RemoveOMapKey(rbdSnap.Monitors, rbdSnap.AdminID, key, rbdSnap.Pool,
-        csiSnapsDirectory, csiSnapNameKeyPrefix+rbdSnap.RequestName)
-    if err != nil {
-        klog.Errorf("failed removing oMap key %s (%s)", csiSnapNameKeyPrefix+rbdSnap.RequestName, err)
-        return err
-    }
-
-    return nil
-}
-
-func unreserveVol(rbdVol *rbdVolume, credentials map[string]string) error {
-    key, err := getKey(rbdVol.AdminID, credentials)
-    if err != nil {
-        return err
-    }
-
-    // delete image omap (first, inverse of create order)
-    imageUUID := strings.TrimPrefix(rbdVol.RbdImageName, rbdImgNamePrefix)
-    err = util.RemoveObject(rbdVol.Monitors, rbdVol.AdminID, key, rbdVol.Pool, rbdImageOMapPrefix+imageUUID)
-    if err != nil {
-        if _, ok := err.(util.ErrObjectNotFound); !ok {
-            klog.Errorf("failed removing oMap %s (%s)", rbdImageOMapPrefix+imageUUID, err)
-            return err
-        }
-    }
-
-    // delete the request name omap key (last, inverse of create order)
-    err = util.RemoveOMapKey(rbdVol.Monitors, rbdVol.AdminID, key, rbdVol.Pool,
-        csiVolsDirectory, csiVolNameKeyPrefix+rbdVol.RequestName)
-    if err != nil {
-        klog.Errorf("failed removing oMap key %s (%s)", csiVolNameKeyPrefix+rbdVol.RequestName, err)
-        return err
-    }
-
-    return nil
-}
-
-// reserveOMapName creates an omap with passed in oMapNamePrefix and a generated <uuid>.
-// It ensures generated omap name does not already exist and if conflicts are detected, a set
-// number of retires with newer uuids are attempted before returning an error
-func reserveOMapName(monitors, adminID, key, poolName, oMapNamePrefix string) (string, error) {
-    var iterUUID string
-
-    maxAttempts := 5
-    attempt := 1
-    for attempt <= maxAttempts {
-        // generate a uuid for the image name
-        iterUUID = uuid.NewUUID().String()
-
-        err := util.CreateObject(monitors, adminID, key, poolName, oMapNamePrefix+iterUUID)
-        if err != nil {
-            if _, ok := err.(util.ErrObjectExists); ok {
-                attempt++
-                // try again with a different uuid, for maxAttempts tries
-                klog.V(4).Infof("uuid (%s) conflict detected, retrying (attempt %d of %d)",
-                    iterUUID, attempt, maxAttempts)
-                continue
-            }
-
-            return "", err
-        }
-
-        break
-    }
-
-    if attempt > maxAttempts {
-        return "", errors.New("uuid conflicts exceeds retry threshold")
-    }
-
-    return iterUUID, nil
-}
-
-/*
-reserveSnap and reserveVol add respective entries to the volumes and snapshots omaps, post
-generating a target snapshot or image name for use. Further, these functions create the snapshot or
-image name omaps, to store back pointers to the CSI generated request names.
-
-This is performed within the request name lock, to ensure that requests with the same name do not
-manipulate the omap entries concurrently.
-*/
-func reserveSnap(rbdSnap *rbdSnapshot, credentials map[string]string) error {
-    var vi util.CSIIdentifier
-
-    key, err := getKey(rbdSnap.AdminID, credentials)
-    if err != nil {
-        return err
-    }
-
-    poolID, err := util.GetPoolID(rbdSnap.Monitors, rbdSnap.AdminID, key,
-        rbdSnap.Pool)
-    if err != nil {
-        return err
-    }
-
-    // Create the snapUUID based omap first, to reserve the same and avoid conflicts
-    // NOTE: If any service loss occurs post creation of the snap omap, and before
-    // setting the omap key (rbdSnapCSISnapNameKey) to point back to the snaps omap, the
-    // snap omap key will leak
-    snapUUID, err := reserveOMapName(rbdSnap.Monitors, rbdSnap.AdminID, key, rbdSnap.Pool,
-        rbdSnapOMapPrefix)
-    if err != nil {
-        return err
-    }
-
-    // Create request snapUUID key in csi snaps omap and store the uuid based
-    // snap name into it
-    err = util.SetOMapKeyValue(rbdSnap.Monitors, rbdSnap.AdminID, key,
-        rbdSnap.Pool, csiSnapsDirectory, csiSnapNameKeyPrefix+rbdSnap.RequestName, snapUUID)
-    if err != nil {
-        return err
-    }
-    defer func() {
-        if err != nil {
-            klog.Warningf("reservation failed for volume: %s", rbdSnap.RequestName)
-            errDefer := unreserveSnap(rbdSnap, credentials)
-            if errDefer != nil {
-                klog.Warningf("failed undoing reservation of snapshot: %s (%v)",
-                    rbdSnap.RequestName, errDefer)
-            }
-        }
-    }()
-
-    // Create snap name based omap and store CSI request name key and source information
-    err = util.SetOMapKeyValue(rbdSnap.Monitors, rbdSnap.AdminID, key, rbdSnap.Pool,
-        rbdSnapOMapPrefix+snapUUID, rbdSnapCSISnapNameKey, rbdSnap.RequestName)
-    if err != nil {
-        return err
-    }
-    err = util.SetOMapKeyValue(rbdSnap.Monitors, rbdSnap.AdminID, key, rbdSnap.Pool,
-        rbdSnapOMapPrefix+snapUUID, rbdSnapSourceImageKey, rbdSnap.RbdImageName)
-    if err != nil {
-        return err
-    }
-
-    // generate the volume ID to return to the CO system
-    vi = util.CSIIdentifier{
-        PoolID:          poolID,
-        EncodingVersion: volIDVersion,
-        ClusterID:       rbdSnap.ClusterID,
-        ObjectUUID:      snapUUID,
-    }
-    rbdSnap.SnapID, err = vi.ComposeCSIID()
-    if err != nil {
-        return err
-    }
-    rbdSnap.RbdSnapName = rbdSnapNamePrefix + snapUUID
-    klog.V(4).Infof("Generated Volume ID (%s) and image name (%s) for request name (%s)",
-        rbdSnap.SnapID, rbdSnap.RbdImageName, rbdSnap.RequestName)
-
-    return nil
-}
-
-func reserveVol(rbdVol *rbdVolume, credentials map[string]string) error {
-    var vi util.CSIIdentifier
-
-    key, err := getKey(rbdVol.AdminID, credentials)
-    if err != nil {
-        return err
-    }
-
-    poolID, err := util.GetPoolID(rbdVol.Monitors, rbdVol.AdminID, key,
-        rbdVol.Pool)
-    if err != nil {
-        return err
-    }
-
-    // Create the imageUUID based omap first, to reserve the same and avoid conflicts
-    // NOTE: If any service loss occurs post creation of the image omap, and before
-    // setting the omap key (rbdImageCSIVolNameKey) to point back to the volumes omap,
-    // the image omap key will leak
-    imageUUID, err := reserveOMapName(rbdVol.Monitors, rbdVol.AdminID, key, rbdVol.Pool, rbdImageOMapPrefix)
-    if err != nil {
-        return err
-    }
-
-    // Create request volName key in csi volumes omap and store the uuid based
-    // image name into it
-    err = util.SetOMapKeyValue(rbdVol.Monitors, rbdVol.AdminID, key,
-        rbdVol.Pool, csiVolsDirectory, csiVolNameKeyPrefix+rbdVol.RequestName, imageUUID)
-    if err != nil {
-        return err
-    }
-    defer func() {
-        if err != nil {
-            klog.Warningf("reservation failed for volume: %s", rbdVol.RequestName)
-            errDefer := unreserveVol(rbdVol, credentials)
-            if errDefer != nil {
-                klog.Warningf("failed undoing reservation of volume: %s (%v)",
-                    rbdVol.RequestName, errDefer)
-            }
-        }
-    }()
-
-    // Create image name based omap and store CSI request volume name key and data
-    err = util.SetOMapKeyValue(rbdVol.Monitors, rbdVol.AdminID, key, rbdVol.Pool,
-        rbdImageOMapPrefix+imageUUID, rbdImageCSIVolNameKey, rbdVol.RequestName)
-    if err != nil {
-        return err
-    }
-
-    // generate the volume ID to return to the CO system
-    vi = util.CSIIdentifier{
-        PoolID:          poolID,
-        EncodingVersion: volIDVersion,
-        ClusterID:       rbdVol.ClusterID,
-        ObjectUUID:      imageUUID,
-    }
-    rbdVol.VolID, err = vi.ComposeCSIID()
-    if err != nil {
-        return err
-    }
-    rbdVol.RbdImageName = rbdImgNamePrefix + imageUUID
-    klog.V(4).Infof("Generated Volume ID (%s) and image name (%s) for request name (%s)",
-        rbdVol.VolID, rbdVol.RbdImageName, rbdVol.RequestName)
-
-    return nil
-}
@@ -164,7 +164,8 @@ func GetOMapValue(monitors, adminID, key, poolName, oMapName, oMapKey string) (s
 		"-p", poolName,
 		"getomapval", oMapName, oMapKey, tmpFile.Name())
 	if err != nil {
-		// no logs, as attempting to check for key/value is done even on regular call sequences
+		// no logs, as attempting to check for non-existent key/value is done even on
+		// regular call sequences
 		stdoutanderr := strings.Join([]string{string(stdout), string(stderr)}, " ")
 		if strings.Contains(stdoutanderr, "No such key: "+poolName+"/"+oMapName+"/"+oMapKey) {
 			return "", ErrKeyNotFound{poolName + "/" + oMapName + "/" + oMapKey, err}
@@ -175,6 +176,10 @@ func GetOMapValue(monitors, adminID, key, poolName, oMapName, oMapKey string) (s
 			return "", ErrKeyNotFound{poolName + "/" + oMapName + "/" + oMapKey, err}
 		}

+		// log other errors for troubleshooting assistance
+		klog.Errorf("failed getting omap value for key (%s) from omap (%s) in pool (%s): (%v)",
+			oMapKey, oMapName, poolName, err)
+
 		return "", fmt.Errorf("error (%v) occured, command output streams is (%s)",
 			err.Error(), stdoutanderr)
 	}
@@ -45,3 +45,14 @@ type ErrObjectNotFound struct {
 func (e ErrObjectNotFound) Error() string {
 	return e.err.Error()
 }
+
+// ErrSnapNameConflict is generated when a requested CSI snap name already exists on RBD but with
+// different properties, and hence is in conflict with the passed in CSI volume name
+type ErrSnapNameConflict struct {
+	requestName string
+	err         error
+}
+
+func (e ErrSnapNameConflict) Error() string {
+	return e.err.Error()
+}
|
@ -91,3 +91,24 @@ func ValidateDriverName(driverName string) error {
 	}
 	return err
 }
+
+// GenerateVolID generates a volume ID based on passed in parameters and version, to be returned
+// to the CO system
+func GenerateVolID(monitors, id, key, pool, clusterID, objUUID string, volIDVersion uint16) (string, error) {
+	poolID, err := GetPoolID(monitors, id, key, pool)
+	if err != nil {
+		return "", err
+	}
+
+	// generate the volume ID to return to the CO system
+	vi := CSIIdentifier{
+		PoolID:          poolID,
+		EncodingVersion: volIDVersion,
+		ClusterID:       clusterID,
+		ObjectUUID:      objUUID,
+	}
+
+	volID, err := vi.ComposeCSIID()
+
+	return volID, err
+}
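For orientation, a caller outside pkg/util would invoke this roughly as below; a minimal sketch, where the argument values (monitors, credentials, pool, cluster ID, and object UUID) are placeholders rather than anything this commit defines:

	volID, err := util.GenerateVolID(monitors, adminID, adminKey, pool,
		clusterID, objUUID, volIDVersion)
	if err != nil {
		return err
	}
	// volID is the encoded identifier handed back to the CO in the CSI response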
391 pkg/util/voljournal.go Normal file
@ -0,0 +1,391 @@
/*
Copyright 2019 The Ceph-CSI Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package util

import (
	"fmt"
	"strings"

	"github.com/pborman/uuid"
	"github.com/pkg/errors"
	"k8s.io/klog"
)

/*
RADOS omaps usage:

This note details how we preserve the idempotent nature of create requests and retain the
relationship between orchestrator (CO) generated names and plugin generated names for volumes
and snapshots.

NOTE: volume denotes an rbd image or a CephFS subvolume

The implementation uses Ceph RADOS omaps to preserve the relationship between request name and
generated volume (or snapshot) name. There are 4 types of omaps in use,
- A "csi.volumes.[csi-id]" (or "csi.volumes"+.+CSIInstanceID), (referred to using csiDirectory variable)
  - stores keys named using the CO generated names for volume requests (prefixed with csiNameKeyPrefix)
  - keys are named "csi.volume."+[CO generated VolName]
  - Key value contains the volume uuid that is created, for the CO provided name

- A "csi.snaps.[csi-id]" (or "csi.snaps"+.+CSIInstanceID), (referred to using csiDirectory variable)
  - stores keys named using the CO generated names for snapshot requests (prefixed with csiNameKeyPrefix)
  - keys are named "csi.snap."+[CO generated SnapName]
  - Key value contains the snapshot uuid that is created, for the CO provided name

- A per volume omap named "csi.volume."+[volume uuid], (referred to as CephUUIDDirectory)
  - stores a single key named "csi.volname", that has the value of the CO generated VolName that
    this volume refers to (referred to using csiNameKey value)

- A per snapshot omap named "csi.snap."+[snapshot uuid], (referred to as CephUUIDDirectory)
  - stores a key named "csi.snapname", that has the value of the CO generated SnapName that this
    snapshot refers to (referred to using csiNameKey value)
  - also stores another key named "csi.source", that has the value of the volume name that is the
    source of the snapshot (referred to using cephSnapSourceKey value)

Creation of omaps:
When a volume create request is received (or a snapshot create, the snapshot is not detailed in this
comment further as the process is similar),
- The csiDirectory is consulted to find if there is already a key with the CO VolName, and if present,
  it is used to read its references to reach the UUID that backs this VolName, to check if the
  UUID based volume can satisfy the requirements for the request
  - If during the process of checking the same, it is found that some linking information is stale
    or missing, the corresponding keys up to the key in the csiDirectory are cleaned up, to start afresh

- If the key with the CO VolName is not found, or was cleaned up, the request is treated as a
  new create request, and a CephUUIDDirectory is created first with a generated uuid, this ensures
  that we do not use a uuid that is already in use

- Next, a key with the VolName is created in the csiDirectory, and its value is updated to store the
  generated uuid

- This is followed by updating the CephUUIDDirectory with the VolName in the csiNameKey

- Finally, the volume is created (or promoted from a snapshot, if content source was provided),
  using the uuid and a corresponding name prefix (namingPrefix) as the volume name

The entire operation is locked based on VolName hash, to ensure there is only ever a single entity
modifying the related omaps for a given VolName.

This ensures the idempotent nature of creates, as the same CO generated VolName would attempt to use
the same volume uuid to serve the request, as the relations are saved in the respective omaps.

Deletion of omaps:
Delete requests would not contain the VolName, hence deletion uses the volume ID, which is encoded
with the volume uuid in it, to find the volume and the CephUUIDDirectory. The CephUUIDDirectory is
read to get the VolName that this image points to. This VolName can be further used to read and
delete the key from the csiDirectory.

As we trace back and find the VolName, we also take a hash based lock on the VolName before
proceeding with deleting the volume and the related omap entries, to ensure there is only ever a
single entity modifying the related omaps for a given VolName.
*/
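As a concrete illustration of the layout above (every name below is made up: the request name, the uuid, and the "default" instance id), a reserved volume would leave two related omaps behind:

	csi.volumes.default (the csiDirectory)
		csi.volume.pvc-0001 = 7d3b82a4-97f2-4b7a-9f3e-cd19d8b1aa55

	csi.volume.7d3b82a4-97f2-4b7a-9f3e-cd19d8b1aa55 (the CephUUIDDirectory)
		csi.volname = pvc-0001

and the backing image would be named csi-vol-7d3b82a4-97f2-4b7a-9f3e-cd19d8b1aa55, per the namingPrefix.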
type CSIJournal struct {
	// csiDirectory is the name of the CSI volumes object map that contains CSI volume-name (or
	// snapshot name) based keys
	csiDirectory string

	// CSI volume-name keyname prefix, for key in csiDirectory, suffix is the CSI passed volume name
	csiNameKeyPrefix string

	// Per Ceph volume (RBD/FS-subvolume) object map name prefix, suffix is the generated volume uuid
	cephUUIDDirectoryPrefix string

	// CSI volume-name key in per Ceph volume object map, containing CSI volume-name for which the
	// Ceph volume was created
	csiNameKey string

	// source volume name key in per Ceph snapshot object map, containing Ceph source volume uuid
	// for which the snapshot was created
	cephSnapSourceKey string

	// volume name prefix for naming on Ceph rbd or FS, suffix is a uuid generated per volume
	namingPrefix string
}

// NewCSIVolumeJournal returns an instance of volume keys
func NewCSIVolumeJournal() *CSIJournal {
	return &CSIJournal{
		csiDirectory:            "csi.volumes",
		csiNameKeyPrefix:        "csi.volume.",
		cephUUIDDirectoryPrefix: "csi.volume.",
		csiNameKey:              "csi.volname",
		namingPrefix:            "csi-vol-",
		cephSnapSourceKey:       "",
	}
}

// NewCSISnapshotJournal returns an instance of snapshot keys
func NewCSISnapshotJournal() *CSIJournal {
	return &CSIJournal{
		csiDirectory:            "csi.snaps",
		csiNameKeyPrefix:        "csi.snap.",
		cephUUIDDirectoryPrefix: "csi.snap.",
		csiNameKey:              "csi.snapname",
		namingPrefix:            "csi-snap-",
		cephSnapSourceKey:       "csi.source",
	}
}

// NamingPrefix returns the value of naming prefix from the journal keys
func (cj *CSIJournal) NamingPrefix() string {
	return cj.namingPrefix
}

// SetCSIDirectorySuffix sets the given suffix for the csiDirectory omap
func (cj *CSIJournal) SetCSIDirectorySuffix(suffix string) {
	cj.csiDirectory = cj.csiDirectory + "." + suffix
}
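As a usage sketch (not part of this file), a driver would typically construct both journals at start-up and scope them to its CSI instance; the "default" suffix here is only an example value:

	volJournal := util.NewCSIVolumeJournal()
	snapJournal := util.NewCSISnapshotJournal()

	// results in the omaps "csi.volumes.default" and "csi.snaps.default"
	volJournal.SetCSIDirectorySuffix("default")
	snapJournal.SetCSIDirectorySuffix("default")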

/*
CheckReservation checks if given request name contains a valid reservation
- If there is a valid reservation, then the corresponding UUID for the volume/snapshot is returned
- If there is a reservation that is stale (or not fully cleaned up), it is garbage collected using
  the UndoReservation call, as appropriate
- If a snapshot is being checked, then its source is matched to the parentName that is provided

NOTE: As the function manipulates omaps, it should be called with a lock against the request name
held, to prevent parallel operations from modifying the state of the omaps for this request name.

Return values:
	- string: Contains the UUID that was reserved for the passed in reqName, empty if
	there was no reservation found
	- error: non-nil in case of any errors
*/
func (cj *CSIJournal) CheckReservation(monitors, id, key, pool, reqName, parentName string) (string, error) {
	var snapSource bool

	if parentName != "" {
		if cj.cephSnapSourceKey == "" {
			err := errors.New("invalid request, cephSnapSourceKey is nil")
			return "", err
		}
		snapSource = true
	}

	// check if request name is already part of the directory omap
	objUUID, err := GetOMapValue(monitors, id, key, pool, cj.csiDirectory,
		cj.csiNameKeyPrefix+reqName)
	if err != nil {
		// error should specifically be not found, for volume to be absent, any other error
		// is not conclusive, and we should not proceed
		if _, ok := err.(ErrKeyNotFound); ok {
			return "", nil
		}
		return "", err
	}

	savedReqName, savedReqParentName, err := cj.GetObjectUUIDData(monitors, id, key, pool,
		objUUID, snapSource)
	if err != nil {
		// error should specifically be not found, for image to be absent, any other error
		// is not conclusive, and we should not proceed
		if _, ok := err.(ErrKeyNotFound); ok {
			err = cj.UndoReservation(monitors, id, key, pool, cj.namingPrefix+objUUID, reqName)
		}
		return "", err
	}

	// check if UUID key points back to the request name
	if savedReqName != reqName {
		// NOTE: This should never be possible, hence no cleanup, but log error
		// and return, as cleanup may need to occur manually!
		return "", fmt.Errorf("internal state inconsistent, omap names mismatch,"+
			" request name (%s) volume UUID (%s) volume omap name (%s)",
			reqName, objUUID, savedReqName)
	}

	if snapSource {
		// check if source UUID key points back to the parent volume passed in
		if savedReqParentName != parentName {
			// NOTE: This can happen if there is a snapname conflict, and we already have a snapshot
			// with the same name pointing to a different UUID as the source
			err = fmt.Errorf("snapname points to different volume, request name (%s)"+
				" source name (%s) saved source name (%s)",
				reqName, parentName, savedReqParentName)
			return "", ErrSnapNameConflict{reqName, err}
		}
	}

	return objUUID, nil
}
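A hedged sketch of the create-path probe (the surrounding request-name lock and the volume existence check are elided; monitors, adminID, adminKey, pool, and reqName are placeholders). An empty UUID means no prior reservation, while snapshot name clashes arrive as ErrSnapNameConflict:

	volUUID, err := volJournal.CheckReservation(monitors, adminID, adminKey,
		pool, reqName, "")
	if err != nil {
		return err
	}
	if volUUID != "" {
		// a valid reservation exists; reuse the backing volume named
		// volJournal.NamingPrefix() + volUUID
	}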

/*
UndoReservation undoes a reservation, in the reverse order of ReserveName
- The UUID directory is cleaned up before the VolName key in the csiDirectory is cleaned up

NOTE: Ensure that the Ceph volume (image or FS subvolume) backing the reservation is cleaned up
prior to cleaning up the reservation

NOTE: As the function manipulates omaps, it should be called with a lock against the request name
held, to prevent parallel operations from modifying the state of the omaps for this request name.
*/
func (cj *CSIJournal) UndoReservation(monitors, id, key, pool, volName, reqName string) error {
	// delete volume UUID omap (first, inverse of create order)
	// TODO: Check cases where volName can be empty, and we need to just cleanup the reqName
	imageUUID := strings.TrimPrefix(volName, cj.namingPrefix)
	err := RemoveObject(monitors, id, key, pool, cj.cephUUIDDirectoryPrefix+imageUUID)
	if err != nil {
		if _, ok := err.(ErrObjectNotFound); !ok {
			klog.Errorf("failed removing oMap %s (%s)", cj.cephUUIDDirectoryPrefix+imageUUID, err)
			return err
		}
	}

	// delete the request name key (last, inverse of create order)
	err = RemoveOMapKey(monitors, id, key, pool, cj.csiDirectory,
		cj.csiNameKeyPrefix+reqName)
	if err != nil {
		klog.Errorf("failed removing oMap key %s (%s)", cj.csiNameKeyPrefix+reqName, err)
		return err
	}

	return err
}

// reserveOMapName creates an omap with passed in oMapNamePrefix and a generated <uuid>.
// It ensures the generated omap name does not already exist and, if conflicts are detected, a set
// number of retries with newer uuids are attempted before returning an error
func reserveOMapName(monitors, id, key, pool, oMapNamePrefix string) (string, error) {
	var iterUUID string

	maxAttempts := 5
	attempt := 1
	for attempt <= maxAttempts {
		// generate a uuid for the image name
		iterUUID = uuid.NewUUID().String()

		err := CreateObject(monitors, id, key, pool, oMapNamePrefix+iterUUID)
		if err != nil {
			if _, ok := err.(ErrObjectExists); ok {
				attempt++
				// try again with a different uuid, for maxAttempts tries
				klog.V(4).Infof("uuid (%s) conflict detected, retrying (attempt %d of %d)",
					iterUUID, attempt, maxAttempts)
				continue
			}

			return "", err
		}

		return iterUUID, nil
	}

	return "", errors.New("uuid conflicts exceed retry threshold")
}

/*
ReserveName adds respective entries to the csiDirectory omaps, post generating a target
UUIDDirectory for use. Further, these functions update the UUIDDirectory omaps, to store back
pointers to the CSI generated request names.

NOTE: As the function manipulates omaps, it should be called with a lock against the request name
held, to prevent parallel operations from modifying the state of the omaps for this request name.

Return values:
	- string: Contains the UUID that was reserved for the passed in reqName
	- error: non-nil in case of any errors
*/
func (cj *CSIJournal) ReserveName(monitors, id, key, pool, reqName, parentName string) (string, error) {
	var snapSource bool

	if parentName != "" {
		if cj.cephSnapSourceKey == "" {
			err := errors.New("invalid request, cephSnapSourceKey is nil")
			return "", err
		}
		snapSource = true
	}

	// Create the UUID based omap first, to reserve the same and avoid conflicts
	// NOTE: If any service loss occurs post creation of the UUID directory, and before
	// setting the request name key (csiNameKey) to point back to the UUID directory, the
	// UUID directory key will be leaked
	volUUID, err := reserveOMapName(monitors, id, key, pool, cj.cephUUIDDirectoryPrefix)
	if err != nil {
		return "", err
	}

	// Create request name (csiNameKey) key in csiDirectory and store the UUID based
	// volume name into it
	err = SetOMapKeyValue(monitors, id, key, pool, cj.csiDirectory, cj.csiNameKeyPrefix+reqName,
		volUUID)
	if err != nil {
		return "", err
	}
	defer func() {
		if err != nil {
			klog.Warningf("reservation failed for volume: %s", reqName)
			errDefer := cj.UndoReservation(monitors, id, key, pool, cj.namingPrefix+volUUID,
				reqName)
			if errDefer != nil {
				klog.Warningf("failed undoing reservation of volume: %s (%v)", reqName, errDefer)
			}
		}
	}()

	// Update UUID directory to store CSI request name
	err = SetOMapKeyValue(monitors, id, key, pool, cj.cephUUIDDirectoryPrefix+volUUID,
		cj.csiNameKey, reqName)
	if err != nil {
		return "", err
	}

	if snapSource {
		// Update UUID directory to store source volume UUID in case of snapshots
		err = SetOMapKeyValue(monitors, id, key, pool, cj.cephUUIDDirectoryPrefix+volUUID,
			cj.cephSnapSourceKey, parentName)
		if err != nil {
			return "", err
		}
	}

	return volUUID, nil
}
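Putting reservation and creation together, a create flow would pair ReserveName with UndoReservation roughly as below; a minimal sketch under the request-name lock the comments call for, where createBackingVolume is a hypothetical helper standing in for the actual rbd or CephFS create:

	volUUID, err := volJournal.ReserveName(monitors, adminID, adminKey, pool, reqName, "")
	if err != nil {
		return err
	}
	volName := volJournal.NamingPrefix() + volUUID
	if err = createBackingVolume(volName); err != nil { // hypothetical helper
		// creation failed; roll the omap entries back in reverse order
		if undoErr := volJournal.UndoReservation(monitors, adminID, adminKey,
			pool, volName, reqName); undoErr != nil {
			klog.Warningf("failed undoing reservation of volume: %s (%v)", reqName, undoErr)
		}
		return err
	}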

/*
GetObjectUUIDData fetches all keys from a UUID directory
Return values:
	- string: Contains the request name for the passed in UUID
	- string: Contains the parent image name for the passed in UUID, if it is a snapshot
	- error: non-nil in case of any errors
*/
func (cj *CSIJournal) GetObjectUUIDData(monitors, id, key, pool, objectUUID string, snapSource bool) (string, string, error) {
	var sourceName string

	if snapSource && cj.cephSnapSourceKey == "" {
		err := errors.New("invalid request, cephSnapSourceKey is nil")
		return "", "", err
	}

	// TODO: fetch all omap vals in one call, rather than making multiple listomapvals
	requestName, err := GetOMapValue(monitors, id, key, pool,
		cj.cephUUIDDirectoryPrefix+objectUUID, cj.csiNameKey)
	if err != nil {
		return "", "", err
	}

	if snapSource {
		sourceName, err = GetOMapValue(monitors, id, key, pool,
			cj.cephUUIDDirectoryPrefix+objectUUID, cj.cephSnapSourceKey)
		if err != nil {
			return "", "", err
		}
	}

	return requestName, sourceName, nil
}
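On the delete side, GetObjectUUIDData supplies the traceback described in the header comment; a hedged sketch, where objectUUID has been decoded from the incoming volume ID and the credential and pool names remain placeholders:

	reqName, _, err := volJournal.GetObjectUUIDData(monitors, adminID, adminKey,
		pool, objectUUID, false)
	if err != nil {
		return err
	}
	// with the request-name lock held, delete the backing volume first, then:
	err = volJournal.UndoReservation(monitors, adminID, adminKey, pool,
		volJournal.NamingPrefix()+objectUUID, reqName)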