2019-05-14 19:15:01 +00:00
|
|
|
/*
|
|
|
|
Copyright 2019 The Ceph-CSI Authors.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2020-04-23 18:22:55 +00:00
|
|
|
package journal
|
2019-05-14 19:15:01 +00:00
|
|
|
|
|
|
|
import (
|
2019-08-22 17:19:06 +00:00
|
|
|
"context"
|
2020-01-24 16:26:56 +00:00
|
|
|
"encoding/binary"
|
|
|
|
"encoding/hex"
|
2019-05-14 19:15:01 +00:00
|
|
|
"fmt"
|
2020-01-24 16:26:56 +00:00
|
|
|
"strings"
|
2019-05-14 19:15:01 +00:00
|
|
|
|
2020-04-23 18:22:55 +00:00
|
|
|
"github.com/ceph/ceph-csi/internal/util"
|
|
|
|
|
2019-05-14 19:15:01 +00:00
|
|
|
"github.com/pborman/uuid"
|
|
|
|
"github.com/pkg/errors"
|
|
|
|
"k8s.io/klog"
|
|
|
|
)
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
// uuidEncodedLength is the length, in bytes, of the string representation of a
// uuid (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx).
const uuidEncodedLength = 36
|
|
|
|
|
2019-05-14 19:15:01 +00:00
|
|
|
/*
|
|
|
|
RADOS omaps usage:
|
|
|
|
|
|
|
|
This note details how we preserve idempotent nature of create requests and retain the relationship
|
|
|
|
between orchestrator (CO) generated names and plugin generated names for volumes and snapshots.
|
|
|
|
|
|
|
|
NOTE: volume denotes an rbd image or a CephFS subvolume
|
|
|
|
|
|
|
|
The implementation uses Ceph RADOS omaps to preserve the relationship between request name and
|
|
|
|
generated volume (or snapshot) name. There are 4 types of omaps in use,
|
|
|
|
- A "csi.volumes.[csi-id]" (or "csi.volumes"+.+CSIInstanceID), (referred to using csiDirectory variable)
|
|
|
|
- stores keys named using the CO generated names for volume requests (prefixed with csiNameKeyPrefix)
|
|
|
|
- keys are named "csi.volume."+[CO generated VolName]
|
|
|
|
- Key value contains the volume uuid that is created, for the CO provided name
|
|
|
|
|
|
|
|
- A "csi.snaps.[csi-id]" (or "csi.snaps"+.+CSIInstanceID), (referred to using csiDirectory variable)
|
|
|
|
- stores keys named using the CO generated names for snapshot requests (prefixed with csiNameKeyPrefix)
|
|
|
|
- keys are named "csi.snap."+[CO generated SnapName]
|
|
|
|
- Key value contains the snapshot uuid that is created, for the CO provided name
|
|
|
|
|
|
|
|
- A per volume omap named "csi.volume."+[volume uuid], (referred to as CephUUIDDirectory)
|
2020-02-24 13:19:42 +00:00
|
|
|
- stores the key named "csi.volname", that has the value of the CO generated VolName that
|
2019-05-14 19:15:01 +00:00
|
|
|
this volume refers to (referred to using csiNameKey value)
|
2020-02-24 13:19:42 +00:00
|
|
|
- stores the key named "csi.imagename", that has the value of the Ceph RBD image name
|
|
|
|
this volume refers to (referred to using csiImageKey value)
|
2019-05-14 19:15:01 +00:00
|
|
|
|
|
|
|
- A per snapshot omap named "csi.snap."+[RBD snapshot uuid], (referred to as CephUUIDDirectory)
|
|
|
|
- stores a key named "csi.snapname", that has the value of the CO generated SnapName that this
|
|
|
|
snapshot refers to (referred to using csiNameKey value)
|
2020-02-24 13:19:42 +00:00
|
|
|
- stores the key named "csi.imagename", that has the value of the Ceph RBD image name
|
|
|
|
this snapshot refers to (referred to using csiImageKey value)
|
|
|
|
- stores a key named "csi.source", that has the value of the volume name that is the
|
2019-05-14 19:15:01 +00:00
|
|
|
source of the snapshot (referred to using cephSnapSourceKey value)
|
|
|
|
|
|
|
|
Creation of omaps:
|
|
|
|
When a volume create request is received (or a snapshot create, the snapshot is not detailed in this
|
|
|
|
comment further as the process is similar),
|
|
|
|
- The csiDirectory is consulted to find if there is already a key with the CO VolName, and if present,
|
|
|
|
it is used to read its references to reach the UUID that backs this VolName, to check if the
|
|
|
|
UUID based volume can satisfy the requirements for the request
|
|
|
|
- If during the process of checking the same, it is found that some linking information is stale
|
|
|
|
or missing, the corresponding keys up to the key in the csiDirectory are cleaned up, to start afresh
|
|
|
|
|
|
|
|
- If the key with the CO VolName is not found, or was cleaned up, the request is treated as a
|
|
|
|
new create request, and a CephUUIDDirectory is created first with a generated uuid, this ensures
|
|
|
|
that we do not use a uuid that is already in use
|
|
|
|
|
|
|
|
- Next, a key with the VolName is created in the csiDirectory, and its value is updated to store the
|
|
|
|
generated uuid
|
|
|
|
|
2020-02-24 13:19:42 +00:00
|
|
|
- This is followed by updating the CephUUIDDirectory with the VolName in the csiNameKey and the RBD image
|
|
|
|
name in the csiImageKey
|
2019-05-14 19:15:01 +00:00
|
|
|
|
|
|
|
- Finally, the volume is created (or promoted from a snapshot, if content source was provided),
|
|
|
|
using the uuid and a corresponding name prefix (namingPrefix) as the volume name
|
|
|
|
|
|
|
|
The entire operation is locked based on VolName hash, to ensure there is only ever a single entity
|
|
|
|
modifying the related omaps for a given VolName.
|
|
|
|
|
|
|
|
This ensures idempotent nature of creates, as the same CO generated VolName would attempt to use
|
|
|
|
the same volume uuid to serve the request, as the relations are saved in the respective omaps.
|
|
|
|
|
|
|
|
Deletion of omaps:
|
|
|
|
Delete requests would not contain the VolName, hence deletion uses the volume ID, which is encoded
|
|
|
|
with the volume uuid in it, to find the volume and the CephUUIDDirectory. The CephUUIDDirectory is
|
|
|
|
read to get the VolName that this image points to. This VolName can be further used to read and
|
|
|
|
delete the key from the csiDirectory.
|
|
|
|
|
|
|
|
As we trace back and find the VolName, we also take a hash based lock on the VolName before
|
|
|
|
proceeding with deleting the volume and the related omap entries, to ensure there is only ever a
|
|
|
|
single entity modifying the related omaps for a given VolName.
|
|
|
|
*/
|
|
|
|
|
2020-02-24 13:19:42 +00:00
|
|
|
// Default name prefixes that are prepended to the generated uuid when the
// caller does not supply a naming prefix.
const (
	// defaultVolumeNamingPrefix is the default prefix for volume names
	defaultVolumeNamingPrefix string = "csi-vol-"
	// defaultSnapshotNamingPrefix is the default prefix for snapshot names
	defaultSnapshotNamingPrefix string = "csi-snap-"
)
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
// Config defines the required object and key names for the above RADOS based OMaps
type Config struct {
	// csiDirectory is the name of the CSI volumes object map that contains CSI volume-name (or
	// snapshot name) based keys
	csiDirectory string

	// CSI volume-name keyname prefix, for key in csiDirectory, suffix is the CSI passed volume name
	csiNameKeyPrefix string

	// Per Ceph volume (RBD/FS-subvolume) object map name prefix, suffix is the generated volume uuid
	cephUUIDDirectoryPrefix string

	// CSI volume-name key in per Ceph volume object map, containing CSI volume-name for which the
	// Ceph volume was created
	csiNameKey string

	// CSI image-name key in per Ceph volume object map, containing RBD image-name
	// of this Ceph volume
	csiImageKey string

	// key in per Ceph volume object map, containing the pool ID where csiDirectory is
	// maintained, as it can be different from where the ceph volume object map is maintained,
	// during topology based provisioning
	csiJournalPool string

	// source volume name key in per Ceph snapshot object map, containing Ceph source volume uuid
	// for which the snapshot was created
	cephSnapSourceKey string

	// namespace in which the RADOS objects are stored, default is no namespace
	namespace string

	// encryption KMS key in per Ceph volume object map, containing the KMS in which the
	// encryption passphrase was saved, key is absent when no encryption is used
	encryptKMSKey string
}
|
|
|
|
|
2020-02-24 13:19:42 +00:00
|
|
|
// NewCSIVolumeJournal returns an instance of CSIJournal for volumes
|
2020-05-14 18:04:13 +00:00
|
|
|
func NewCSIVolumeJournal(suffix string) *Config {
|
|
|
|
return &Config{
|
2020-05-11 21:21:30 +00:00
|
|
|
csiDirectory: "csi.volumes." + suffix,
|
2019-05-14 19:15:01 +00:00
|
|
|
csiNameKeyPrefix: "csi.volume.",
|
|
|
|
cephUUIDDirectoryPrefix: "csi.volume.",
|
|
|
|
csiNameKey: "csi.volname",
|
2020-02-24 13:19:42 +00:00
|
|
|
csiImageKey: "csi.imagename",
|
2020-01-24 16:26:56 +00:00
|
|
|
csiJournalPool: "csi.journalpool",
|
2019-05-14 19:15:01 +00:00
|
|
|
cephSnapSourceKey: "",
|
2019-05-28 19:03:18 +00:00
|
|
|
namespace: "",
|
2020-01-29 11:44:45 +00:00
|
|
|
encryptKMSKey: "csi.volume.encryptKMS",
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-24 13:19:42 +00:00
|
|
|
// NewCSISnapshotJournal returns an instance of CSIJournal for snapshots
|
2020-05-14 18:04:13 +00:00
|
|
|
func NewCSISnapshotJournal(suffix string) *Config {
|
|
|
|
return &Config{
|
2020-05-11 21:21:30 +00:00
|
|
|
csiDirectory: "csi.snaps." + suffix,
|
2019-05-14 19:15:01 +00:00
|
|
|
csiNameKeyPrefix: "csi.snap.",
|
|
|
|
cephUUIDDirectoryPrefix: "csi.snap.",
|
|
|
|
csiNameKey: "csi.snapname",
|
2020-02-24 13:19:42 +00:00
|
|
|
csiImageKey: "csi.imagename",
|
2020-01-24 16:26:56 +00:00
|
|
|
csiJournalPool: "csi.journalpool",
|
2019-05-14 19:15:01 +00:00
|
|
|
cephSnapSourceKey: "csi.source",
|
2019-05-28 19:03:18 +00:00
|
|
|
namespace: "",
|
2020-01-29 11:44:45 +00:00
|
|
|
encryptKMSKey: "csi.volume.encryptKMS",
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-12 13:22:07 +00:00
|
|
|
// NewCSIVolumeJournalWithNamespace returns an instance of CSIJournal for
|
|
|
|
// volumes using a predetermined namespace value.
|
2020-05-14 18:04:13 +00:00
|
|
|
func NewCSIVolumeJournalWithNamespace(suffix, ns string) *Config {
|
2020-05-12 13:22:07 +00:00
|
|
|
j := NewCSIVolumeJournal(suffix)
|
|
|
|
j.namespace = ns
|
|
|
|
return j
|
|
|
|
}
|
|
|
|
|
2020-02-24 13:19:42 +00:00
|
|
|
// GetNameForUUID returns volume name
|
2020-05-14 18:04:13 +00:00
|
|
|
func (cj *Config) GetNameForUUID(prefix, uid string, isSnapshot bool) string {
|
2020-02-24 13:19:42 +00:00
|
|
|
if prefix == "" {
|
|
|
|
if isSnapshot {
|
|
|
|
prefix = defaultSnapshotNamingPrefix
|
|
|
|
} else {
|
|
|
|
prefix = defaultVolumeNamingPrefix
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return prefix + uid
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
// ImageData contains image name and stored CSI properties
type ImageData struct {
	// ImageUUID is the uuid that is reserved for the request name
	ImageUUID string
	// ImagePool is the name of the pool that holds the per-uuid omap
	ImagePool string
	// ImagePoolID is the ID of ImagePool as decoded from the reservation, or
	// util.InvalidPoolID when the reservation only stores the uuid
	ImagePoolID int64
	// ImageAttributes are the attributes read from the per-uuid omap
	ImageAttributes *ImageAttributes
}
|
|
|
|
|
2020-05-12 21:05:55 +00:00
|
|
|
// Connection represents a real or virtual connection to a ceph cluster
// that can make changes to the journal(s).
type Connection struct {
	// config holds the omap object and key names this connection operates on
	config *Config
	// connection metadata
	monitors string
	cr       *util.Credentials
}
|
|
|
|
|
|
|
|
// Connect establishes a new connection to a ceph cluster for journal metadata.
|
2020-05-14 18:04:13 +00:00
|
|
|
func (cj *Config) Connect(monitors string, cr *util.Credentials) (*Connection, error) {
|
2020-05-12 21:05:55 +00:00
|
|
|
conn := &Connection{
|
|
|
|
config: cj,
|
|
|
|
monitors: monitors,
|
|
|
|
cr: cr,
|
|
|
|
}
|
|
|
|
return conn, nil
|
|
|
|
}
|
|
|
|
|
2019-05-14 19:15:01 +00:00
|
|
|
/*
|
|
|
|
CheckReservation checks if given request name contains a valid reservation
|
|
|
|
- If there is a valid reservation, then the corresponding UUID for the volume/snapshot is returned
|
|
|
|
- If there is a reservation that is stale (or not fully cleaned up), it is garbage collected using
|
|
|
|
the UndoReservation call, as appropriate
|
|
|
|
- If a snapshot is being checked, then its source is matched to the parentName that is provided
|
|
|
|
|
|
|
|
NOTE: As the function manipulates omaps, it should be called with a lock against the request name
|
|
|
|
held, to prevent parallel operations from modifying the state of the omaps for this request name.
|
|
|
|
|
|
|
|
Return values:
|
|
|
|
- string: Contains the UUID that was reserved for the passed in reqName, empty if
|
|
|
|
there was no reservation found
|
|
|
|
- error: non-nil in case of any errors
|
|
|
|
*/
|
2020-05-12 21:05:55 +00:00
|
|
|
func (conn *Connection) CheckReservation(ctx context.Context,
|
2020-01-24 16:26:56 +00:00
|
|
|
journalPool, reqName, namePrefix, parentName, kmsConfig string) (*ImageData, error) {
|
|
|
|
var (
|
|
|
|
snapSource bool
|
|
|
|
objUUID string
|
|
|
|
savedImagePool string
|
2020-04-23 18:22:55 +00:00
|
|
|
savedImagePoolID int64 = util.InvalidPoolID
|
2020-05-12 21:05:55 +00:00
|
|
|
cj = conn.config
|
2020-01-24 16:26:56 +00:00
|
|
|
)
|
2019-05-14 19:15:01 +00:00
|
|
|
|
|
|
|
if parentName != "" {
|
|
|
|
if cj.cephSnapSourceKey == "" {
|
|
|
|
err := errors.New("invalid request, cephSnapSourceKey is nil")
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, err
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
snapSource = true
|
|
|
|
}
|
|
|
|
|
|
|
|
// check if request name is already part of the directory omap
|
2020-05-12 21:05:55 +00:00
|
|
|
objUUIDAndPool, err := util.GetOMapValue(ctx, conn.monitors, conn.cr, journalPool, cj.namespace, cj.csiDirectory,
|
2019-05-14 19:15:01 +00:00
|
|
|
cj.csiNameKeyPrefix+reqName)
|
|
|
|
if err != nil {
|
|
|
|
// error should specifically be not found, for volume to be absent, any other error
|
|
|
|
// is not conclusive, and we should not proceed
|
2020-03-19 12:30:59 +00:00
|
|
|
switch err.(type) {
|
2020-04-23 18:22:55 +00:00
|
|
|
case util.ErrKeyNotFound, util.ErrPoolNotFound:
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, nil
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, err
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
// check UUID only encoded value
|
|
|
|
if len(objUUIDAndPool) == uuidEncodedLength {
|
|
|
|
objUUID = objUUIDAndPool
|
|
|
|
savedImagePool = journalPool
|
|
|
|
} else { // check poolID/UUID encoding; extract the vol UUID and pool name
|
|
|
|
var buf64 []byte
|
|
|
|
components := strings.Split(objUUIDAndPool, "/")
|
|
|
|
objUUID = components[1]
|
|
|
|
savedImagePoolIDStr := components[0]
|
|
|
|
|
|
|
|
buf64, err = hex.DecodeString(savedImagePoolIDStr)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
savedImagePoolID = int64(binary.BigEndian.Uint64(buf64))
|
|
|
|
|
2020-05-12 21:05:55 +00:00
|
|
|
savedImagePool, err = util.GetPoolName(ctx, conn.monitors, conn.cr, savedImagePoolID)
|
2020-01-24 16:26:56 +00:00
|
|
|
if err != nil {
|
2020-04-23 18:22:55 +00:00
|
|
|
if _, ok := err.(util.ErrPoolNotFound); ok {
|
2020-05-12 21:05:55 +00:00
|
|
|
err = conn.UndoReservation(ctx, journalPool, "", "", reqName)
|
2020-01-24 16:26:56 +00:00
|
|
|
}
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-12 21:05:55 +00:00
|
|
|
savedImageAttributes, err := conn.GetImageAttributes(ctx, savedImagePool,
|
2019-05-14 19:15:01 +00:00
|
|
|
objUUID, snapSource)
|
|
|
|
if err != nil {
|
|
|
|
// error should specifically be not found, for image to be absent, any other error
|
|
|
|
// is not conclusive, and we should not proceed
|
2020-04-23 18:22:55 +00:00
|
|
|
if _, ok := err.(util.ErrKeyNotFound); ok {
|
2020-05-12 21:05:55 +00:00
|
|
|
err = conn.UndoReservation(ctx, journalPool, savedImagePool,
|
2020-01-24 16:26:56 +00:00
|
|
|
cj.GetNameForUUID(namePrefix, objUUID, snapSource), reqName)
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, err
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// check if UUID key points back to the request name
|
2020-01-24 16:26:56 +00:00
|
|
|
if savedImageAttributes.RequestName != reqName {
|
2019-05-14 19:15:01 +00:00
|
|
|
// NOTE: This should never be possible, hence no cleanup, but log error
|
|
|
|
// and return, as cleanup may need to occur manually!
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, fmt.Errorf("internal state inconsistent, omap names mismatch,"+
|
2019-05-14 19:15:01 +00:00
|
|
|
" request name (%s) volume UUID (%s) volume omap name (%s)",
|
2020-01-24 16:26:56 +00:00
|
|
|
reqName, objUUID, savedImageAttributes.RequestName)
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
if kmsConfig != "" {
|
|
|
|
if savedImageAttributes.KmsID != kmsConfig {
|
|
|
|
return nil, fmt.Errorf("internal state inconsistent, omap encryption KMS"+
|
2020-01-29 11:44:45 +00:00
|
|
|
" mismatch, request KMS (%s) volume UUID (%s) volume omap KMS (%s)",
|
2020-01-24 16:26:56 +00:00
|
|
|
kmsConfig, objUUID, savedImageAttributes.KmsID)
|
2020-01-29 11:44:45 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
// TODO: skipping due to excessive poolID to poolname call, also this should never happen!
|
|
|
|
// check if journal pool points back to the passed in journal pool
|
|
|
|
// if savedJournalPoolID != journalPoolID {
|
|
|
|
|
2019-05-14 19:15:01 +00:00
|
|
|
if snapSource {
|
|
|
|
// check if source UUID key points back to the parent volume passed in
|
2020-01-24 16:26:56 +00:00
|
|
|
if savedImageAttributes.SourceName != parentName {
|
2019-05-14 19:15:01 +00:00
|
|
|
// NOTE: This can happen if there is a snapname conflict, and we already have a snapshot
|
|
|
|
// with the same name pointing to a different UUID as the source
|
|
|
|
err = fmt.Errorf("snapname points to different volume, request name (%s)"+
|
|
|
|
" source name (%s) saved source name (%s)",
|
2020-01-24 16:26:56 +00:00
|
|
|
reqName, parentName, savedImageAttributes.SourceName)
|
2020-04-23 18:22:55 +00:00
|
|
|
return nil, util.NewErrSnapNameConflict(reqName, err)
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
imageData := &ImageData{
|
|
|
|
ImageUUID: objUUID,
|
|
|
|
ImagePool: savedImagePool,
|
|
|
|
ImagePoolID: savedImagePoolID,
|
|
|
|
ImageAttributes: savedImageAttributes,
|
|
|
|
}
|
|
|
|
|
|
|
|
return imageData, nil
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
UndoReservation undoes a reservation, in the reverse order of ReserveName
|
|
|
|
- The UUID directory is cleaned up before the VolName key in the csiDirectory is cleaned up
|
|
|
|
|
|
|
|
NOTE: Ensure that the Ceph volume (image or FS subvolume) backing the reservation is cleaned up
|
|
|
|
prior to cleaning up the reservation
|
|
|
|
|
|
|
|
NOTE: As the function manipulates omaps, it should be called with a lock against the request name
|
|
|
|
held, to prevent parallel operations from modifying the state of the omaps for this request name.
|
2020-01-24 16:26:56 +00:00
|
|
|
|
|
|
|
Input arguments:
|
|
|
|
- csiJournalPool: Pool name that holds the CSI request name based journal
|
|
|
|
- volJournalPool: Pool name that holds the image/subvolume and the per-image journal (may be
|
|
|
|
different if image is created in a topology constrained pool)
|
2019-05-14 19:15:01 +00:00
|
|
|
*/
|
2020-05-12 21:05:55 +00:00
|
|
|
func (conn *Connection) UndoReservation(ctx context.Context,
|
2020-01-24 16:26:56 +00:00
|
|
|
csiJournalPool, volJournalPool, volName, reqName string) error {
|
2019-05-14 19:15:01 +00:00
|
|
|
// delete volume UUID omap (first, inverse of create order)
|
2020-02-24 13:19:42 +00:00
|
|
|
|
2020-05-12 21:05:55 +00:00
|
|
|
cj := conn.config
|
2020-01-24 16:26:56 +00:00
|
|
|
if volName != "" {
|
|
|
|
if len(volName) < 36 {
|
|
|
|
return fmt.Errorf("unable to parse UUID from %s, too short", volName)
|
|
|
|
}
|
2020-02-24 13:19:42 +00:00
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
imageUUID := volName[len(volName)-36:]
|
|
|
|
if valid := uuid.Parse(imageUUID); valid == nil {
|
|
|
|
return fmt.Errorf("failed parsing UUID in %s", volName)
|
|
|
|
}
|
2020-02-24 13:19:42 +00:00
|
|
|
|
2020-05-12 21:05:55 +00:00
|
|
|
err := util.RemoveObject(ctx, conn.monitors, conn.cr, volJournalPool, cj.namespace, cj.cephUUIDDirectoryPrefix+imageUUID)
|
2020-01-24 16:26:56 +00:00
|
|
|
if err != nil {
|
2020-04-23 18:22:55 +00:00
|
|
|
if _, ok := err.(util.ErrObjectNotFound); !ok {
|
|
|
|
klog.Errorf(util.Log(ctx, "failed removing oMap %s (%s)"), cj.cephUUIDDirectoryPrefix+imageUUID, err)
|
2020-01-24 16:26:56 +00:00
|
|
|
return err
|
|
|
|
}
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// delete the request name key (last, inverse of create order)
|
2020-05-12 21:05:55 +00:00
|
|
|
err := util.RemoveOMapKey(ctx, conn.monitors, conn.cr, csiJournalPool, cj.namespace, cj.csiDirectory,
|
2019-05-14 19:15:01 +00:00
|
|
|
cj.csiNameKeyPrefix+reqName)
|
|
|
|
if err != nil {
|
2020-04-23 18:22:55 +00:00
|
|
|
klog.Errorf(util.Log(ctx, "failed removing oMap key %s (%s)"), cj.csiNameKeyPrefix+reqName, err)
|
2019-05-14 19:15:01 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// reserveOMapName creates an omap with passed in oMapNamePrefix and a generated <uuid>.
|
|
|
|
// It ensures generated omap name does not already exist and if conflicts are detected, a set
|
|
|
|
// number of retires with newer uuids are attempted before returning an error
|
2020-04-23 18:22:55 +00:00
|
|
|
func reserveOMapName(ctx context.Context, monitors string, cr *util.Credentials, pool, namespace, oMapNamePrefix string) (string, error) {
|
2019-05-14 19:15:01 +00:00
|
|
|
var iterUUID string
|
|
|
|
|
|
|
|
maxAttempts := 5
|
|
|
|
attempt := 1
|
|
|
|
for attempt <= maxAttempts {
|
|
|
|
// generate a uuid for the image name
|
|
|
|
iterUUID = uuid.NewUUID().String()
|
|
|
|
|
2020-04-23 18:22:55 +00:00
|
|
|
err := util.CreateObject(ctx, monitors, cr, pool, namespace, oMapNamePrefix+iterUUID)
|
2019-05-14 19:15:01 +00:00
|
|
|
if err != nil {
|
2020-04-23 18:22:55 +00:00
|
|
|
if _, ok := err.(util.ErrObjectExists); ok {
|
2019-05-14 19:15:01 +00:00
|
|
|
attempt++
|
|
|
|
// try again with a different uuid, for maxAttempts tries
|
2020-04-23 18:22:55 +00:00
|
|
|
klog.V(4).Infof(util.Log(ctx, "uuid (%s) conflict detected, retrying (attempt %d of %d)"),
|
2019-05-14 19:15:01 +00:00
|
|
|
iterUUID, attempt, maxAttempts)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
return iterUUID, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return "", errors.New("uuid conflicts exceeds retry threshold")
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
ReserveName adds respective entries to the csiDirectory omaps, post generating a target
|
|
|
|
UUIDDirectory for use. Further, these functions update the UUIDDirectory omaps, to store back
|
|
|
|
pointers to the CSI generated request names.
|
|
|
|
|
|
|
|
NOTE: As the function manipulates omaps, it should be called with a lock against the request name
|
|
|
|
held, to prevent parallel operations from modifying the state of the omaps for this request name.
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
Input arguments:
|
|
|
|
- journalPool: Pool where the CSI journal is stored (maybe different than the pool where the
|
|
|
|
image/subvolume is created duw to topology constraints)
|
|
|
|
- journalPoolID: pool ID of the journalPool
|
|
|
|
- imagePool: Pool where the image/subvolume is created
|
|
|
|
- imagePoolID: pool ID of the imagePool
|
|
|
|
- reqName: Name of the volume request received
|
|
|
|
- namePrefix: Prefix to use when generating the image/subvolume name (suffix is an auto-genetated UUID)
|
|
|
|
- parentName: Name of the parent image/subvolume if reservation is for a snapshot (optional)
|
|
|
|
- kmsConf: Name of the key management service used to encrypt the image (optional)
|
|
|
|
|
2019-05-14 19:15:01 +00:00
|
|
|
Return values:
|
|
|
|
- string: Contains the UUID that was reserved for the passed in reqName
|
2020-02-24 13:19:42 +00:00
|
|
|
- string: Contains the image name that was reserved for the passed in reqName
|
2019-05-14 19:15:01 +00:00
|
|
|
- error: non-nil in case of any errors
|
|
|
|
*/
|
2020-05-12 21:05:55 +00:00
|
|
|
func (conn *Connection) ReserveName(ctx context.Context,
|
2020-01-24 16:26:56 +00:00
|
|
|
journalPool string, journalPoolID int64,
|
|
|
|
imagePool string, imagePoolID int64,
|
|
|
|
reqName, namePrefix, parentName, kmsConf string) (string, string, error) {
|
|
|
|
// TODO: Take in-arg as ImageAttributes?
|
|
|
|
var (
|
|
|
|
snapSource bool
|
|
|
|
nameKeyVal string
|
2020-05-12 21:05:55 +00:00
|
|
|
cj = conn.config
|
2020-01-24 16:26:56 +00:00
|
|
|
)
|
2019-05-14 19:15:01 +00:00
|
|
|
|
|
|
|
if parentName != "" {
|
|
|
|
if cj.cephSnapSourceKey == "" {
|
|
|
|
err := errors.New("invalid request, cephSnapSourceKey is nil")
|
2020-02-24 13:19:42 +00:00
|
|
|
return "", "", err
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
snapSource = true
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create the UUID based omap first, to reserve the same and avoid conflicts
|
|
|
|
// NOTE: If any service loss occurs post creation of the UUID directory, and before
|
|
|
|
// setting the request name key (csiNameKey) to point back to the UUID directory, the
|
|
|
|
// UUID directory key will be leaked
|
2020-05-12 21:05:55 +00:00
|
|
|
volUUID, err := reserveOMapName(ctx, conn.monitors, conn.cr, imagePool, cj.namespace, cj.cephUUIDDirectoryPrefix)
|
2019-05-14 19:15:01 +00:00
|
|
|
if err != nil {
|
2020-02-24 13:19:42 +00:00
|
|
|
return "", "", err
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
|
2020-02-24 13:19:42 +00:00
|
|
|
imageName := cj.GetNameForUUID(namePrefix, volUUID, snapSource)
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
// Create request name (csiNameKey) key in csiDirectory and store the UUID based
|
|
|
|
// volume name and optionally the image pool location into it
|
2020-04-23 18:22:55 +00:00
|
|
|
if journalPool != imagePool && imagePoolID != util.InvalidPoolID {
|
2020-01-24 16:26:56 +00:00
|
|
|
buf64 := make([]byte, 8)
|
|
|
|
binary.BigEndian.PutUint64(buf64, uint64(imagePoolID))
|
|
|
|
poolIDEncodedHex := hex.EncodeToString(buf64)
|
|
|
|
nameKeyVal = poolIDEncodedHex + "/" + volUUID
|
|
|
|
} else {
|
|
|
|
nameKeyVal = volUUID
|
|
|
|
}
|
|
|
|
|
2020-05-12 21:05:55 +00:00
|
|
|
err = util.SetOMapKeyValue(ctx, conn.monitors, conn.cr, journalPool, cj.namespace, cj.csiDirectory,
|
2020-01-24 16:26:56 +00:00
|
|
|
cj.csiNameKeyPrefix+reqName, nameKeyVal)
|
2019-05-14 19:15:01 +00:00
|
|
|
if err != nil {
|
2020-02-24 13:19:42 +00:00
|
|
|
return "", "", err
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if err != nil {
|
2020-04-23 18:22:55 +00:00
|
|
|
klog.Warningf(util.Log(ctx, "reservation failed for volume: %s"), reqName)
|
2020-05-12 21:05:55 +00:00
|
|
|
errDefer := conn.UndoReservation(ctx, imagePool, journalPool, imageName, reqName)
|
2019-05-14 19:15:01 +00:00
|
|
|
if errDefer != nil {
|
2020-04-23 18:22:55 +00:00
|
|
|
klog.Warningf(util.Log(ctx, "failed undoing reservation of volume: %s (%v)"), reqName, errDefer)
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
// NOTE: UUID directory is stored on the same pool as the image, helps determine image attributes
|
|
|
|
// and also CSI journal pool, when only the VolumeID is passed in (e.g DeleteVolume/DeleteSnapshot,
|
|
|
|
// VolID during CreateSnapshot).
|
|
|
|
// Update UUID directory to store CSI request name
|
2020-05-12 21:05:55 +00:00
|
|
|
err = util.SetOMapKeyValue(ctx, conn.monitors, conn.cr, imagePool, cj.namespace, cj.cephUUIDDirectoryPrefix+volUUID,
|
2019-05-14 19:15:01 +00:00
|
|
|
cj.csiNameKey, reqName)
|
|
|
|
if err != nil {
|
2020-02-24 13:19:42 +00:00
|
|
|
return "", "", err
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
// Update UUID directory to store image name
|
2020-05-12 21:05:55 +00:00
|
|
|
err = util.SetOMapKeyValue(ctx, conn.monitors, conn.cr, imagePool, cj.namespace, cj.cephUUIDDirectoryPrefix+volUUID,
|
2020-02-24 13:19:42 +00:00
|
|
|
cj.csiImageKey, imageName)
|
|
|
|
if err != nil {
|
|
|
|
return "", "", err
|
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
// Update UUID directory to store encryption values
|
2020-02-24 13:19:42 +00:00
|
|
|
if kmsConf != "" {
|
2020-05-12 21:05:55 +00:00
|
|
|
err = util.SetOMapKeyValue(ctx, conn.monitors, conn.cr, imagePool, cj.namespace, cj.cephUUIDDirectoryPrefix+volUUID,
|
2020-02-24 13:19:42 +00:00
|
|
|
cj.encryptKMSKey, kmsConf)
|
2020-01-29 11:44:45 +00:00
|
|
|
if err != nil {
|
2020-02-24 13:19:42 +00:00
|
|
|
return "", "", err
|
2020-01-29 11:44:45 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-23 18:22:55 +00:00
|
|
|
if journalPool != imagePool && journalPoolID != util.InvalidPoolID {
|
2020-01-24 16:26:56 +00:00
|
|
|
buf64 := make([]byte, 8)
|
|
|
|
binary.BigEndian.PutUint64(buf64, uint64(journalPoolID))
|
|
|
|
journalPoolIDStr := hex.EncodeToString(buf64)
|
|
|
|
|
|
|
|
// Update UUID directory to store CSI journal pool name (prefer ID instead of name to be pool rename proof)
|
2020-05-12 21:05:55 +00:00
|
|
|
err = util.SetOMapKeyValue(ctx, conn.monitors, conn.cr, imagePool, cj.namespace, cj.cephUUIDDirectoryPrefix+volUUID,
|
2020-01-24 16:26:56 +00:00
|
|
|
cj.csiJournalPool, journalPoolIDStr)
|
|
|
|
if err != nil {
|
|
|
|
return "", "", err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-14 19:15:01 +00:00
|
|
|
if snapSource {
|
|
|
|
// Update UUID directory to store source volume UUID in case of snapshots
|
2020-05-12 21:05:55 +00:00
|
|
|
err = util.SetOMapKeyValue(ctx, conn.monitors, conn.cr, imagePool, cj.namespace, cj.cephUUIDDirectoryPrefix+volUUID,
|
2019-05-14 19:15:01 +00:00
|
|
|
cj.cephSnapSourceKey, parentName)
|
|
|
|
if err != nil {
|
2020-02-24 13:19:42 +00:00
|
|
|
return "", "", err
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-24 13:19:42 +00:00
|
|
|
return volUUID, imageName, nil
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
// ImageAttributes contains all CSI stored image attributes, typically as OMap keys
type ImageAttributes struct {
	RequestName   string // Contains the request name for the passed in UUID
	SourceName    string // Contains the parent image name for the passed in UUID, if it is a snapshot
	ImageName     string // Contains the image or subvolume name for the passed in UUID
	KmsID         string // Contains encryption KMS, if it is an encrypted image
	JournalPoolID int64  // Pool ID of the CSI journal pool, stored in big endian format (on-disk data)
}
|
|
|
|
|
|
|
|
// GetImageAttributes fetches all keys and their values, from a UUID directory, returning ImageAttributes structure
|
2020-05-12 21:05:55 +00:00
|
|
|
func (conn *Connection) GetImageAttributes(ctx context.Context, pool, objectUUID string, snapSource bool) (*ImageAttributes, error) {
|
2020-01-24 16:26:56 +00:00
|
|
|
var (
|
|
|
|
err error
|
|
|
|
imageAttributes *ImageAttributes = &ImageAttributes{}
|
2020-05-12 21:05:55 +00:00
|
|
|
cj = conn.config
|
2020-01-24 16:26:56 +00:00
|
|
|
)
|
2019-05-14 19:15:01 +00:00
|
|
|
|
|
|
|
if snapSource && cj.cephSnapSourceKey == "" {
|
2020-01-24 16:26:56 +00:00
|
|
|
err = errors.New("invalid request, cephSnapSourceKey is nil")
|
|
|
|
return nil, err
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: fetch all omap vals in one call, than make multiple listomapvals
|
2020-05-12 21:05:55 +00:00
|
|
|
imageAttributes.RequestName, err = util.GetOMapValue(ctx, conn.monitors, conn.cr, pool, cj.namespace,
|
2019-05-14 19:15:01 +00:00
|
|
|
cj.cephUUIDDirectoryPrefix+objectUUID, cj.csiNameKey)
|
|
|
|
if err != nil {
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, err
|
2020-02-24 13:19:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// image key was added at some point, so not all volumes will have this key set
|
|
|
|
// when ceph-csi was upgraded
|
2020-05-12 21:05:55 +00:00
|
|
|
imageAttributes.ImageName, err = util.GetOMapValue(ctx, conn.monitors, conn.cr, pool, cj.namespace,
|
2020-02-24 13:19:42 +00:00
|
|
|
cj.cephUUIDDirectoryPrefix+objectUUID, cj.csiImageKey)
|
|
|
|
if err != nil {
|
|
|
|
// if the key was not found, assume the default key + UUID
|
|
|
|
// otherwise return error
|
2020-03-19 12:30:59 +00:00
|
|
|
switch err.(type) {
|
|
|
|
default:
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, err
|
2020-04-23 18:22:55 +00:00
|
|
|
case util.ErrKeyNotFound, util.ErrPoolNotFound:
|
2020-02-24 13:19:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if snapSource {
|
2020-01-24 16:26:56 +00:00
|
|
|
imageAttributes.ImageName = defaultSnapshotNamingPrefix + objectUUID
|
2020-02-24 13:19:42 +00:00
|
|
|
} else {
|
2020-01-24 16:26:56 +00:00
|
|
|
imageAttributes.ImageName = defaultVolumeNamingPrefix + objectUUID
|
2020-02-24 13:19:42 +00:00
|
|
|
}
|
2020-01-29 11:44:45 +00:00
|
|
|
}
|
|
|
|
|
2020-05-12 21:05:55 +00:00
|
|
|
imageAttributes.KmsID, err = util.GetOMapValue(ctx, conn.monitors, conn.cr, pool, cj.namespace,
|
2020-01-29 11:44:45 +00:00
|
|
|
cj.cephUUIDDirectoryPrefix+objectUUID, cj.encryptKMSKey)
|
|
|
|
if err != nil {
|
2020-03-19 12:30:59 +00:00
|
|
|
// ErrKeyNotFound means no encryption KMS was used
|
|
|
|
switch err.(type) {
|
|
|
|
default:
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, fmt.Errorf("OMapVal for %s/%s failed to get encryption KMS value: %s",
|
2020-02-06 16:23:14 +00:00
|
|
|
pool, cj.cephUUIDDirectoryPrefix+objectUUID, err)
|
2020-04-23 18:22:55 +00:00
|
|
|
case util.ErrKeyNotFound, util.ErrPoolNotFound:
|
2020-01-29 11:44:45 +00:00
|
|
|
}
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
|
2020-05-12 21:05:55 +00:00
|
|
|
journalPoolIDStr, err := util.GetOMapValue(ctx, conn.monitors, conn.cr, pool, cj.namespace,
|
2020-01-24 16:26:56 +00:00
|
|
|
cj.cephUUIDDirectoryPrefix+objectUUID, cj.csiJournalPool)
|
|
|
|
if err != nil {
|
2020-04-23 18:22:55 +00:00
|
|
|
if _, ok := err.(util.ErrKeyNotFound); !ok {
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
2020-04-23 18:22:55 +00:00
|
|
|
imageAttributes.JournalPoolID = util.InvalidPoolID
|
2020-01-24 16:26:56 +00:00
|
|
|
} else {
|
|
|
|
var buf64 []byte
|
|
|
|
buf64, err = hex.DecodeString(journalPoolIDStr)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
imageAttributes.JournalPoolID = int64(binary.BigEndian.Uint64(buf64))
|
|
|
|
}
|
|
|
|
|
2019-05-14 19:15:01 +00:00
|
|
|
if snapSource {
|
2020-05-12 21:05:55 +00:00
|
|
|
imageAttributes.SourceName, err = util.GetOMapValue(ctx, conn.monitors, conn.cr, pool, cj.namespace,
|
2019-05-14 19:15:01 +00:00
|
|
|
cj.cephUUIDDirectoryPrefix+objectUUID, cj.cephSnapSourceKey)
|
|
|
|
if err != nil {
|
2020-01-24 16:26:56 +00:00
|
|
|
return nil, err
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-24 16:26:56 +00:00
|
|
|
return imageAttributes, nil
|
2019-05-14 19:15:01 +00:00
|
|
|
}
|
2020-05-12 21:05:55 +00:00
|
|
|
|
|
|
|
// Destroy frees any resources and invalidates the journal connection.
|
|
|
|
func (conn *Connection) Destroy() {
|
|
|
|
// invalidate cluster connection metadata
|
|
|
|
conn.monitors = ""
|
|
|
|
conn.cr = nil
|
|
|
|
}
|