mirror of
https://github.com/ceph/ceph-csi.git
synced 2024-11-30 02:00:19 +00:00
1a1630d0c5
This commit adds groupUUID param for `ReserveName` to be used for OMAP name reserve instead of auto-generating. This is useful for mirroring and metro-DR ensuring that mirrored resources have consistent OMAP names across mirrored clusters. Signed-off-by: Praveen M <m.praveen@ibm.com>
471 lines
15 KiB
Go
471 lines
15 KiB
Go
/*
|
|
Copyright 2024 The Ceph-CSI Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package journal
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/ceph/ceph-csi/internal/util"
|
|
"github.com/ceph/ceph-csi/internal/util/log"
|
|
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
const (
|
|
defaultVolumeGroupNamingPrefix string = "csi-vol-group-"
|
|
)
|
|
|
|
type VolumeGroupJournal interface {
|
|
Destroy()
|
|
CheckReservation(
|
|
ctx context.Context,
|
|
journalPool,
|
|
reqName,
|
|
namePrefix string) (*VolumeGroupData, error)
|
|
UndoReservation(
|
|
ctx context.Context,
|
|
csiJournalPool,
|
|
groupName,
|
|
reqName string) error
|
|
// GetGroupAttributes fetches all keys and their values, from a UUID directory,
|
|
// returning VolumeGroupAttributes structure.
|
|
GetVolumeGroupAttributes(
|
|
ctx context.Context,
|
|
pool,
|
|
objectUUID string) (*VolumeGroupAttributes, error)
|
|
ReserveName(
|
|
ctx context.Context,
|
|
journalPool,
|
|
reqName,
|
|
groupUUID,
|
|
namePrefix string) (string, string, error)
|
|
// AddVolumesMapping adds a volumeMap map which contains volumeID's and its
|
|
// corresponding values mapping which need to be added to the UUID
|
|
// directory. value can be anything which needs mapping, in case of
|
|
// volumegroupsnapshot its a snapshotID and its empty in case of
|
|
// volumegroup.
|
|
AddVolumesMapping(
|
|
ctx context.Context,
|
|
pool,
|
|
reservedUUID string,
|
|
volumeMap map[string]string) error
|
|
// RemoveVolumesMapping removes volumeIDs mapping from the UUID directory.
|
|
RemoveVolumesMapping(
|
|
ctx context.Context,
|
|
pool,
|
|
reservedUUID string,
|
|
volumeIDs []string) error
|
|
}
|
|
|
|
// VolumeGroupJournalConfig contains the configuration.
|
|
type VolumeGroupJournalConfig struct {
|
|
Config
|
|
|
|
// csiCreationTimeKey can hold the key for the time a group was
|
|
// created. At least RBD groups do not provide the creation time
|
|
// through API calls.
|
|
csiCreationTimeKey string
|
|
}
|
|
|
|
type volumeGroupJournalConnection struct {
|
|
config *VolumeGroupJournalConfig
|
|
connection *Connection
|
|
}
|
|
|
|
// assert that volumeGroupJournalConnection implements the VolumeGroupJournal
|
|
// interface.
|
|
var _ VolumeGroupJournal = &volumeGroupJournalConnection{}
|
|
|
|
// NewCSIVolumeGroupJournal returns an instance of VolumeGroupJournal for groups.
|
|
func NewCSIVolumeGroupJournal(suffix string) VolumeGroupJournalConfig {
|
|
return VolumeGroupJournalConfig{
|
|
Config: Config{
|
|
csiDirectory: "csi.groups." + suffix,
|
|
csiNameKeyPrefix: "csi.volume.group.",
|
|
cephUUIDDirectoryPrefix: "csi.volume.group.",
|
|
csiImageKey: "csi.groupname",
|
|
csiNameKey: "csi.volname",
|
|
namespace: "",
|
|
},
|
|
csiCreationTimeKey: "csi.creationtime",
|
|
}
|
|
}
|
|
|
|
// SetNamespace sets the namespace for the journal.
|
|
func (vgc *VolumeGroupJournalConfig) SetNamespace(ns string) {
|
|
vgc.Config.namespace = ns
|
|
}
|
|
|
|
// NewCSIVolumeGroupJournalWithNamespace returns an instance of VolumeGroupJournal for
|
|
// volume groups using a predetermined namespace value.
|
|
func NewCSIVolumeGroupJournalWithNamespace(suffix, ns string) VolumeGroupJournalConfig {
|
|
j := NewCSIVolumeGroupJournal(suffix)
|
|
j.SetNamespace(ns)
|
|
|
|
return j
|
|
}
|
|
|
|
// Connect establishes a new connection to a ceph cluster for journal metadata.
|
|
func (vgc *VolumeGroupJournalConfig) Connect(
|
|
monitors,
|
|
namespace string,
|
|
cr *util.Credentials,
|
|
) (VolumeGroupJournal, error) {
|
|
vgjc := &volumeGroupJournalConnection{}
|
|
vgjc.config = &VolumeGroupJournalConfig{
|
|
Config: vgc.Config,
|
|
csiCreationTimeKey: vgc.csiCreationTimeKey,
|
|
}
|
|
conn, err := vgc.Config.Connect(monitors, namespace, cr)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
vgjc.connection = conn
|
|
|
|
return vgjc, nil
|
|
}
|
|
|
|
// Destroy frees any resources and invalidates the journal connection.
|
|
func (vgjc *volumeGroupJournalConnection) Destroy() {
|
|
if vgjc.connection != nil {
|
|
vgjc.connection.Destroy()
|
|
vgjc.connection = nil
|
|
}
|
|
}
|
|
|
|
// VolumeGroupData contains the GroupUUID and VolumeGroupAttributes for a
|
|
// volume group.
|
|
type VolumeGroupData struct {
|
|
GroupUUID string
|
|
GroupName string
|
|
VolumeGroupAttributes *VolumeGroupAttributes
|
|
}
|
|
|
|
func generateVolumeGroupName(namePrefix, groupUUID string) string {
|
|
if namePrefix == "" {
|
|
namePrefix = defaultVolumeGroupNamingPrefix
|
|
}
|
|
|
|
return namePrefix + groupUUID
|
|
}
|
|
|
|
/*
|
|
CheckReservation checks if given request name contains a valid reservation
|
|
- If there is a valid reservation, then the corresponding VolumeGroupData for
|
|
the snapshot group is returned
|
|
- If there is a reservation that is stale (or not fully cleaned up), it is
|
|
garbage collected using the UndoReservation call, as appropriate
|
|
|
|
NOTE: As the function manipulates omaps, it should be called with a lock
|
|
against the request name held, to prevent parallel operations from modifying
|
|
the state of the omaps for this request name.
|
|
|
|
Return values:
|
|
- VolumeGroupData: which contains the GroupUUID and GroupSnapshotAttributes
|
|
that were reserved for the passed in reqName, empty if there was no
|
|
reservation found.
|
|
- error: non-nil in case of any errors.
|
|
*/
|
|
func (vgjc *volumeGroupJournalConnection) CheckReservation(ctx context.Context,
|
|
journalPool, reqName, namePrefix string,
|
|
) (*VolumeGroupData, error) {
|
|
var (
|
|
cj = vgjc.config
|
|
volGroupData = &VolumeGroupData{}
|
|
)
|
|
|
|
// check if request name is already part of the directory omap
|
|
fetchKeys := []string{
|
|
cj.csiNameKeyPrefix + reqName,
|
|
}
|
|
values, err := getOMapValues(
|
|
ctx, vgjc.connection, journalPool, cj.namespace, cj.csiDirectory,
|
|
cj.commonPrefix, fetchKeys)
|
|
if err != nil {
|
|
if errors.Is(err, util.ErrKeyNotFound) || errors.Is(err, util.ErrPoolNotFound) {
|
|
// pool or omap (oid) was not present
|
|
// stop processing but without an error for no reservation exists
|
|
return nil, nil
|
|
}
|
|
|
|
return nil, err
|
|
}
|
|
|
|
objUUID, found := values[cj.csiNameKeyPrefix+reqName]
|
|
if !found {
|
|
// omap was read but was missing the desired key-value pair
|
|
// stop processing but without an error for no reservation exists
|
|
return nil, nil
|
|
}
|
|
volGroupData.GroupUUID = objUUID
|
|
|
|
savedVolumeGroupAttributes, err := vgjc.GetVolumeGroupAttributes(ctx, journalPool,
|
|
objUUID)
|
|
if err != nil {
|
|
// error should specifically be not found, for image to be absent, any other error
|
|
// is not conclusive, and we should not proceed
|
|
if errors.Is(err, util.ErrKeyNotFound) {
|
|
err = vgjc.UndoReservation(ctx, journalPool,
|
|
generateVolumeGroupName(namePrefix, objUUID), reqName)
|
|
}
|
|
|
|
return nil, err
|
|
}
|
|
|
|
// check if the request name in the omap matches the passed in request name
|
|
if savedVolumeGroupAttributes.RequestName != reqName {
|
|
// NOTE: This should never be possible, hence no cleanup, but log error
|
|
// and return, as cleanup may need to occur manually!
|
|
return nil, fmt.Errorf("internal state inconsistent, omap names mismatch,"+
|
|
" request name (%s) volume group UUID (%s) volume group omap name (%s)",
|
|
reqName, objUUID, savedVolumeGroupAttributes.RequestName)
|
|
}
|
|
volGroupData.GroupName = savedVolumeGroupAttributes.GroupName
|
|
volGroupData.VolumeGroupAttributes = &VolumeGroupAttributes{}
|
|
volGroupData.VolumeGroupAttributes.RequestName = savedVolumeGroupAttributes.RequestName
|
|
volGroupData.VolumeGroupAttributes.VolumeMap = savedVolumeGroupAttributes.VolumeMap
|
|
volGroupData.VolumeGroupAttributes.CreationTime = savedVolumeGroupAttributes.CreationTime
|
|
|
|
return volGroupData, nil
|
|
}
|
|
|
|
/*
|
|
UndoReservation undoes a reservation, in the reverse order of ReserveName
|
|
- The UUID directory is cleaned up before the GroupName key in the csiDirectory is cleaned up
|
|
|
|
NOTE: Ensure that the Ceph volume snapshots backing the reservation is cleaned up
|
|
prior to cleaning up the reservation
|
|
|
|
NOTE: As the function manipulates omaps, it should be called with a lock against the request name
|
|
held, to prevent parallel operations from modifying the state of the omaps for this request name.
|
|
|
|
Input arguments:
|
|
- csiJournalPool: Pool name that holds the CSI request name based journal
|
|
- groupID: ID of the volume group, generated from the UUID
|
|
- reqName: Request name for the volume group
|
|
*/
|
|
func (vgjc *volumeGroupJournalConnection) UndoReservation(ctx context.Context,
|
|
csiJournalPool, groupID, reqName string,
|
|
) error {
|
|
// delete volume UUID omap (first, inverse of create order)
|
|
cj := vgjc.config
|
|
if groupID != "" {
|
|
if len(groupID) < uuidEncodedLength {
|
|
return fmt.Errorf("unable to parse UUID from %s, too short", groupID)
|
|
}
|
|
|
|
groupUUID := groupID[len(groupID)-36:]
|
|
if _, err := uuid.Parse(groupUUID); err != nil {
|
|
return fmt.Errorf("failed parsing UUID in %s: %w", groupUUID, err)
|
|
}
|
|
|
|
err := util.RemoveObject(
|
|
ctx,
|
|
vgjc.connection.monitors,
|
|
vgjc.connection.cr,
|
|
csiJournalPool,
|
|
cj.namespace,
|
|
cj.cephUUIDDirectoryPrefix+groupUUID)
|
|
if err != nil {
|
|
if !errors.Is(err, util.ErrObjectNotFound) {
|
|
log.ErrorLog(ctx, "failed removing oMap %s (%s)", cj.cephUUIDDirectoryPrefix+groupUUID, err)
|
|
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
// delete the request name key (last, inverse of create order)
|
|
err := removeMapKeys(ctx, vgjc.connection, csiJournalPool, cj.namespace, cj.csiDirectory,
|
|
[]string{cj.csiNameKeyPrefix + reqName})
|
|
if err != nil {
|
|
log.ErrorLog(ctx, "failed removing oMap key %s (%s)", cj.csiNameKeyPrefix+reqName, err)
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
/*
|
|
ReserveName adds respective entries to the csiDirectory omaps, post generating a target
|
|
UUIDDirectory for use. Further, these functions update the UUIDDirectory omaps, to store back
|
|
pointers to the CSI generated request names.
|
|
|
|
NOTE: As the function manipulates omaps, it should be called with a lock against the request name
|
|
held, to prevent parallel operations from modifying the state of the omaps for this request name.
|
|
|
|
Input arguments:
|
|
- journalPool: Pool where the CSI journal is stored
|
|
- reqName: Name of the volumeGroupSnapshot request received
|
|
- groupUUID: UUID need to be reserved instead of auto-generating one (this is useful for RBD mirroring)
|
|
- namePrefix: Prefix to use when generating the volumeGroupName name (suffix is an auto-generated UUID)
|
|
|
|
Return values:
|
|
- string: Contains the UUID that was reserved for the passed in reqName
|
|
- string: Contains the VolumeGroup name that was reserved for the passed in reqName
|
|
- error: non-nil in case of any errors
|
|
*/
|
|
func (vgjc *volumeGroupJournalConnection) ReserveName(ctx context.Context,
|
|
journalPool, reqName, groupUUID, namePrefix string,
|
|
) (string, string, error) {
|
|
cj := vgjc.config
|
|
|
|
// Create the UUID based omap first, to reserve the same and avoid conflicts
|
|
// NOTE: If any service loss occurs post creation of the UUID directory, and before
|
|
// setting the request name key to point back to the UUID directory, the
|
|
// UUID directory key will be leaked
|
|
objUUID, err := reserveOMapName(
|
|
ctx,
|
|
vgjc.connection.monitors,
|
|
vgjc.connection.cr,
|
|
journalPool,
|
|
cj.namespace,
|
|
cj.cephUUIDDirectoryPrefix,
|
|
groupUUID)
|
|
if err != nil {
|
|
return "", "", err
|
|
}
|
|
groupName := generateVolumeGroupName(namePrefix, objUUID)
|
|
nameKeyVal := objUUID
|
|
// After generating the UUID Directory omap, we populate the csiDirectory
|
|
// omap with a key-value entry to map the request to the backend volume group:
|
|
// `csiNameKeyPrefix + reqName: nameKeyVal`
|
|
err = setOMapKeys(ctx, vgjc.connection, journalPool, cj.namespace, cj.csiDirectory,
|
|
map[string]string{cj.csiNameKeyPrefix + reqName: nameKeyVal})
|
|
if err != nil {
|
|
return "", "", err
|
|
}
|
|
defer func() {
|
|
if err != nil {
|
|
log.WarningLog(ctx, "reservation failed for volume group: %s", reqName)
|
|
errDefer := vgjc.UndoReservation(ctx, journalPool, groupName, reqName)
|
|
if errDefer != nil {
|
|
log.WarningLog(ctx, "failed undoing reservation of volume group: %s (%v)", reqName, errDefer)
|
|
}
|
|
}
|
|
}()
|
|
|
|
oid := cj.cephUUIDDirectoryPrefix + objUUID
|
|
omapValues := map[string]string{}
|
|
|
|
// Update UUID directory to store CSI request name
|
|
omapValues[cj.csiNameKey] = reqName
|
|
omapValues[cj.csiImageKey] = groupName
|
|
|
|
t, err := time.Now().MarshalText()
|
|
if err != nil {
|
|
return "", "", err
|
|
}
|
|
omapValues[cj.csiCreationTimeKey] = string(t)
|
|
|
|
err = setOMapKeys(ctx, vgjc.connection, journalPool, cj.namespace, oid, omapValues)
|
|
if err != nil {
|
|
return "", "", err
|
|
}
|
|
|
|
return objUUID, groupName, nil
|
|
}
|
|
|
|
// VolumeGroupAttributes contains the request name and the volumeID's and
|
|
// the corresponding snapshotID's.
|
|
type VolumeGroupAttributes struct {
|
|
RequestName string // Contains the request name for the passed in UUID
|
|
GroupName string // Contains the group name
|
|
CreationTime *time.Time // Contains the time of creation of the group
|
|
VolumeMap map[string]string // Contains the volumeID and the corresponding value mapping
|
|
}
|
|
|
|
func (vgjc *volumeGroupJournalConnection) GetVolumeGroupAttributes(
|
|
ctx context.Context,
|
|
pool, objectUUID string,
|
|
) (*VolumeGroupAttributes, error) {
|
|
var (
|
|
err error
|
|
groupAttributes = &VolumeGroupAttributes{}
|
|
cj = vgjc.config
|
|
)
|
|
|
|
values, err := listOMapValues(
|
|
ctx, vgjc.connection, pool, cj.namespace, cj.cephUUIDDirectoryPrefix+objectUUID,
|
|
cj.commonPrefix)
|
|
if err != nil {
|
|
if !errors.Is(err, util.ErrKeyNotFound) && !errors.Is(err, util.ErrPoolNotFound) {
|
|
return nil, err
|
|
}
|
|
log.WarningLog(ctx, "unable to read omap values: pool missing: %v", err)
|
|
}
|
|
|
|
t := &time.Time{}
|
|
err = t.UnmarshalText([]byte(values[cj.csiCreationTimeKey]))
|
|
if err != nil {
|
|
t = nil
|
|
}
|
|
|
|
groupAttributes.RequestName = values[cj.csiNameKey]
|
|
groupAttributes.GroupName = values[cj.csiImageKey]
|
|
groupAttributes.CreationTime = t
|
|
|
|
// Remove request name key and group name key from the omap, as we are
|
|
// looking for volumeID/snapshotID mapping
|
|
delete(values, cj.csiNameKey)
|
|
delete(values, cj.csiImageKey)
|
|
delete(values, cj.csiCreationTimeKey)
|
|
groupAttributes.VolumeMap = map[string]string{}
|
|
for k, v := range values {
|
|
groupAttributes.VolumeMap[k] = v
|
|
}
|
|
|
|
return groupAttributes, nil
|
|
}
|
|
|
|
func (vgjc *volumeGroupJournalConnection) AddVolumesMapping(
|
|
ctx context.Context,
|
|
pool,
|
|
reservedUUID string,
|
|
volumeMap map[string]string,
|
|
) error {
|
|
err := setOMapKeys(ctx, vgjc.connection, pool, vgjc.config.namespace, vgjc.config.cephUUIDDirectoryPrefix+reservedUUID,
|
|
volumeMap)
|
|
if err != nil {
|
|
log.ErrorLog(ctx, "failed to add volumeMap %v: %w ", volumeMap, err)
|
|
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (vgjc *volumeGroupJournalConnection) RemoveVolumesMapping(
|
|
ctx context.Context,
|
|
pool,
|
|
reservedUUID string,
|
|
volumeIDs []string,
|
|
) error {
|
|
err := removeMapKeys(ctx, vgjc.connection, pool, vgjc.config.namespace,
|
|
vgjc.config.cephUUIDDirectoryPrefix+reservedUUID,
|
|
volumeIDs)
|
|
if err != nil {
|
|
log.ErrorLog(ctx, "failed removing volume mapping from group: key: %q %v", volumeIDs, err)
|
|
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|