cephfs: added support for snapshot-backed volumes

This commit implements most of the
docs/design/proposals/cephfs-snapshot-shallow-ro-vol.md design document:
specifically, (de-)provisioning of snapshot-backed volumes and mounting such
volumes, as well as mounting pre-provisioned snapshot-backed volumes.

Signed-off-by: Robert Vasek <robert.vasek@cern.ch>
Robert Vasek 2022-04-06 15:26:07 +02:00 committed by mergify[bot]
parent 0807fd2e6c
commit fd7559a903
7 changed files with 710 additions and 107 deletions
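For orientation before the diff: the sketch below shows the shape of a CreateVolumeRequest that exercises the new code paths. As enforced by checkValidCreateVolumeRequest and NewVolumeOptions in this diff, the backingSnapshot parameter must be accompanied by a snapshot volume content source and read-only access modes. All names and IDs below are illustrative, not taken from this commit.

package main

import (
	"github.com/container-storage-interface/spec/lib/go/csi"
)

// newSnapshotBackedVolumeRequest builds a CreateVolumeRequest for a
// snapshot-backed volume; field values are made up for the example.
func newSnapshotBackedVolumeRequest() *csi.CreateVolumeRequest {
	return &csi.CreateVolumeRequest{
		Name: "pvc-example",
		Parameters: map[string]string{
			"clusterID": "ceph-cluster-1",
			"fsName":    "cephfs",
			// Opt in to snapshot-backed provisioning; parsed with
			// strconv.ParseBool in NewVolumeOptions.
			"backingSnapshot": "true",
		},
		// backingSnapshot requires a snapshot volume content source.
		VolumeContentSource: &csi.VolumeContentSource{
			Type: &csi.VolumeContentSource_SnapshotSource{
				Snapshot: &csi.VolumeContentSource_SnapshotVolumeSource{
					SnapshotId: "snapshot-id-from-CreateSnapshot",
				},
			},
		},
		// Only read-only access modes pass validation for such volumes.
		VolumeCapabilities: []*csi.VolumeCapability{{
			AccessMode: &csi.VolumeCapability_AccessMode{
				Mode: csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY,
			},
		}},
	}
}

func main() {
	_ = newSnapshotBackedVolumeRequest()
}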


@@ -29,6 +29,7 @@ import (
"github.com/ceph/ceph-csi/internal/util"
"github.com/ceph/ceph-csi/internal/util/k8s"
"github.com/ceph/ceph-csi/internal/util/log"
rterrors "github.com/ceph/ceph-csi/internal/util/reftracker/errors"
"github.com/container-storage-interface/spec/lib/go/csi"
"github.com/golang/protobuf/ptypes/timestamp"
@@ -65,47 +66,77 @@ func (cs *ControllerServer) createBackingVolume(
volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID)
if sID != nil {
if err = cs.OperationLocks.GetRestoreLock(sID.SnapshotID); err != nil {
log.ErrorLog(ctx, err.Error())
return status.Error(codes.Aborted, err.Error())
}
defer cs.OperationLocks.ReleaseRestoreLock(sID.SnapshotID)
snap := core.Snapshot{
SnapshotID: sID.FsSnapshotName,
SubVolume: &parentVolOpt.SubVolume,
}
err = volClient.CreateCloneFromSnapshot(ctx, snap)
if err != nil {
log.ErrorLog(ctx, "failed to create clone from snapshot %s: %v", sID.FsSnapshotName, err)
return err
}
return err
return cs.createBackingVolumeFromSnapshotSource(ctx, volOptions, parentVolOpt, volClient, sID)
}
if parentVolOpt != nil {
if err = cs.OperationLocks.GetCloneLock(pvID.VolumeID); err != nil {
log.ErrorLog(ctx, err.Error())
return status.Error(codes.Aborted, err.Error())
}
defer cs.OperationLocks.ReleaseCloneLock(pvID.VolumeID)
err = volClient.CreateCloneFromSubvolume(
ctx, &parentVolOpt.SubVolume)
if err != nil {
log.ErrorLog(ctx, "failed to create clone from subvolume %s: %v", fsutil.VolumeID(pvID.FsSubvolName), err)
if parentVolOpt != nil {
return cs.createBackingVolumeFromVolumeSource(ctx, parentVolOpt, volClient, pvID)
}
if err = volClient.CreateVolume(ctx); err != nil {
log.ErrorLog(ctx, "failed to create volume %s: %v", volOptions.RequestName, err)
return status.Error(codes.Internal, err.Error())
}
return nil
}
func (cs *ControllerServer) createBackingVolumeFromSnapshotSource(
ctx context.Context,
volOptions *store.VolumeOptions,
parentVolOpt *store.VolumeOptions,
volClient core.SubVolumeClient,
sID *store.SnapshotIdentifier,
) error {
if err := cs.OperationLocks.GetRestoreLock(sID.SnapshotID); err != nil {
log.ErrorLog(ctx, err.Error())
return status.Error(codes.Aborted, err.Error())
}
defer cs.OperationLocks.ReleaseRestoreLock(sID.SnapshotID)
if volOptions.BackingSnapshot {
if err := store.AddSnapshotBackedVolumeRef(ctx, volOptions); err != nil {
log.ErrorLog(ctx, "failed to create snapshot-backed volume from snapshot %s: %v",
sID.FsSnapshotName, err)
return err
}
return nil
}
if err = volClient.CreateVolume(ctx); err != nil {
log.ErrorLog(ctx, "failed to create volume %s: %v", volOptions.RequestName, err)
return status.Error(codes.Internal, err.Error())
err := volClient.CreateCloneFromSnapshot(ctx, core.Snapshot{
SnapshotID: sID.FsSnapshotName,
SubVolume: &parentVolOpt.SubVolume,
})
if err != nil {
log.ErrorLog(ctx, "failed to create clone from snapshot %s: %v", sID.FsSnapshotName, err)
return err
}
return nil
}
func (cs *ControllerServer) createBackingVolumeFromVolumeSource(
ctx context.Context,
parentVolOpt *store.VolumeOptions,
volClient core.SubVolumeClient,
pvID *store.VolumeIdentifier,
) error {
if err := cs.OperationLocks.GetCloneLock(pvID.VolumeID); err != nil {
log.ErrorLog(ctx, err.Error())
return status.Error(codes.Aborted, err.Error())
}
defer cs.OperationLocks.ReleaseCloneLock(pvID.VolumeID)
if err := volClient.CreateCloneFromSubvolume(ctx, &parentVolOpt.SubVolume); err != nil {
log.ErrorLog(ctx, "failed to create clone from subvolume %s: %v", fsutil.VolumeID(pvID.FsSubvolName), err)
return err
}
return nil
@@ -158,6 +189,7 @@ func checkValidCreateVolumeRequest(
parentVol *store.VolumeOptions,
pvID *store.VolumeIdentifier,
sID *store.SnapshotIdentifier,
req *csi.CreateVolumeRequest,
) error {
switch {
case pvID != nil:
@@ -168,6 +200,10 @@ func checkValidCreateVolumeRequest(
parentVol.Size,
vol.Size)
}
if vol.BackingSnapshot {
return errors.New("cloning snapshot-backed volumes is currently not supported")
}
case sID != nil:
if vol.Size < parentVol.Size {
return fmt.Errorf(
@@ -176,6 +212,25 @@ func checkValidCreateVolumeRequest(
parentVol.Size,
vol.Size)
}
if vol.BackingSnapshot {
if vol.Size != parentVol.Size {
return fmt.Errorf(
"cannot create snapshot-backed volume of different size: expected %d bytes, got %d bytes",
parentVol.Size,
vol.Size,
)
}
volCaps := req.GetVolumeCapabilities()
for _, volCap := range volCaps {
mode := volCap.AccessMode.Mode
if mode != csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY &&
mode != csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY {
return errors.New("backingSnapshot may be used only with read-only access modes")
}
}
}
}
return nil
@@ -233,7 +288,7 @@ func (cs *ControllerServer) CreateVolume(
defer parentVol.Destroy()
}
err = checkValidCreateVolumeRequest(volOptions, parentVol, pvID, sID)
err = checkValidCreateVolumeRequest(volOptions, parentVol, pvID, sID, req)
if err != nil {
return nil, status.Error(codes.InvalidArgument, err.Error())
}
@@ -249,7 +304,7 @@ func (cs *ControllerServer) CreateVolume(
// TODO: return an error if the requested volume size is greater than the size of the found volume
if vID != nil {
if sID != nil || pvID != nil {
if (sID != nil || pvID != nil) && !volOptions.BackingSnapshot {
volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID)
err = volClient.ExpandVolume(ctx, volOptions.Size)
if err != nil {
@@ -321,26 +376,32 @@ func (cs *ControllerServer) CreateVolume(
return nil, err
}
volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID)
volOptions.RootPath, err = volClient.GetVolumeRootPathCeph(ctx)
if err != nil {
purgeErr := volClient.PurgeVolume(ctx, true)
if purgeErr != nil {
log.ErrorLog(ctx, "failed to delete volume %s: %v", vID.FsSubvolName, purgeErr)
// All errors other than ErrVolumeNotFound should return an error back to the caller
if !errors.Is(purgeErr, cerrors.ErrVolumeNotFound) {
// If the subvolume deletion failed, we should not clean up
// the OMAP entry; that would leave a stale subvolume in the cluster.
// Set err=nil so that when we get the request again we can fetch
// the subvolume info.
err = nil
if !volOptions.BackingSnapshot {
// Get root path for the created subvolume.
// Note that root path for snapshot-backed volumes has already been set when
// building VolumeOptions.
return nil, status.Error(codes.Internal, purgeErr.Error())
volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID)
volOptions.RootPath, err = volClient.GetVolumeRootPathCeph(ctx)
if err != nil {
purgeErr := volClient.PurgeVolume(ctx, true)
if purgeErr != nil {
log.ErrorLog(ctx, "failed to delete volume %s: %v", vID.FsSubvolName, purgeErr)
// All errors other than ErrVolumeNotFound should return an error back to the caller
if !errors.Is(purgeErr, cerrors.ErrVolumeNotFound) {
// If the subvolume deletion failed, we should not clean up
// the OMAP entry; that would leave a stale subvolume in the cluster.
// Set err=nil so that when we get the request again we can fetch
// the subvolume info.
err = nil
return nil, status.Error(codes.Internal, purgeErr.Error())
}
}
}
log.ErrorLog(ctx, "failed to get subvolume path %s: %v", vID.FsSubvolName, err)
log.ErrorLog(ctx, "failed to get subvolume path %s: %v", vID.FsSubvolName, err)
return nil, status.Error(codes.Internal, err.Error())
return nil, status.Error(codes.Internal, err.Error())
}
}
log.DebugLog(ctx, "cephfs: successfully created backing volume named %s for request name %s",
@@ -452,16 +513,8 @@ func (cs *ControllerServer) DeleteVolume(
}
defer cr.DeleteCredentials()
volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID)
if err = volClient.PurgeVolume(ctx, false); err != nil {
log.ErrorLog(ctx, "failed to delete volume %s: %v", volID, err)
if errors.Is(err, cerrors.ErrVolumeHasSnapshots) {
return nil, status.Error(codes.FailedPrecondition, err.Error())
}
if !errors.Is(err, cerrors.ErrVolumeNotFound) {
return nil, status.Error(codes.Internal, err.Error())
}
if err := cleanUpBackingVolume(ctx, volOptions, vID, cr); err != nil {
return nil, err
}
if err := store.UndoVolReservation(ctx, volOptions, *vID, secrets); err != nil {
@@ -473,6 +526,84 @@ func (cs *ControllerServer) DeleteVolume(
return &csi.DeleteVolumeResponse{}, nil
}
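// cleanUpBackingVolume purges a regular volume's backing subvolume; for a
// snapshot-backed volume it instead un-references the backing snapshot and
// deletes the snapshot if nothing references it anymore.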
func cleanUpBackingVolume(
ctx context.Context,
volOptions *store.VolumeOptions,
volID *store.VolumeIdentifier,
cr *util.Credentials,
) error {
if !volOptions.BackingSnapshot {
// Regular volumes need to be purged.
volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID)
if err := volClient.PurgeVolume(ctx, false); err != nil {
log.ErrorLog(ctx, "failed to delete volume %s: %v", volID, err)
if errors.Is(err, cerrors.ErrVolumeHasSnapshots) {
return status.Error(codes.FailedPrecondition, err.Error())
}
if !errors.Is(err, cerrors.ErrVolumeNotFound) {
return status.Error(codes.Internal, err.Error())
}
}
return nil
}
// Snapshot-backed volumes need to un-reference the backing snapshot, and
// the snapshot itself may need to be deleted if its reftracker doesn't
// hold any references anymore.
backingSnapNeedsDelete, err := store.UnrefSnapshotBackedVolume(ctx, volOptions)
if err != nil {
if errors.Is(err, rterrors.ErrObjectOutOfDate) {
return status.Error(codes.Aborted, err.Error())
}
return status.Error(codes.Internal, err.Error())
}
if !backingSnapNeedsDelete {
return nil
}
snapParentVolOptions, _, snapID, err := store.NewSnapshotOptionsFromID(ctx, volOptions.BackingSnapshotID, cr)
if err != nil {
absorbErrs := []error{
util.ErrPoolNotFound,
util.ErrKeyNotFound,
cerrors.ErrSnapNotFound,
cerrors.ErrVolumeNotFound,
}
fatalErr := true
for i := range absorbErrs {
if errors.Is(err, absorbErrs[i]) {
fatalErr = false
break
}
}
if fatalErr {
return status.Error(codes.Internal, err.Error())
}
} else {
snapClient := core.NewSnapshot(
snapParentVolOptions.GetConnection(),
snapID.FsSnapshotName,
&snapParentVolOptions.SubVolume,
)
err = deleteSnapshotAndUndoReservation(ctx, snapClient, snapParentVolOptions, snapID, cr)
if err != nil {
return status.Error(codes.Internal, err.Error())
}
}
return nil
}
// ValidateVolumeCapabilities checks whether the volume capabilities requested
// are supported.
func (cs *ControllerServer) ValidateVolumeCapabilities(
@@ -537,6 +668,10 @@ func (cs *ControllerServer) ControllerExpandVolume(
}
defer volOptions.Destroy()
if volOptions.BackingSnapshot {
return nil, status.Error(codes.InvalidArgument, "cannot expand snapshot-backed volume")
}
RoundOffSize := util.RoundOffBytes(req.GetCapacityRange().GetRequiredBytes())
volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID)
if err = volClient.ResizeVolume(ctx, RoundOffSize); err != nil {
@@ -615,6 +750,10 @@ func (cs *ControllerServer) CreateSnapshot(
parentVolOptions.ClusterID)
}
if parentVolOptions.BackingSnapshot {
return nil, status.Error(codes.InvalidArgument, "cannot snapshot a snapshot-backed volume")
}
cephfsSnap, genSnapErr := store.GenSnapFromOptions(ctx, req)
if genSnapErr != nil {
return nil, status.Error(codes.Internal, genSnapErr.Error())
@@ -780,6 +919,7 @@ func (cs *ControllerServer) validateSnapshotReq(ctx context.Context, req *csi.Cr
// DeleteSnapshot deletes the snapshot in backend and removes the
// snapshot metadata from store.
// nolint:gocyclo,cyclop // TODO: reduce complexity
func (cs *ControllerServer) DeleteSnapshot(
ctx context.Context,
req *csi.DeleteSnapshotRequest,
@@ -878,17 +1018,51 @@ func (cs *ControllerServer) DeleteSnapshot(
return nil, status.Error(codes.Internal, err.Error())
}
}
err = snapClient.DeleteSnapshot(ctx)
needsDelete, err := store.UnrefSelfInSnapshotBackedVolumes(ctx, volOpt, sid.SnapshotID)
if err != nil {
return nil, status.Error(codes.Internal, err.Error())
}
err = store.UndoSnapReservation(ctx, volOpt, *sid, sid.RequestName, cr)
if err != nil {
log.ErrorLog(ctx, "failed to remove reservation for snapname (%s) with backing snap (%s) (%s)",
sid.RequestName, sid.FsSnapshotName, err)
if errors.Is(err, rterrors.ErrObjectOutOfDate) {
return nil, status.Error(codes.Aborted, err.Error())
}
return nil, status.Error(codes.Internal, err.Error())
}
if needsDelete {
err = deleteSnapshotAndUndoReservation(
ctx,
snapClient,
volOpt,
sid,
cr,
)
if err != nil {
return nil, status.Error(codes.Internal, err.Error())
}
}
return &csi.DeleteSnapshotResponse{}, nil
}
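// deleteSnapshotAndUndoReservation removes the snapshot from the backend and
// undoes its reservation in the journal.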
func deleteSnapshotAndUndoReservation(
ctx context.Context,
snapClient core.SnapshotClient,
parentVolOptions *store.VolumeOptions,
snapID *store.SnapshotIdentifier,
cr *util.Credentials,
) error {
err := snapClient.DeleteSnapshot(ctx)
if err != nil {
return err
}
err = store.UndoSnapReservation(ctx, parentVolOptions, *snapID, snapID.RequestName, cr)
if err != nil {
log.ErrorLog(ctx, "failed to remove reservation for snapname (%s) with backing snap (%s) (%s)",
snapID.RequestName, snapID.FsSnapshotName, err)
return err
}
return nil
}


@@ -192,7 +192,7 @@ func (ns *NodeServer) tryRestoreFuseMountsInNodePublish(
// Unmount and mount the volume.
if stagingTargetMs != msMounted {
if err := mounter.UnmountVolume(ctx, stagingTargetPath); err != nil {
if err := mounter.UnmountAll(ctx, stagingTargetPath); err != nil {
return err
}
@@ -269,5 +269,5 @@ func (ns *NodeServer) tryRestoreFuseMountInNodeStage(
// Restoration here means only unmounting the volume.
// NodeStageVolume should take care of the rest.
return mounter.UnmountVolume(ctx, stagingTargetPath)
return mounter.UnmountAll(ctx, stagingTargetPath)
}


@@ -118,8 +118,8 @@ func (m *FuseMounter) Mount(
func (m *FuseMounter) Name() string { return "Ceph FUSE driver" }
func UnmountVolume(ctx context.Context, mountPoint string) error {
if _, stderr, err := util.ExecCommand(ctx, "umount", mountPoint); err != nil {
func UnmountVolume(ctx context.Context, mountPoint string, opts ...string) error {
if _, stderr, err := util.ExecCommand(ctx, "umount", append([]string{mountPoint}, opts...)...); err != nil {
err = fmt.Errorf("%w stderr: %s", err, stderr)
if strings.Contains(err.Error(), fmt.Sprintf("umount: %s: not mounted", mountPoint)) ||
strings.Contains(err.Error(), "No such file or directory") {
@@ -149,3 +149,7 @@ func UnmountVolume(ctx context.Context, mountPoint string) error {
return nil
}
func UnmountAll(ctx context.Context, mountPoint string) error {
return UnmountVolume(ctx, mountPoint, "--all-targets")
}
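Design note: UnmountAll exists because a snapshot-backed volume's staging path carries two stacked mounts (the CephFS mount plus a bind mount of the snapshot root over the same directory; see the node-server changes further below). A plain umount would detach only the topmost mount, so UnmountAll passes umount's --all-targets flag, which unmounts every mountpoint of the given filesystem in the current namespace.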


@@ -21,6 +21,7 @@ import (
"errors"
"fmt"
"os"
"path"
"strings"
cerrors "github.com/ceph/ceph-csi/internal/cephfs/errors"
@@ -101,6 +102,20 @@ func (ns *NodeServer) getVolumeOptions(
return volOptions, nil
}
func validateSnapshotBackedVolCapability(volCap *csi.VolumeCapability) error {
// Snapshot-backed volumes may be used with read-only volume access modes only.
mode := volCap.AccessMode.Mode
if mode != csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY &&
mode != csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY {
return status.Error(codes.InvalidArgument,
"snapshot-backed volume supports only read-only access mode")
}
return nil
}
// NodeStageVolume mounts the volume to a staging path on the node.
func (ns *NodeServer) NodeStageVolume(
ctx context.Context,
@@ -140,6 +155,12 @@ func (ns *NodeServer) NodeStageVolume(
}
}
if volOptions.BackingSnapshot {
if err = validateSnapshotBackedVolCapability(req.GetVolumeCapability()); err != nil {
return nil, err
}
}
mnt, err := mounter.New(volOptions)
if err != nil {
log.ErrorLog(ctx, "failed to create mounter for volume %s: %v", volID, err)
@@ -192,7 +213,7 @@ func (ns *NodeServer) NodeStageVolume(
log.ErrorLog(ctx, "cephfs: failed to write NodeStageMountinfo for volume %s: %v", volID, err)
// Try to clean node stage mount.
if unmountErr := mounter.UnmountVolume(ctx, stagingTargetPath); unmountErr != nil {
if unmountErr := mounter.UnmountAll(ctx, stagingTargetPath); unmountErr != nil {
log.ErrorLog(ctx, "cephfs: failed to unmount %s in WriteNodeStageMountinfo clean up: %v",
stagingTargetPath, unmountErr)
}
@@ -223,7 +244,7 @@ func (*NodeServer) mount(
log.DebugLog(ctx, "cephfs: mounting volume %s with %s", volID, mnt.Name())
readOnly := "ro"
const readOnly = "ro"
if volCap.AccessMode.Mode == csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY ||
volCap.AccessMode.Mode == csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY {
@@ -248,9 +269,112 @@ func (*NodeServer) mount(
return status.Error(codes.Internal, err.Error())
}
defer func() {
if err == nil {
return
}
unmountErr := mounter.UnmountAll(ctx, stagingTargetPath)
if unmountErr != nil {
log.ErrorLog(ctx, "failed to clean up mounts in rollback procedure: %v", unmountErr)
}
}()
if volOptions.BackingSnapshot {
snapshotRoot, err := getBackingSnapshotRoot(ctx, volOptions, stagingTargetPath)
if err != nil {
return err
}
absoluteSnapshotRoot := path.Join(stagingTargetPath, snapshotRoot)
err = mounter.BindMount(
ctx,
absoluteSnapshotRoot,
stagingTargetPath,
true,
[]string{"bind", "_netdev"},
)
if err != nil {
log.ErrorLog(ctx,
"failed to bind mount snapshot root %s: %v", absoluteSnapshotRoot, err)
return status.Error(codes.Internal, err.Error())
}
}
return nil
}
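Design note: for snapshot-backed volumes, staging thus produces two stacked mounts at stagingTargetPath: first the CephFS mount of the snapshot's parent subvolume root, then a bind mount of the resolved snapshot root over the same directory, so that consumers see only the snapshot's contents. The deferred UnmountAll above rolls both back on failure.

// getBackingSnapshotRoot returns the backing snapshot's root path relative
// to the volume root, scanning <staging path>/.snap for pre-provisioned
// volumes.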
func getBackingSnapshotRoot(
ctx context.Context,
volOptions *store.VolumeOptions,
stagingTargetPath string,
) (string, error) {
if volOptions.ProvisionVolume {
// Provisioned snapshot-backed volumes should have their BackingSnapshotRoot
// already populated.
return volOptions.BackingSnapshotRoot, nil
}
// Pre-provisioned snapshot-backed volumes are more involved:
//
// Snapshots created with `ceph fs subvolume snapshot create` have the following
// snap directory name format inside <root path>/.snap:
//
// _<snapshot>_<snapshot inode number>
//
// We don't know what <snapshot inode number> is, and so <root path>/.snap
// needs to be traversed in order to determine the full snapshot directory name.
snapshotsBase := path.Join(stagingTargetPath, ".snap")
dir, err := os.Open(snapshotsBase)
if err != nil {
log.ErrorLog(ctx, "failed to open %s when searching for snapshot root: %v", snapshotsBase, err)
return "", status.Errorf(codes.Internal, err.Error())
}
// Read the contents of the <root path>/.snap directory into a string slice.
contents, err := dir.Readdirnames(0)
if err != nil {
log.ErrorLog(ctx, "failed to read %s when searching for snapshot root: %v", snapshotsBase, err)
return "", status.Errorf(codes.Internal, err.Error())
}
var (
found bool
snapshotDirName string
)
// Look through the directory's contents and try to find the correct snapshot
// dir name. The search must be exhaustive to catch possible ambiguous results.
for i := range contents {
if !strings.Contains(contents[i], volOptions.BackingSnapshotID) {
continue
}
if !found {
found = true
snapshotDirName = contents[i]
} else {
return "", status.Errorf(codes.InvalidArgument, "ambiguous backingSnapshotID %s in %s",
volOptions.BackingSnapshotID, snapshotsBase)
}
}
if !found {
return "", status.Errorf(codes.InvalidArgument, "no snapshot with backingSnapshotID %s found in %s",
volOptions.BackingSnapshotID, snapshotsBase)
}
return path.Join(".snap", snapshotDirName), nil
}
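A hypothetical example: if backingSnapshotID ends in csi-snap-1234 and <staging path>/.snap holds the single entry _csi-snap-1234_1099511627776, getBackingSnapshotRoot returns .snap/_csi-snap-1234_1099511627776; were two entries to match the ID, the request would fail as ambiguous.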
// NodePublishVolume mounts the volume mounted to the staging path to the target
// path.
func (ns *NodeServer) NodePublishVolume(
@@ -444,7 +568,7 @@ func (ns *NodeServer) NodeUnstageVolume(
return &csi.NodeUnstageVolumeResponse{}, nil
}
// Unmount the volume
if err = mounter.UnmountVolume(ctx, stagingTargetPath); err != nil {
if err = mounter.UnmountAll(ctx, stagingTargetPath); err != nil {
return nil, status.Error(codes.Internal, err.Error())
}


@@ -0,0 +1,168 @@
/*
Copyright 2022 The Ceph-CSI Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package store
import (
"context"
"fmt"
fsutil "github.com/ceph/ceph-csi/internal/cephfs/util"
"github.com/ceph/ceph-csi/internal/util/log"
"github.com/ceph/ceph-csi/internal/util/reftracker"
"github.com/ceph/ceph-csi/internal/util/reftracker/radoswrapper"
"github.com/ceph/ceph-csi/internal/util/reftracker/reftype"
)
func fmtBackingSnapshotReftrackerName(backingSnapID string) string {
return fmt.Sprintf("rt-backingsnapshot-%s", backingSnapID)
}
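// AddSnapshotBackedVolumeRef registers refs for the backing snapshot and the
// volume being created from it in the snapshot's reftracker object, creating
// the object if it does not exist yet.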
func AddSnapshotBackedVolumeRef(
ctx context.Context,
volOptions *VolumeOptions,
) error {
ioctx, err := volOptions.conn.GetIoctx(volOptions.MetadataPool)
if err != nil {
log.ErrorLog(ctx, "failed to create RADOS ioctx: %s", err)
return err
}
defer ioctx.Destroy()
ioctx.SetNamespace(fsutil.RadosNamespace)
var (
backingSnapID = volOptions.BackingSnapshotID
ioctxW = radoswrapper.NewIOContext(ioctx)
)
created, err := reftracker.Add(
ioctxW,
fmtBackingSnapshotReftrackerName(backingSnapID),
map[string]struct{}{
backingSnapID: {},
volOptions.VolID: {},
},
)
if err != nil {
log.ErrorLog(ctx, "failed to add refs for backing snapshot %s: %v",
backingSnapID, err)
return err
}
defer func() {
if err == nil {
return
}
// Clean up after failure.
var deleted bool
deleted, err = reftracker.Remove(
ioctxW,
fmtBackingSnapshotReftrackerName(backingSnapID),
map[string]reftype.RefType{
backingSnapID: reftype.Normal,
volOptions.VolID: reftype.Normal,
},
)
if err != nil {
log.ErrorLog(ctx, "failed to remove refs in cleanup procedure for backing snapshot %s: %v",
backingSnapID, err)
}
if created && !deleted {
log.ErrorLog(ctx, "orphaned reftracker object %s (pool %s, namespace %s)",
backingSnapID, volOptions.MetadataPool, fsutil.RadosNamespace)
}
}()
// There may have been a race between adding a ref to the reftracker and
// deleting the backing snapshot. Make sure the snapshot still exists by
// trying to retrieve it again.
_, _, _, err = NewSnapshotOptionsFromID(ctx, volOptions.BackingSnapshotID, volOptions.conn.Creds)
if err != nil {
log.ErrorLog(ctx, "failed to get backing snapshot %s: %v", volOptions.BackingSnapshotID, err)
}
return err
}
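// UnrefSnapshotBackedVolume drops the volume's ref from the backing
// snapshot's reftracker. The returned boolean signals whether the reftracker
// object was deleted, i.e. the backing snapshot is unreferenced and may need
// to be deleted as well.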
func UnrefSnapshotBackedVolume(
ctx context.Context,
volOptions *VolumeOptions,
) (bool, error) {
ioctx, err := volOptions.conn.GetIoctx(volOptions.MetadataPool)
if err != nil {
log.ErrorLog(ctx, "failed to create RADOS ioctx: %s", err)
return false, err
}
defer ioctx.Destroy()
ioctx.SetNamespace(fsutil.RadosNamespace)
var (
backingSnapID = volOptions.BackingSnapshotID
ioctxW = radoswrapper.NewIOContext(ioctx)
)
deleted, err := reftracker.Remove(
ioctxW,
fmtBackingSnapshotReftrackerName(backingSnapID),
map[string]reftype.RefType{
volOptions.VolID: reftype.Normal,
},
)
if err != nil {
log.ErrorLog(ctx, "failed to remove refs for backing snapshot %s: %v",
backingSnapID, err)
return false, err
}
return deleted, err
}
// UnrefSelfInSnapshotBackedVolumes removes (masks) snapshot ID in the
// reftracker for volumes backed by this snapshot. The returned boolean
// value signals whether the snapshot is no longer referenced by any such
// volumes and needs to be removed.
func UnrefSelfInSnapshotBackedVolumes(
ctx context.Context,
snapParentVolOptions *VolumeOptions,
snapshotID string,
) (bool, error) {
ioctx, err := snapParentVolOptions.conn.GetIoctx(snapParentVolOptions.MetadataPool)
if err != nil {
log.ErrorLog(ctx, "failed to create RADOS ioctx: %s", err)
return false, err
}
defer ioctx.Destroy()
ioctx.SetNamespace(fsutil.RadosNamespace)
return reftracker.Remove(
radoswrapper.NewIOContext(ioctx),
fmtBackingSnapshotReftrackerName(snapshotID),
map[string]reftype.RefType{
snapshotID: reftype.Mask,
},
)
}
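To make the reference-counting protocol concrete, here is a simplified, self-contained model of it. This is not the actual internal/util/reftracker implementation (which persists refs in a RADOS object); it only mimics the semantics used above: creation registers normal refs for the backing snapshot ID and each volume, DeleteVolume removes a volume's normal ref, DeleteSnapshot masks the snapshot's own ref, and the backing snapshot is deleted once no normal refs remain.

package main

import "fmt"

// refTracker maps a ref name to a masked flag; a masked ref no longer
// keeps the backing snapshot alive.
type refTracker struct {
	refs map[string]bool
}

// add registers normal (unmasked) refs, as AddSnapshotBackedVolumeRef does.
func (rt *refTracker) add(names ...string) {
	for _, n := range names {
		if _, ok := rt.refs[n]; !ok {
			rt.refs[n] = false
		}
	}
}

// removeNormal drops a ref, as UnrefSnapshotBackedVolume does for the
// volume's ref on DeleteVolume.
func (rt *refTracker) removeNormal(name string) bool {
	delete(rt.refs, name)
	return rt.needsDelete()
}

// mask flags a ref without removing it, as UnrefSelfInSnapshotBackedVolumes
// does for the snapshot's own ref on DeleteSnapshot.
func (rt *refTracker) mask(name string) bool {
	if _, ok := rt.refs[name]; ok {
		rt.refs[name] = true
	}
	return rt.needsDelete()
}

// needsDelete reports whether no normal refs remain, i.e. the backing
// snapshot is unreferenced and may be deleted.
func (rt *refTracker) needsDelete() bool {
	for _, masked := range rt.refs {
		if !masked {
			return false
		}
	}
	return true
}

func main() {
	rt := &refTracker{refs: map[string]bool{}}
	rt.add("snap-1", "vol-a")             // provision vol-a from snap-1
	fmt.Println(rt.mask("snap-1"))        // false: vol-a still references snap-1
	fmt.Println(rt.removeNormal("vol-a")) // true: delete the backing snapshot now
}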


@@ -20,6 +20,7 @@ import (
"context"
"errors"
"fmt"
"path"
"strconv"
"strings"
@@ -34,27 +35,31 @@ import (
type VolumeOptions struct {
core.SubVolume
TopologyPools *[]util.TopologyConstrainedPool
TopologyRequirement *csi.TopologyRequirement
Topology map[string]string
RequestName string
NamePrefix string
ClusterID string
FscID int64
MetadataPool string
RequestName string
NamePrefix string
ClusterID string
MetadataPool string
// ReservedID represents the ID reserved for a subvolume
ReservedID string
Monitors string `json:"monitors"`
RootPath string `json:"rootPath"`
Mounter string `json:"mounter"`
ProvisionVolume bool `json:"provisionVolume"`
KernelMountOptions string `json:"kernelMountOptions"`
FuseMountOptions string `json:"fuseMountOptions"`
// Network namespace file path to execute nsenter command
ReservedID string
Monitors string `json:"monitors"`
RootPath string `json:"rootPath"`
Mounter string `json:"mounter"`
BackingSnapshotRoot string // Snapshot root relative to RootPath.
BackingSnapshotID string
KernelMountOptions string `json:"kernelMountOptions"`
FuseMountOptions string `json:"fuseMountOptions"`
NetNamespaceFilePath string
TopologyPools *[]util.TopologyConstrainedPool
TopologyRequirement *csi.TopologyRequirement
Topology map[string]string
FscID int64
// conn is a connection to the Ceph cluster obtained from a ConnPool
conn *util.ClusterConnection
ProvisionVolume bool `json:"provisionVolume"`
BackingSnapshot bool `json:"backingSnapshot"`
}
// Connect a CephFS volume to the Ceph cluster.
@@ -184,14 +189,20 @@ func (vo *VolumeOptions) GetConnection() *util.ClusterConnection {
return vo.conn
}
func fmtBackingSnapshotOptionMismatch(optName, expected, actual string) error {
return fmt.Errorf("%s option mismatch with backing snapshot: got %s, expected %s",
optName, actual, expected)
}
// NewVolumeOptions generates a new instance of volumeOptions from the provided
// CSI request parameters.
func NewVolumeOptions(ctx context.Context, requestName string, req *csi.CreateVolumeRequest,
cr *util.Credentials,
) (*VolumeOptions, error) {
var (
opts VolumeOptions
err error
opts VolumeOptions
backingSnapshotBool string
err error
)
volOptions := req.GetParameters()
@@ -228,6 +239,16 @@ func NewVolumeOptions(ctx context.Context, requestName string, req *csi.CreateVo
return nil, err
}
if err = extractOptionalOption(&backingSnapshotBool, "backingSnapshot", volOptions); err != nil {
return nil, err
}
if backingSnapshotBool != "" {
if opts.BackingSnapshot, err = strconv.ParseBool(backingSnapshotBool); err != nil {
return nil, fmt.Errorf("failed to parse backingSnapshot: %w", err)
}
}
opts.RequestName = requestName
err = opts.Connect(cr)
@@ -261,6 +282,19 @@ func NewVolumeOptions(ctx context.Context, requestName string, req *csi.CreateVo
opts.ProvisionVolume = true
if opts.BackingSnapshot {
if req.GetVolumeContentSource() == nil || req.GetVolumeContentSource().GetSnapshot() == nil {
return nil, errors.New("backingSnapshot option requires snapshot volume source")
}
opts.BackingSnapshotID = req.GetVolumeContentSource().GetSnapshot().GetSnapshotId()
err = opts.populateVolumeOptionsFromBackingSnapshot(ctx, cr)
if err != nil {
return nil, err
}
}
return &opts, nil
}
@@ -364,23 +398,109 @@ func NewVolumeOptionsFromVolID(
}
}
volOptions.ProvisionVolume = true
volOptions.SubVolume.VolID = vid.FsSubvolName
vol := core.NewSubVolume(volOptions.conn, &volOptions.SubVolume, volOptions.ClusterID)
info, err := vol.GetSubVolumeInfo(ctx)
if err == nil {
volOptions.RootPath = info.Path
volOptions.Features = info.Features
volOptions.Size = info.BytesQuota
if imageAttributes.BackingSnapshotID != "" || volOptions.BackingSnapshotID != "" {
volOptions.BackingSnapshot = true
volOptions.BackingSnapshotID = imageAttributes.BackingSnapshotID
}
if errors.Is(err, cerrors.ErrInvalidCommand) {
volOptions.RootPath, err = vol.GetVolumeRootPathCeph(ctx)
volOptions.ProvisionVolume = true
volOptions.SubVolume.VolID = vid.FsSubvolName
if volOptions.BackingSnapshot {
err = volOptions.populateVolumeOptionsFromBackingSnapshot(ctx, cr)
} else {
err = volOptions.populateVolumeOptionsFromSubvolume(ctx)
}
return &volOptions, &vid, err
}
func (vo *VolumeOptions) populateVolumeOptionsFromSubvolume(ctx context.Context) error {
vol := core.NewSubVolume(vo.conn, &vo.SubVolume, vo.ClusterID)
var info *core.Subvolume
info, err := vol.GetSubVolumeInfo(ctx)
if err == nil {
vo.RootPath = info.Path
vo.Features = info.Features
vo.Size = info.BytesQuota
}
if errors.Is(err, cerrors.ErrInvalidCommand) {
vo.RootPath, err = vol.GetVolumeRootPathCeph(ctx)
}
return err
}
func (vo *VolumeOptions) populateVolumeOptionsFromBackingSnapshot(
ctx context.Context,
cr *util.Credentials,
) error {
// As of the CephFS snapshot v2 API, snapshots may be found in two locations:
//
// (a) /volumes/<volume group>/<subvolume>/.snap/<snapshot>/<UUID>
// (b) /volumes/<volume group>/<subvolume>/<UUID>/.snap/_<snapshot>_<snapshot inode number>
if !vo.ProvisionVolume {
// Case (b)
//
// If the volume is not provisioned by us, we assume that we have access only
// to the snapshot's parent volume root. In this case, vo.RootPath is expected to
// be already set in the volume context.
// BackingSnapshotRoot cannot be determined at this stage, because the
// full directory name is not known (see snapshot path format for case
// (b) above). RootPath/.snap must be traversed in order to find out
// the snapshot directory name.
return nil
}
parentBackingSnapVolOpts, _, snapID, err := NewSnapshotOptionsFromID(ctx, vo.BackingSnapshotID, cr)
if err != nil {
return fmt.Errorf("failed to retrieve backing snapshot %s: %w", vo.BackingSnapshotID, err)
}
// Ensure that backing snapshot parent's volume options match the context.
// Snapshot-backed volume inherits all its parent's (parent of the snapshot) options.
if vo.ClusterID != parentBackingSnapVolOpts.ClusterID {
return fmtBackingSnapshotOptionMismatch("clusterID", vo.ClusterID, parentBackingSnapVolOpts.ClusterID)
}
if vo.Pool != "" {
return errors.New("cannot set pool for snapshot-backed volume")
}
if vo.MetadataPool != parentBackingSnapVolOpts.MetadataPool {
return fmtBackingSnapshotOptionMismatch("MetadataPool", vo.MetadataPool, parentBackingSnapVolOpts.MetadataPool)
}
if vo.FsName != parentBackingSnapVolOpts.FsName {
return fmtBackingSnapshotOptionMismatch("fsName", vo.FsName, parentBackingSnapVolOpts.FsName)
}
if vo.SubvolumeGroup != parentBackingSnapVolOpts.SubvolumeGroup {
return fmtBackingSnapshotOptionMismatch("SubvolumeGroup", vo.SubvolumeGroup, parentBackingSnapVolOpts.SubvolumeGroup)
}
vo.Features = parentBackingSnapVolOpts.Features
vo.Size = parentBackingSnapVolOpts.Size
// For case (a) (vo.ProvisionVolume==true is assumed), the snapshot root path
// can be built from the subvolume root path, which has the following format:
//
// /volumes/<volume group>/<subvolume>/<subvolume UUID>
subvolRoot, subvolUUID := path.Split(parentBackingSnapVolOpts.RootPath)
vo.RootPath = subvolRoot
vo.BackingSnapshotRoot = path.Join(".snap", snapID.FsSnapshotName, subvolUUID)
return nil
}
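A quick sketch of the case (a) path arithmetic, with made-up names and UUIDs:

package main

import (
	"fmt"
	"path"
)

func main() {
	// RootPath of the snapshot's parent subvolume, as reported by Ceph.
	parentRootPath := "/volumes/csi/csi-vol-e0e0/5c4a-uuid"

	subvolRoot, subvolUUID := path.Split(parentRootPath)
	// subvolRoot == "/volumes/csi/csi-vol-e0e0/", subvolUUID == "5c4a-uuid"

	// The volume mounts subvolRoot; the snapshot contents live at
	// BackingSnapshotRoot relative to it.
	backingSnapshotRoot := path.Join(".snap", "csi-snap-1234", subvolUUID)

	fmt.Println(subvolRoot + backingSnapshotRoot)
	// /volumes/csi/csi-vol-e0e0/.snap/csi-snap-1234/5c4a-uuid
}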
// NewVolumeOptionsFromMonitorList generates a new instance of VolumeOptions and
// VolumeIdentifier from the provided CSI volume context.
func NewVolumeOptionsFromMonitorList(
@@ -438,9 +558,17 @@ func NewVolumeOptionsFromMonitorList(
return nil, nil, err
}
if err = extractOptionalOption(&opts.BackingSnapshotID, "backingSnapshotID", options); err != nil {
return nil, nil, err
}
vid.FsSubvolName = volID
vid.VolumeID = volID
if opts.BackingSnapshotID != "" {
opts.BackingSnapshot = true
}
return &opts, &vid, nil
}
@@ -511,6 +639,10 @@ func NewVolumeOptionsFromStaticVolume(
vid.FsSubvolName = opts.RootPath
vid.VolumeID = volID
if opts.BackingSnapshotID != "" {
opts.BackingSnapshot = true
}
return &opts, &vid, nil
}
@@ -599,6 +731,7 @@ func NewSnapshotOptionsFromID(
}
volOptions.Features = subvolInfo.Features
volOptions.Size = subvolInfo.BytesQuota
volOptions.RootPath = subvolInfo.Path
snap := core.NewSnapshot(volOptions.conn, sid.FsSnapshotName, &volOptions.SubVolume)
info, err := snap.GetSnapshotInfo(ctx)
if err != nil {


@@ -152,7 +152,7 @@ type Config struct {
// ownerKey is used to identify the owner of the volume, can be used with some KMS configurations
ownerKey string
// backingSnapshotIDKey is snapshot ID on which a shallow CephFS volume is based.
// backingSnapshotIDKey is the ID of the snapshot on which the CephFS snapshot-backed volume is based
backingSnapshotIDKey string
// commonPrefix is the prefix common to all omap keys for this Config
@@ -532,7 +532,7 @@ Input arguments:
- kmsConf: Name of the key management service used to encrypt the image (optional)
- volUUID: UUID to be reserved instead of auto-generating one (this is useful for mirroring and metro-DR)
- owner: the owner of the volume (optional)
- backingSnapshotID: (optional)
- backingSnapshotID: ID of the snapshot on which the CephFS snapshot-backed volume is based (optional)
Return values:
- string: Contains the UUID that was reserved for the passed in reqName
@@ -645,7 +645,7 @@ func (conn *Connection) ReserveName(ctx context.Context,
omapValues[cj.cephSnapSourceKey] = parentName
}
// Update backing snapshot ID for shallow CephFS volume
// Update backing snapshot ID for snapshot-backed CephFS volume
if backingSnapshotID != "" {
omapValues[cj.backingSnapshotIDKey] = backingSnapshotID
}
@@ -667,7 +667,7 @@ type ImageAttributes struct {
Owner string // Contains the owner to be used in combination with KmsID (for some KMS)
ImageID string // Contains the image id
JournalPoolID int64 // Pool ID of the CSI journal pool, stored in big endian format (on-disk data)
BackingSnapshotID string // ID of the backing snapshot of a shallow CephFS volume
BackingSnapshotID string // ID of the snapshot on which the CephFS snapshot-backed volume is based
}
// GetImageAttributes fetches all keys and their values, from a UUID directory, returning ImageAttributes structure.