cephfs: added support for snapshot-backed volumes

This commit implements most of the
docs/design/proposals/cephfs-snapshot-shallow-ro-vol.md design document:
specifically, (de-)provisioning of snapshot-backed volumes, mounting such
volumes, as well as mounting pre-provisioned snapshot-backed volumes.
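Reviewer note (illustrative, not part of the diff): end to end, a snapshot-backed volume is requested through an ordinary CSI CreateVolume call whose parameters enable backingSnapshot and whose content source names the snapshot. A minimal sketch of such a request, with made-up IDs and the usual CephFS parameters (clusterID, fsName, ...) omitted:

package main

import (
	"fmt"

	"github.com/container-storage-interface/spec/lib/go/csi"
)

func main() {
	req := &csi.CreateVolumeRequest{
		Name: "pvc-0a1b2c3d",
		Parameters: map[string]string{
			"backingSnapshot": "true", // parsed with strconv.ParseBool in NewVolumeOptions
		},
		// Required: backingSnapshot demands a snapshot volume source.
		VolumeContentSource: &csi.VolumeContentSource{
			Type: &csi.VolumeContentSource_Snapshot{
				Snapshot: &csi.VolumeContentSource_SnapshotSource{
					SnapshotId: "0001-0009-rook-ceph-0000000000000001-0d9af493-9275-4f69-9a45-6283f8b2b4b8",
				},
			},
		},
		// Snapshot-backed volumes are read-only by design.
		VolumeCapabilities: []*csi.VolumeCapability{{
			AccessMode: &csi.VolumeCapability_AccessMode{
				Mode: csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY,
			},
		}},
	}

	fmt.Println(req.GetName(), req.GetParameters()["backingSnapshot"])
}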

Signed-off-by: Robert Vasek <robert.vasek@cern.ch>
Robert Vasek 2022-04-06 15:26:07 +02:00 committed by mergify[bot]
parent 0807fd2e6c
commit fd7559a903
7 changed files with 710 additions and 107 deletions

View File

@@ -29,6 +29,7 @@ import (
 	"github.com/ceph/ceph-csi/internal/util"
 	"github.com/ceph/ceph-csi/internal/util/k8s"
 	"github.com/ceph/ceph-csi/internal/util/log"
+	rterrors "github.com/ceph/ceph-csi/internal/util/reftracker/errors"
 
 	"github.com/container-storage-interface/spec/lib/go/csi"
 	"github.com/golang/protobuf/ptypes/timestamp"
@@ -65,47 +66,77 @@ func (cs *ControllerServer) createBackingVolume(
 	volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID)
 	if sID != nil {
-		if err = cs.OperationLocks.GetRestoreLock(sID.SnapshotID); err != nil {
-			log.ErrorLog(ctx, err.Error())
-			return status.Error(codes.Aborted, err.Error())
-		}
-		defer cs.OperationLocks.ReleaseRestoreLock(sID.SnapshotID)
-		snap := core.Snapshot{
-			SnapshotID: sID.FsSnapshotName,
-			SubVolume:  &parentVolOpt.SubVolume,
-		}
-		err = volClient.CreateCloneFromSnapshot(ctx, snap)
-		if err != nil {
-			log.ErrorLog(ctx, "failed to create clone from snapshot %s: %v", sID.FsSnapshotName, err)
-			return err
-		}
-		return err
+		return cs.createBackingVolumeFromSnapshotSource(ctx, volOptions, parentVolOpt, volClient, sID)
 	}
+
 	if parentVolOpt != nil {
-		if err = cs.OperationLocks.GetCloneLock(pvID.VolumeID); err != nil {
-			log.ErrorLog(ctx, err.Error())
-			return status.Error(codes.Aborted, err.Error())
-		}
-		defer cs.OperationLocks.ReleaseCloneLock(pvID.VolumeID)
-		err = volClient.CreateCloneFromSubvolume(
-			ctx, &parentVolOpt.SubVolume)
-		if err != nil {
-			log.ErrorLog(ctx, "failed to create clone from subvolume %s: %v", fsutil.VolumeID(pvID.FsSubvolName), err)
-			return status.Error(codes.Internal, err.Error())
-		}
-		return nil
+		return cs.createBackingVolumeFromVolumeSource(ctx, parentVolOpt, volClient, pvID)
 	}
+
 	if err = volClient.CreateVolume(ctx); err != nil {
 		log.ErrorLog(ctx, "failed to create volume %s: %v", volOptions.RequestName, err)
 		return status.Error(codes.Internal, err.Error())
 	}
 
 	return nil
 }
 
+func (cs *ControllerServer) createBackingVolumeFromSnapshotSource(
+	ctx context.Context,
+	volOptions *store.VolumeOptions,
+	parentVolOpt *store.VolumeOptions,
+	volClient core.SubVolumeClient,
+	sID *store.SnapshotIdentifier,
+) error {
+	if err := cs.OperationLocks.GetRestoreLock(sID.SnapshotID); err != nil {
+		log.ErrorLog(ctx, err.Error())
+
+		return status.Error(codes.Aborted, err.Error())
+	}
+	defer cs.OperationLocks.ReleaseRestoreLock(sID.SnapshotID)
+
+	if volOptions.BackingSnapshot {
+		if err := store.AddSnapshotBackedVolumeRef(ctx, volOptions); err != nil {
+			log.ErrorLog(ctx, "failed to create snapshot-backed volume from snapshot %s: %v",
+				sID.FsSnapshotName, err)
+
+			return err
+		}
+
+		return nil
+	}
+
+	err := volClient.CreateCloneFromSnapshot(ctx, core.Snapshot{
+		SnapshotID: sID.FsSnapshotName,
+		SubVolume:  &parentVolOpt.SubVolume,
+	})
+	if err != nil {
+		log.ErrorLog(ctx, "failed to create clone from snapshot %s: %v", sID.FsSnapshotName, err)
+
+		return err
+	}
+
+	return nil
+}
+
+func (cs *ControllerServer) createBackingVolumeFromVolumeSource(
+	ctx context.Context,
+	parentVolOpt *store.VolumeOptions,
+	volClient core.SubVolumeClient,
+	pvID *store.VolumeIdentifier,
+) error {
+	if err := cs.OperationLocks.GetCloneLock(pvID.VolumeID); err != nil {
+		log.ErrorLog(ctx, err.Error())
+
+		return status.Error(codes.Aborted, err.Error())
+	}
+	defer cs.OperationLocks.ReleaseCloneLock(pvID.VolumeID)
+
+	if err := volClient.CreateCloneFromSubvolume(ctx, &parentVolOpt.SubVolume); err != nil {
+		log.ErrorLog(ctx, "failed to create clone from subvolume %s: %v", fsutil.VolumeID(pvID.FsSubvolName), err)
+
+		return err
+	}
+
+	return nil
+}
@@ -158,6 +189,7 @@ func checkValidCreateVolumeRequest(
 	parentVol *store.VolumeOptions,
 	pvID *store.VolumeIdentifier,
 	sID *store.SnapshotIdentifier,
+	req *csi.CreateVolumeRequest,
 ) error {
 	switch {
 	case pvID != nil:
@@ -168,6 +200,10 @@
 				parentVol.Size,
 				vol.Size)
 		}
+
+		if vol.BackingSnapshot {
+			return errors.New("cloning snapshot-backed volumes is currently not supported")
+		}
 	case sID != nil:
 		if vol.Size < parentVol.Size {
 			return fmt.Errorf(
@@ -176,6 +212,25 @@
 				parentVol.Size,
 				vol.Size)
 		}
+
+		if vol.BackingSnapshot {
+			if vol.Size != parentVol.Size {
+				return fmt.Errorf(
+					"cannot create snapshot-backed volume of different size: expected %d bytes, got %d bytes",
+					parentVol.Size,
+					vol.Size,
+				)
+			}
+
+			volCaps := req.GetVolumeCapabilities()
+			for _, volCap := range volCaps {
+				mode := volCap.AccessMode.Mode
+				if mode != csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY &&
+					mode != csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY {
+					return errors.New("backingSnapshot may be used only with read-only access modes")
+				}
+			}
+		}
 	}
 
 	return nil
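Reviewer note (illustrative, not part of the diff): the checks added above pin a snapshot-backed volume to the exact size of its snapshot's parent and to read-only access. Of the two capabilities below, only the first passes the new loop:

// Accepted: read-only access mode.
okCap := &csi.VolumeCapability{
	AccessMode: &csi.VolumeCapability_AccessMode{
		Mode: csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY,
	},
}

// Rejected with "backingSnapshot may be used only with read-only access modes".
badCap := &csi.VolumeCapability{
	AccessMode: &csi.VolumeCapability_AccessMode{
		Mode: csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER,
	},
}
_, _ = okCap, badCap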
@@ -233,7 +288,7 @@ func (cs *ControllerServer) CreateVolume(
 		defer parentVol.Destroy()
 	}
 
-	err = checkValidCreateVolumeRequest(volOptions, parentVol, pvID, sID)
+	err = checkValidCreateVolumeRequest(volOptions, parentVol, pvID, sID, req)
 	if err != nil {
 		return nil, status.Error(codes.InvalidArgument, err.Error())
 	}
@@ -249,7 +304,7 @@ func (cs *ControllerServer) CreateVolume(
 	// TODO return error message if requested vol size greater than found volume return error
 
 	if vID != nil {
-		if sID != nil || pvID != nil {
+		if sID != nil || pvID != nil && !volOptions.BackingSnapshot {
 			volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID)
 			err = volClient.ExpandVolume(ctx, volOptions.Size)
 			if err != nil {
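Reviewer note (not part of the diff): Go's && binds tighter than ||, so the changed condition parses as sID != nil || (pvID != nil && !volOptions.BackingSnapshot). A tiny demonstration of the precedence rule:

package main

import "fmt"

func main() {
	sID, pvID, backing := true, false, true

	fmt.Println(sID || pvID && !backing)   // true
	fmt.Println(sID || (pvID && !backing)) // true: the two forms are equivalent
}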
@@ -321,26 +376,32 @@ func (cs *ControllerServer) CreateVolume(
 		return nil, err
 	}
 
-	volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID)
-	volOptions.RootPath, err = volClient.GetVolumeRootPathCeph(ctx)
-	if err != nil {
-		purgeErr := volClient.PurgeVolume(ctx, true)
-		if purgeErr != nil {
-			log.ErrorLog(ctx, "failed to delete volume %s: %v", vID.FsSubvolName, purgeErr)
-			// All errors other than ErrVolumeNotFound should return an error back to the caller
-			if !errors.Is(purgeErr, cerrors.ErrVolumeNotFound) {
-				// If the subvolume deletion is failed, we should not cleanup
-				// the OMAP entry it will stale subvolume in cluster.
-				// set err=nil so that when we get the request again we can get
-				// the subvolume info.
-				err = nil
-				return nil, status.Error(codes.Internal, purgeErr.Error())
+	if !volOptions.BackingSnapshot {
+		// Get root path for the created subvolume.
+		// Note that root path for snapshot-backed volumes has been already set when
+		// building VolumeOptions.
+		volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID)
+		volOptions.RootPath, err = volClient.GetVolumeRootPathCeph(ctx)
+		if err != nil {
+			purgeErr := volClient.PurgeVolume(ctx, true)
+			if purgeErr != nil {
+				log.ErrorLog(ctx, "failed to delete volume %s: %v", vID.FsSubvolName, purgeErr)
+				// All errors other than ErrVolumeNotFound should return an error back to the caller
+				if !errors.Is(purgeErr, cerrors.ErrVolumeNotFound) {
+					// If the subvolume deletion is failed, we should not cleanup
+					// the OMAP entry it will stale subvolume in cluster.
+					// set err=nil so that when we get the request again we can get
+					// the subvolume info.
+					err = nil
+					return nil, status.Error(codes.Internal, purgeErr.Error())
+				}
 			}
+			log.ErrorLog(ctx, "failed to get subvolume path %s: %v", vID.FsSubvolName, err)
+
+			return nil, status.Error(codes.Internal, err.Error())
 		}
-		log.ErrorLog(ctx, "failed to get subvolume path %s: %v", vID.FsSubvolName, err)
-		return nil, status.Error(codes.Internal, err.Error())
 	}
 
 	log.DebugLog(ctx, "cephfs: successfully created backing volume named %s for request name %s",
@@ -452,16 +513,8 @@ func (cs *ControllerServer) DeleteVolume(
 	}
 	defer cr.DeleteCredentials()
 
-	volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID)
-	if err = volClient.PurgeVolume(ctx, false); err != nil {
-		log.ErrorLog(ctx, "failed to delete volume %s: %v", volID, err)
-		if errors.Is(err, cerrors.ErrVolumeHasSnapshots) {
-			return nil, status.Error(codes.FailedPrecondition, err.Error())
-		}
-		if !errors.Is(err, cerrors.ErrVolumeNotFound) {
-			return nil, status.Error(codes.Internal, err.Error())
-		}
+	if err := cleanUpBackingVolume(ctx, volOptions, vID, cr); err != nil {
+		return nil, err
 	}
 
 	if err := store.UndoVolReservation(ctx, volOptions, *vID, secrets); err != nil {
@@ -473,6 +526,84 @@ func (cs *ControllerServer) DeleteVolume(
 
 	return &csi.DeleteVolumeResponse{}, nil
 }
 
+func cleanUpBackingVolume(
+	ctx context.Context,
+	volOptions *store.VolumeOptions,
+	volID *store.VolumeIdentifier,
+	cr *util.Credentials,
+) error {
+	if !volOptions.BackingSnapshot {
+		// Regular volumes need to be purged.
+		volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID)
+		if err := volClient.PurgeVolume(ctx, false); err != nil {
+			log.ErrorLog(ctx, "failed to delete volume %s: %v", volID, err)
+			if errors.Is(err, cerrors.ErrVolumeHasSnapshots) {
+				return status.Error(codes.FailedPrecondition, err.Error())
+			}
+
+			if !errors.Is(err, cerrors.ErrVolumeNotFound) {
+				return status.Error(codes.Internal, err.Error())
+			}
+		}
+
+		return nil
+	}
+
+	// Snapshot-backed volumes need to un-reference the backing snapshot, and
+	// the snapshot itself may need to be deleted if its reftracker doesn't
+	// hold any references anymore.
+	backingSnapNeedsDelete, err := store.UnrefSnapshotBackedVolume(ctx, volOptions)
+	if err != nil {
+		if errors.Is(err, rterrors.ErrObjectOutOfDate) {
+			return status.Error(codes.Aborted, err.Error())
+		}
+
+		return status.Error(codes.Internal, err.Error())
+	}
+
+	if !backingSnapNeedsDelete {
+		return nil
+	}
+
+	snapParentVolOptions, _, snapID, err := store.NewSnapshotOptionsFromID(ctx, volOptions.BackingSnapshotID, cr)
+	if err != nil {
+		absorbErrs := []error{
+			util.ErrPoolNotFound,
+			util.ErrKeyNotFound,
+			cerrors.ErrSnapNotFound,
+			cerrors.ErrVolumeNotFound,
+		}
+
+		fatalErr := true
+		for i := range absorbErrs {
+			if errors.Is(err, absorbErrs[i]) {
+				fatalErr = false
+
+				break
+			}
+		}
+
+		if fatalErr {
+			return status.Error(codes.Internal, err.Error())
+		}
+	} else {
+		snapClient := core.NewSnapshot(
+			snapParentVolOptions.GetConnection(),
+			snapID.FsSnapshotName,
+			&snapParentVolOptions.SubVolume,
+		)
+
+		err = deleteSnapshotAndUndoReservation(ctx, snapClient, snapParentVolOptions, snapID, cr)
+		if err != nil {
+			return status.Error(codes.Internal, err.Error())
+		}
+	}
+
+	return nil
+}
+
 // ValidateVolumeCapabilities checks whether the volume capabilities requested
 // are supported.
 func (cs *ControllerServer) ValidateVolumeCapabilities(
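Reviewer sketch (ceph-csi internal APIs; fragment, not part of the diff): for snapshot-backed volumes, cleanUpBackingVolume replaces the subvolume purge with reference bookkeeping. The contract of the store helper it calls:

// UnrefSnapshotBackedVolume drops this volume's reference on the backing
// snapshot's reftracker object. The boolean result is true only when the
// snapshot's own ref was already masked (the user deleted the snapshot) and
// no other volume references it, i.e. the CephFS snapshot can be removed now.
backingSnapNeedsDelete, err := store.UnrefSnapshotBackedVolume(ctx, volOptions)
if err != nil {
	return err // mapped to codes.Aborted for rterrors.ErrObjectOutOfDate
}
if backingSnapNeedsDelete {
	// Resolve the snapshot and call deleteSnapshotAndUndoReservation.
}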
@@ -537,6 +668,10 @@ func (cs *ControllerServer) ControllerExpandVolume(
 	}
 	defer volOptions.Destroy()
 
+	if volOptions.BackingSnapshot {
+		return nil, status.Error(codes.InvalidArgument, "cannot expand snapshot-backed volume")
+	}
+
 	RoundOffSize := util.RoundOffBytes(req.GetCapacityRange().GetRequiredBytes())
 	volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID)
 	if err = volClient.ResizeVolume(ctx, RoundOffSize); err != nil {
@@ -615,6 +750,10 @@ func (cs *ControllerServer) CreateSnapshot(
 			parentVolOptions.ClusterID)
 	}
 
+	if parentVolOptions.BackingSnapshot {
+		return nil, status.Error(codes.InvalidArgument, "cannot snapshot a snapshot-backed volume")
+	}
+
 	cephfsSnap, genSnapErr := store.GenSnapFromOptions(ctx, req)
 	if genSnapErr != nil {
 		return nil, status.Error(codes.Internal, genSnapErr.Error())
@@ -780,6 +919,7 @@ func (cs *ControllerServer) validateSnapshotReq(ctx context.Context, req *csi.Cr
 
 // DeleteSnapshot deletes the snapshot in backend and removes the
 // snapshot metadata from store.
+// nolint:gocyclo,cyclop // TODO: reduce complexity
 func (cs *ControllerServer) DeleteSnapshot(
 	ctx context.Context,
 	req *csi.DeleteSnapshotRequest,
@@ -878,17 +1018,51 @@ func (cs *ControllerServer) DeleteSnapshot(
 			return nil, status.Error(codes.Internal, err.Error())
 		}
 	}
 
-	err = snapClient.DeleteSnapshot(ctx)
+	needsDelete, err := store.UnrefSelfInSnapshotBackedVolumes(ctx, volOpt, sid.SnapshotID)
 	if err != nil {
-		return nil, status.Error(codes.Internal, err.Error())
-	}
-	err = store.UndoSnapReservation(ctx, volOpt, *sid, sid.RequestName, cr)
-	if err != nil {
-		log.ErrorLog(ctx, "failed to remove reservation for snapname (%s) with backing snap (%s) (%s)",
-			sid.RequestName, sid.FsSnapshotName, err)
+		if errors.Is(err, rterrors.ErrObjectOutOfDate) {
+			return nil, status.Error(codes.Aborted, err.Error())
+		}
 
 		return nil, status.Error(codes.Internal, err.Error())
 	}
 
+	if needsDelete {
+		err = deleteSnapshotAndUndoReservation(
+			ctx,
+			snapClient,
+			volOpt,
+			sid,
+			cr,
+		)
+		if err != nil {
+			return nil, status.Error(codes.Internal, err.Error())
+		}
+	}
+
 	return &csi.DeleteSnapshotResponse{}, nil
 }
+
+func deleteSnapshotAndUndoReservation(
+	ctx context.Context,
+	snapClient core.SnapshotClient,
+	parentVolOptions *store.VolumeOptions,
+	snapID *store.SnapshotIdentifier,
+	cr *util.Credentials,
+) error {
+	err := snapClient.DeleteSnapshot(ctx)
+	if err != nil {
+		return err
+	}
+
+	err = store.UndoSnapReservation(ctx, parentVolOptions, *snapID, snapID.RequestName, cr)
+	if err != nil {
+		log.ErrorLog(ctx, "failed to remove reservation for snapname (%s) with backing snap (%s) (%s)",
+			snapID.RequestName, snapID.RequestName, err)
+
+		return err
+	}
+
+	return nil
+}
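Reviewer note (not part of the diff): DeleteSnapshot no longer removes the CephFS snapshot unconditionally. UnrefSelfInSnapshotBackedVolumes masks the snapshot's own reference in the reftracker (see the new store file below), so a snapshot that still backs volumes disappears from the user's point of view while its data stays until the last such volume is gone. Sketch of the underlying call (internal packages, fragment):

needsDelete, err := reftracker.Remove(
	radoswrapper.NewIOContext(ioctx),
	"rt-backingsnapshot-"+snapshotID, // fmtBackingSnapshotReftrackerName
	map[string]reftype.RefType{
		snapshotID: reftype.Mask, // hide the ref instead of dropping it
	},
)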

View File

@@ -192,7 +192,7 @@ func (ns *NodeServer) tryRestoreFuseMountsInNodePublish(
 	// Unmount and mount the volume.
 	if stagingTargetMs != msMounted {
-		if err := mounter.UnmountVolume(ctx, stagingTargetPath); err != nil {
+		if err := mounter.UnmountAll(ctx, stagingTargetPath); err != nil {
 			return err
 		}
@@ -269,5 +269,5 @@ func (ns *NodeServer) tryRestoreFuseMountInNodeStage(
 	// Restoration here means only unmounting the volume.
 	// NodeStageVolume should take care of the rest.
-	return mounter.UnmountVolume(ctx, stagingTargetPath)
+	return mounter.UnmountAll(ctx, stagingTargetPath)
 }

View File

@@ -118,8 +118,8 @@
 
 func (m *FuseMounter) Name() string { return "Ceph FUSE driver" }
 
-func UnmountVolume(ctx context.Context, mountPoint string) error {
-	if _, stderr, err := util.ExecCommand(ctx, "umount", mountPoint); err != nil {
+func UnmountVolume(ctx context.Context, mountPoint string, opts ...string) error {
+	if _, stderr, err := util.ExecCommand(ctx, "umount", append([]string{mountPoint}, opts...)...); err != nil {
 		err = fmt.Errorf("%w stderr: %s", err, stderr)
 		if strings.Contains(err.Error(), fmt.Sprintf("umount: %s: not mounted", mountPoint)) ||
 			strings.Contains(err.Error(), "No such file or directory") {
@@ -149,3 +149,7 @@ func UnmountVolume(ctx context.Context, mountPoint string) error {
 	return nil
 }
+
+func UnmountAll(ctx context.Context, mountPoint string) error {
+	return UnmountVolume(ctx, mountPoint, "--all-targets")
+}
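Reviewer note (illustrative, not part of the diff): staging a snapshot-backed volume stacks two mounts on the same staging path — the CephFS mount of the snapshot's parent subvolume, plus a read-only bind mount of the snapshot root on top of it (see the node-server hunks below). A bare umount <path> would only peel off the topmost layer; umount --all-targets (util-linux) unmounts every mountpoint of that filesystem in the current namespace, which is what UnmountAll relies on. Rough stdlib equivalent of the command it builds:

package main

import (
	"context"
	"os/exec"
)

// unmountAll mirrors mounter.UnmountAll: "umount --all-targets <mountpoint>".
func unmountAll(ctx context.Context, mountPoint string) error {
	return exec.CommandContext(ctx, "umount", "--all-targets", mountPoint).Run()
}

func main() {
	_ = unmountAll(context.Background(), "/mnt/staging") // illustrative path
}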

View File

@@ -21,6 +21,7 @@ import (
 	"errors"
 	"fmt"
 	"os"
+	"path"
 	"strings"
 
 	cerrors "github.com/ceph/ceph-csi/internal/cephfs/errors"
@@ -101,6 +102,20 @@ func (ns *NodeServer) getVolumeOptions(
 	return volOptions, nil
 }
 
+func validateSnapshotBackedVolCapability(volCap *csi.VolumeCapability) error {
+	// Snapshot-backed volumes may be used with read-only volume access modes only.
+	mode := volCap.AccessMode.Mode
+
+	if mode != csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY &&
+		mode != csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY {
+		return status.Error(codes.InvalidArgument,
+			"snapshot-backed volume supports only read-only access mode")
+	}
+
+	return nil
+}
+
 // NodeStageVolume mounts the volume to a staging path on the node.
 func (ns *NodeServer) NodeStageVolume(
 	ctx context.Context,
@@ -140,6 +155,12 @@ func (ns *NodeServer) NodeStageVolume(
 		}
 	}
 
+	if volOptions.BackingSnapshot {
+		if err = validateSnapshotBackedVolCapability(req.GetVolumeCapability()); err != nil {
+			return nil, err
+		}
+	}
+
 	mnt, err := mounter.New(volOptions)
 	if err != nil {
 		log.ErrorLog(ctx, "failed to create mounter for volume %s: %v", volID, err)
@@ -192,7 +213,7 @@
 		log.ErrorLog(ctx, "cephfs: failed to write NodeStageMountinfo for volume %s: %v", volID, err)
 
 		// Try to clean node stage mount.
-		if unmountErr := mounter.UnmountVolume(ctx, stagingTargetPath); unmountErr != nil {
+		if unmountErr := mounter.UnmountAll(ctx, stagingTargetPath); unmountErr != nil {
 			log.ErrorLog(ctx, "cephfs: failed to unmount %s in WriteNodeStageMountinfo clean up: %v",
 				stagingTargetPath, unmountErr)
 		}
@@ -223,7 +244,7 @@ func (*NodeServer) mount(
 	log.DebugLog(ctx, "cephfs: mounting volume %s with %s", volID, mnt.Name())
 
-	readOnly := "ro"
+	const readOnly = "ro"
 
 	if volCap.AccessMode.Mode == csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY ||
 		volCap.AccessMode.Mode == csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY {
@@ -248,9 +269,112 @@
 		return status.Error(codes.Internal, err.Error())
 	}
 
+	defer func() {
+		if err == nil {
+			return
+		}
+
+		unmountErr := mounter.UnmountAll(ctx, stagingTargetPath)
+		if unmountErr != nil {
+			log.ErrorLog(ctx, "failed to clean up mounts in rollback procedure: %v", unmountErr)
+		}
+	}()
+
+	if volOptions.BackingSnapshot {
+		snapshotRoot, err := getBackingSnapshotRoot(ctx, volOptions, stagingTargetPath)
+		if err != nil {
+			return err
+		}
+
+		absoluteSnapshotRoot := path.Join(stagingTargetPath, snapshotRoot)
+		err = mounter.BindMount(
+			ctx,
+			absoluteSnapshotRoot,
+			stagingTargetPath,
+			true,
+			[]string{"bind", "_netdev"},
+		)
+		if err != nil {
+			log.ErrorLog(ctx,
+				"failed to bind mount snapshot root %s: %v", absoluteSnapshotRoot, err)
+
+			return status.Error(codes.Internal, err.Error())
+		}
+	}
+
 	return nil
 }
 
+func getBackingSnapshotRoot(
+	ctx context.Context,
+	volOptions *store.VolumeOptions,
+	stagingTargetPath string,
+) (string, error) {
+	if volOptions.ProvisionVolume {
+		// Provisioned snapshot-backed volumes should have their BackingSnapshotRoot
+		// already populated.
+		return volOptions.BackingSnapshotRoot, nil
+	}
+
+	// Pre-provisioned snapshot-backed volumes are more involved:
+	//
+	// Snapshots created with `ceph fs subvolume snapshot create` have following
+	// snap directory name format inside <root path>/.snap:
+	//
+	//   _<snapshot>_<snapshot inode number>
+	//
+	// We don't know what <snapshot inode number> is, and so <root path>/.snap
+	// needs to be traversed in order to determine the full snapshot directory name.
+
+	snapshotsBase := path.Join(stagingTargetPath, ".snap")
+
+	dir, err := os.Open(snapshotsBase)
+	if err != nil {
+		log.ErrorLog(ctx, "failed to open %s when searching for snapshot root: %v", snapshotsBase, err)
+
+		return "", status.Errorf(codes.Internal, err.Error())
+	}
+
+	// Read the contents of <root path>/.snap directory into a string slice.
+	contents, err := dir.Readdirnames(0)
+	if err != nil {
+		log.ErrorLog(ctx, "failed to read %s when searching for snapshot root: %v", snapshotsBase, err)
+
+		return "", status.Errorf(codes.Internal, err.Error())
+	}
+
+	var (
+		found           bool
+		snapshotDirName string
+	)
+
+	// Look through the directory's contents and try to find the correct snapshot
+	// dir name. The search must be exhaustive to catch possible ambiguous results.
+	for i := range contents {
+		if !strings.Contains(contents[i], volOptions.BackingSnapshotID) {
+			continue
+		}
+
+		if !found {
+			found = true
+			snapshotDirName = contents[i]
+		} else {
+			return "", status.Errorf(codes.InvalidArgument, "ambiguous backingSnapshotID %s in %s",
+				volOptions.BackingSnapshotID, snapshotsBase)
+		}
+	}
+
+	if !found {
+		return "", status.Errorf(codes.InvalidArgument, "no snapshot with backingSnapshotID %s found in %s",
+			volOptions.BackingSnapshotID, snapshotsBase)
+	}
+
+	return path.Join(".snap", snapshotDirName), nil
+}
+
 // NodePublishVolume mounts the volume mounted to the staging path to the target
 // path.
 func (ns *NodeServer) NodePublishVolume(
@@ -444,7 +568,7 @@ func (ns *NodeServer) NodeUnstageVolume(
 		return &csi.NodeUnstageVolumeResponse{}, nil
 	}
 
 	// Unmount the volume
-	if err = mounter.UnmountVolume(ctx, stagingTargetPath); err != nil {
+	if err = mounter.UnmountAll(ctx, stagingTargetPath); err != nil {
 		return nil, status.Error(codes.Internal, err.Error())
 	}
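Reviewer note (illustrative, not part of the diff): for the pre-provisioned case, getBackingSnapshotRoot scans <root path>/.snap for the one entry containing the backing snapshot ID. A self-contained sketch of that matching logic with made-up names:

package main

import (
	"fmt"
	"strings"
)

// findSnapshotDir mirrors the loop in getBackingSnapshotRoot: exactly one
// entry of <root>/.snap may contain the backing snapshot ID.
func findSnapshotDir(contents []string, backingSnapshotID string) (string, error) {
	var found string
	for _, name := range contents {
		if !strings.Contains(name, backingSnapshotID) {
			continue
		}
		if found != "" {
			return "", fmt.Errorf("ambiguous backingSnapshotID %s", backingSnapshotID)
		}
		found = name
	}
	if found == "" {
		return "", fmt.Errorf("no snapshot with backingSnapshotID %s", backingSnapshotID)
	}

	return found, nil
}

func main() {
	// Name format created by `ceph fs subvolume snapshot create`:
	// _<snapshot>_<snapshot inode number> (values made up).
	contents := []string{"_csi-snap-0d9af493_1099511627776"}

	fmt.Println(findSnapshotDir(contents, "csi-snap-0d9af493"))
	// _csi-snap-0d9af493_1099511627776 <nil>
}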

View File

@@ -0,0 +1,168 @@
/*
Copyright 2022 The Ceph-CSI Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package store
import (
"context"
"fmt"
fsutil "github.com/ceph/ceph-csi/internal/cephfs/util"
"github.com/ceph/ceph-csi/internal/util/log"
"github.com/ceph/ceph-csi/internal/util/reftracker"
"github.com/ceph/ceph-csi/internal/util/reftracker/radoswrapper"
"github.com/ceph/ceph-csi/internal/util/reftracker/reftype"
)
func fmtBackingSnapshotReftrackerName(backingSnapID string) string {
return fmt.Sprintf("rt-backingsnapshot-%s", backingSnapID)
}
func AddSnapshotBackedVolumeRef(
ctx context.Context,
volOptions *VolumeOptions,
) error {
ioctx, err := volOptions.conn.GetIoctx(volOptions.MetadataPool)
if err != nil {
log.ErrorLog(ctx, "failed to create RADOS ioctx: %s", err)
return err
}
defer ioctx.Destroy()
ioctx.SetNamespace(fsutil.RadosNamespace)
var (
backingSnapID = volOptions.BackingSnapshotID
ioctxW = radoswrapper.NewIOContext(ioctx)
)
created, err := reftracker.Add(
ioctxW,
fmtBackingSnapshotReftrackerName(backingSnapID),
map[string]struct{}{
backingSnapID: {},
volOptions.VolID: {},
},
)
if err != nil {
log.ErrorLog(ctx, "failed to add refs for backing snapshot %s: %v",
backingSnapID, err)
return err
}
defer func() {
if err == nil {
return
}
// Clean up after failure.
var deleted bool
deleted, err = reftracker.Remove(
ioctxW,
fmtBackingSnapshotReftrackerName(backingSnapID),
map[string]reftype.RefType{
backingSnapID: reftype.Normal,
volOptions.VolID: reftype.Normal,
},
)
if err != nil {
log.ErrorLog(ctx, "failed to remove refs in cleanup procedure for backing snapshot %s: %v",
backingSnapID, err)
}
if created && !deleted {
log.ErrorLog(ctx, "orphaned reftracker object %s (pool %s, namespace %s)",
backingSnapID, volOptions.MetadataPool, fsutil.RadosNamespace)
}
}()
// There may have been a race between adding a ref to the reftracker and
// deleting the backing snapshot. Make sure the snapshot still exists by
// trying to retrieve it again.
_, _, _, err = NewSnapshotOptionsFromID(ctx, volOptions.BackingSnapshotID, volOptions.conn.Creds)
if err != nil {
log.ErrorLog(ctx, "failed to get backing snapshot %s: %v", volOptions.BackingSnapshotID, err)
}
return err
}
func UnrefSnapshotBackedVolume(
ctx context.Context,
volOptions *VolumeOptions,
) (bool, error) {
ioctx, err := volOptions.conn.GetIoctx(volOptions.MetadataPool)
if err != nil {
log.ErrorLog(ctx, "failed to create RADOS ioctx: %s", err)
return false, err
}
defer ioctx.Destroy()
ioctx.SetNamespace(fsutil.RadosNamespace)
var (
backingSnapID = volOptions.BackingSnapshotID
ioctxW = radoswrapper.NewIOContext(ioctx)
)
deleted, err := reftracker.Remove(
ioctxW,
fmtBackingSnapshotReftrackerName(backingSnapID),
map[string]reftype.RefType{
volOptions.VolID: reftype.Normal,
},
)
if err != nil {
log.ErrorLog(ctx, "failed to remove refs for backing snapshot %s: %v",
backingSnapID, err)
return false, err
}
return deleted, err
}
// UnrefSelfInSnapshotBackedVolumes removes (masks) snapshot ID in the
// reftracker for volumes backed by this snapshot. The returned boolean
// value signals whether the snapshot is not referenced by any such volumes
// and needs to be removed.
func UnrefSelfInSnapshotBackedVolumes(
ctx context.Context,
snapParentVolOptions *VolumeOptions,
snapshotID string,
) (bool, error) {
ioctx, err := snapParentVolOptions.conn.GetIoctx(snapParentVolOptions.MetadataPool)
if err != nil {
log.ErrorLog(ctx, "failed to create RADOS ioctx: %s", err)
return false, err
}
defer ioctx.Destroy()
ioctx.SetNamespace(fsutil.RadosNamespace)
return reftracker.Remove(
radoswrapper.NewIOContext(ioctx),
fmtBackingSnapshotReftrackerName(snapshotID),
map[string]reftype.RefType{
snapshotID: reftype.Mask,
},
)
}
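Reviewer summary (fragment, ceph-csi internal APIs; not part of the diff): the three helpers above share one RADOS reftracker object per backing snapshot. Who touches which reference:

var (
	created, deleted bool
	err              error
)

ioctxW := radoswrapper.NewIOContext(ioctx)              // ioctx from conn.GetIoctx(MetadataPool)
name := fmtBackingSnapshotReftrackerName(backingSnapID) // "rt-backingsnapshot-<ID>"

// AddSnapshotBackedVolumeRef (volume creation): snapshot + new volume refs.
created, err = reftracker.Add(ioctxW, name, map[string]struct{}{
	backingSnapID: {},
	volID:         {},
})

// UnrefSnapshotBackedVolume (volume deletion): drop this volume's ref.
deleted, err = reftracker.Remove(ioctxW, name, map[string]reftype.RefType{
	volID: reftype.Normal,
})

// UnrefSelfInSnapshotBackedVolumes (snapshot deletion): mask the snapshot's own ref.
deleted, err = reftracker.Remove(ioctxW, name, map[string]reftype.RefType{
	backingSnapID: reftype.Mask,
})

// deleted == true from either Remove means no live references remain and the
// CephFS snapshot itself is removed next.
_, _, _ = created, deleted, err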

View File

@@ -20,6 +20,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"path"
 	"strconv"
 	"strings"
@@ -34,27 +35,31 @@
 type VolumeOptions struct {
 	core.SubVolume
 
-	TopologyPools       *[]util.TopologyConstrainedPool
-	TopologyRequirement *csi.TopologyRequirement
-	Topology            map[string]string
-	RequestName         string
-	NamePrefix          string
-	ClusterID           string
-	FscID               int64
-	MetadataPool        string
+	RequestName  string
+	NamePrefix   string
+	ClusterID    string
+	MetadataPool string
 	// ReservedID represents the ID reserved for a subvolume
 	ReservedID string
 
-	Monitors           string `json:"monitors"`
-	RootPath           string `json:"rootPath"`
-	Mounter            string `json:"mounter"`
-	ProvisionVolume    bool   `json:"provisionVolume"`
-	KernelMountOptions string `json:"kernelMountOptions"`
-	FuseMountOptions   string `json:"fuseMountOptions"`
+	Monitors            string `json:"monitors"`
+	RootPath            string `json:"rootPath"`
+	Mounter             string `json:"mounter"`
+	BackingSnapshotRoot string // Snapshot root relative to RootPath.
+	BackingSnapshotID   string
+	KernelMountOptions  string `json:"kernelMountOptions"`
+	FuseMountOptions    string `json:"fuseMountOptions"`
 	// Network namespace file path to execute nsenter command
 	NetNamespaceFilePath string
+	TopologyPools        *[]util.TopologyConstrainedPool
+	TopologyRequirement  *csi.TopologyRequirement
+	Topology             map[string]string
+	FscID                int64
 
 	// conn is a connection to the Ceph cluster obtained from a ConnPool
 	conn *util.ClusterConnection
+
+	ProvisionVolume bool `json:"provisionVolume"`
+	BackingSnapshot bool `json:"backingSnapshot"`
 }
 
 // Connect a CephFS volume to the Ceph cluster.
@@ -184,14 +189,20 @@ func (vo *VolumeOptions) GetConnection() *util.ClusterConnection {
 	return vo.conn
 }
 
+func fmtBackingSnapshotOptionMismatch(optName, expected, actual string) error {
+	return fmt.Errorf("%s option mismatch with backing snapshot: got %s, expected %s",
+		optName, actual, expected)
+}
+
 // NewVolumeOptions generates a new instance of volumeOptions from the provided
 // CSI request parameters.
 func NewVolumeOptions(ctx context.Context, requestName string, req *csi.CreateVolumeRequest,
 	cr *util.Credentials,
 ) (*VolumeOptions, error) {
 	var (
-		opts VolumeOptions
-		err  error
+		opts                VolumeOptions
+		backingSnapshotBool string
+		err                 error
 	)
 
 	volOptions := req.GetParameters()
@@ -228,6 +239,16 @@ func NewVolumeOptions(ctx context.Context, requestName string, req *csi.CreateVo
 		return nil, err
 	}
 
+	if err = extractOptionalOption(&backingSnapshotBool, "backingSnapshot", volOptions); err != nil {
+		return nil, err
+	}
+
+	if backingSnapshotBool != "" {
+		if opts.BackingSnapshot, err = strconv.ParseBool(backingSnapshotBool); err != nil {
+			return nil, fmt.Errorf("failed to parse backingSnapshot: %w", err)
+		}
+	}
+
 	opts.RequestName = requestName
 
 	err = opts.Connect(cr)
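Reviewer note (not part of the diff): backingSnapshot arrives as a plain string in the CSI parameters, so the accepted spellings are exactly those of strconv.ParseBool:

package main

import (
	"fmt"
	"strconv"
)

func main() {
	// ParseBool accepts 1, t, T, TRUE, true, True, 0, f, F, FALSE, false, False.
	for _, s := range []string{"true", "1", "T", "false", "0", "yes"} {
		v, err := strconv.ParseBool(s)
		fmt.Println(s, v, err) // "yes" yields an "invalid syntax" error
	}
}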
@@ -261,6 +282,19 @@ func NewVolumeOptions(ctx context.Context, requestName string, req *csi.CreateVo
 
 	opts.ProvisionVolume = true
 
+	if opts.BackingSnapshot {
+		if req.GetVolumeContentSource() == nil || req.GetVolumeContentSource().GetSnapshot() == nil {
+			return nil, errors.New("backingSnapshot option requires snapshot volume source")
+		}
+
+		opts.BackingSnapshotID = req.GetVolumeContentSource().GetSnapshot().GetSnapshotId()
+
+		err = opts.populateVolumeOptionsFromBackingSnapshot(ctx, cr)
+		if err != nil {
+			return nil, err
+		}
+	}
+
 	return &opts, nil
 }
@@ -364,23 +398,109 @@ func NewVolumeOptionsFromVolID(
 		}
 	}
 
+	if imageAttributes.BackingSnapshotID != "" || volOptions.BackingSnapshotID != "" {
+		volOptions.BackingSnapshot = true
+		volOptions.BackingSnapshotID = imageAttributes.BackingSnapshotID
+	}
+
 	volOptions.ProvisionVolume = true
 	volOptions.SubVolume.VolID = vid.FsSubvolName
-	vol := core.NewSubVolume(volOptions.conn, &volOptions.SubVolume, volOptions.ClusterID)
-	info, err := vol.GetSubVolumeInfo(ctx)
-	if err == nil {
-		volOptions.RootPath = info.Path
-		volOptions.Features = info.Features
-		volOptions.Size = info.BytesQuota
-	}
-	if errors.Is(err, cerrors.ErrInvalidCommand) {
-		volOptions.RootPath, err = vol.GetVolumeRootPathCeph(ctx)
+
+	if volOptions.BackingSnapshot {
+		err = volOptions.populateVolumeOptionsFromBackingSnapshot(ctx, cr)
+	} else {
+		err = volOptions.populateVolumeOptionsFromSubvolume(ctx)
 	}
 
 	return &volOptions, &vid, err
 }
+
+func (vo *VolumeOptions) populateVolumeOptionsFromSubvolume(ctx context.Context) error {
+	vol := core.NewSubVolume(vo.conn, &vo.SubVolume, vo.ClusterID)
+
+	var info *core.Subvolume
+	info, err := vol.GetSubVolumeInfo(ctx)
+	if err == nil {
+		vo.RootPath = info.Path
+		vo.Features = info.Features
+		vo.Size = info.BytesQuota
+	}
+
+	if errors.Is(err, cerrors.ErrInvalidCommand) {
+		vo.RootPath, err = vol.GetVolumeRootPathCeph(ctx)
+	}
+
+	return err
+}
+
+func (vo *VolumeOptions) populateVolumeOptionsFromBackingSnapshot(
+	ctx context.Context,
+	cr *util.Credentials,
+) error {
+	// As of CephFS snapshot v2 API, snapshots may be found in two locations:
+	//
+	// (a) /volumes/<volume group>/<subvolume>/.snap/<snapshot>/<UUID>
+	// (b) /volumes/<volume group>/<subvolume>/<UUID>/.snap/_<snapshot>_<snapshot inode number>
+
+	if !vo.ProvisionVolume {
+		// Case (b)
+		//
+		// If the volume is not provisioned by us, we assume that we have access only
+		// to snapshot's parent volume root. In this case, o.RootPath is expected to
+		// be already set in the volume context.
+
+		// BackingSnapshotRoot cannot be determined at this stage, because the
+		// full directory name is not known (see snapshot path format for case
+		// (b) above). RootPath/.snap must be traversed in order to find out
+		// the snapshot directory name.
+
+		return nil
+	}
+
+	parentBackingSnapVolOpts, _, snapID, err := NewSnapshotOptionsFromID(ctx, vo.BackingSnapshotID, cr)
+	if err != nil {
+		return fmt.Errorf("failed to retrieve backing snapshot %s: %w", vo.BackingSnapshotID, err)
+	}
+
+	// Ensure that backing snapshot parent's volume options match the context.
+	// Snapshot-backed volume inherits all its parent's (parent of the snapshot) options.
+
+	if vo.ClusterID != parentBackingSnapVolOpts.ClusterID {
+		return fmtBackingSnapshotOptionMismatch("clusterID", vo.ClusterID, parentBackingSnapVolOpts.ClusterID)
+	}
+
+	if vo.Pool != "" {
+		return errors.New("cannot set pool for snapshot-backed volume")
+	}
+
+	if vo.MetadataPool != parentBackingSnapVolOpts.MetadataPool {
+		return fmtBackingSnapshotOptionMismatch("MetadataPool", vo.MetadataPool, parentBackingSnapVolOpts.MetadataPool)
+	}
+
+	if vo.FsName != parentBackingSnapVolOpts.FsName {
+		return fmtBackingSnapshotOptionMismatch("fsName", vo.FsName, parentBackingSnapVolOpts.FsName)
+	}
+
+	if vo.SubvolumeGroup != parentBackingSnapVolOpts.SubvolumeGroup {
+		return fmtBackingSnapshotOptionMismatch("SubvolumeGroup", vo.SubvolumeGroup, parentBackingSnapVolOpts.SubvolumeGroup)
+	}
+
+	vo.Features = parentBackingSnapVolOpts.Features
+	vo.Size = parentBackingSnapVolOpts.Size
+
+	// For case (a) (o.ProvisionVolume==true is assumed), snapshot root path
+	// can be built out of subvolume root path, which is in following format:
+	//
+	//   /volumes/<volume group>/<subvolume>/<subvolume UUID>
+
+	subvolRoot, subvolUUID := path.Split(parentBackingSnapVolOpts.RootPath)
+
+	vo.RootPath = subvolRoot
+	vo.BackingSnapshotRoot = path.Join(".snap", snapID.FsSnapshotName, subvolUUID)
+
+	return nil
+}
 
 // NewVolumeOptionsFromMonitorList generates a new instance of VolumeOptions and
 // VolumeIdentifier from the provided CSI volume context.
 func NewVolumeOptionsFromMonitorList(
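Reviewer note (illustrative, not part of the diff): for case (a), the snapshot root is derived purely by path arithmetic from the parent subvolume's root path. A runnable sketch with made-up names:

package main

import (
	"fmt"
	"path"
)

func main() {
	// Subvolume root of the snapshot's parent, as Ceph reports it.
	parentRoot := "/volumes/csi/csi-vol-5ab67f8e/934bd7d1-b7f5-4bd0-8b54-7ce1e1e7e3c7"
	snapName := "csi-snap-0d9af493"

	subvolRoot, subvolUUID := path.Split(parentRoot)

	fmt.Println(subvolRoot) // RootPath: /volumes/csi/csi-vol-5ab67f8e/
	fmt.Println(path.Join(".snap", snapName, subvolUUID))
	// BackingSnapshotRoot: .snap/csi-snap-0d9af493/934bd7d1-b7f5-4bd0-8b54-7ce1e1e7e3c7
}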
@@ -438,9 +558,17 @@ func NewVolumeOptionsFromMonitorList(
 		return nil, nil, err
 	}
 
+	if err = extractOptionalOption(&opts.BackingSnapshotID, "backingSnapshotID", options); err != nil {
+		return nil, nil, err
+	}
+
 	vid.FsSubvolName = volID
 	vid.VolumeID = volID
 
+	if opts.BackingSnapshotID != "" {
+		opts.BackingSnapshot = true
+	}
+
 	return &opts, &vid, nil
 }
@@ -511,6 +639,10 @@ func NewVolumeOptionsFromStaticVolume(
 	vid.FsSubvolName = opts.RootPath
 	vid.VolumeID = volID
 
+	if opts.BackingSnapshotID != "" {
+		opts.BackingSnapshot = true
+	}
+
 	return &opts, &vid, nil
 }
@@ -599,6 +731,7 @@ func NewSnapshotOptionsFromID(
 	}
 	volOptions.Features = subvolInfo.Features
 	volOptions.Size = subvolInfo.BytesQuota
+	volOptions.RootPath = subvolInfo.Path
 
 	snap := core.NewSnapshot(volOptions.conn, sid.FsSnapshotName, &volOptions.SubVolume)
 	info, err := snap.GetSnapshotInfo(ctx)
 	if err != nil {

View File

@@ -152,7 +152,7 @@ type Config struct {
 	// ownerKey is used to identify the owner of the volume, can be used with some KMS configurations
 	ownerKey string
 
-	// backingSnapshotIDKey is snapshot ID on which a shallow CephFS volume is based.
+	// backingSnapshotIDKey ID of the snapshot on which the CephFS snapshot-backed volume is based
 	backingSnapshotIDKey string
 
 	// commonPrefix is the prefix common to all omap keys for this Config
@@ -532,7 +532,7 @@ Input arguments:
 - kmsConf: Name of the key management service used to encrypt the image (optional)
 - volUUID: UUID need to be reserved instead of auto-generating one (this is useful for mirroring and metro-DR)
 - owner: the owner of the volume (optional)
-- backingSnapshotID: (optional)
+- backingSnapshotID: ID of the snapshot on which the CephFS snapshot-backed volume is based (optional)
 
 Return values:
 - string: Contains the UUID that was reserved for the passed in reqName
@@ -645,7 +645,7 @@ func (conn *Connection) ReserveName(ctx context.Context,
 		omapValues[cj.cephSnapSourceKey] = parentName
 	}
 
-	// Update backing snapshot ID for shallow CephFS volume
+	// Update backing snapshot ID for snapshot-backed CephFS volume
 	if backingSnapshotID != "" {
 		omapValues[cj.backingSnapshotIDKey] = backingSnapshotID
 	}
@@ -667,7 +667,7 @@ type ImageAttributes struct {
 	Owner             string // Contains the owner to be used in combination with KmsID (for some KMS)
 	ImageID           string // Contains the image id
 	JournalPoolID     int64  // Pool ID of the CSI journal pool, stored in big endian format (on-disk data)
-	BackingSnapshotID string // ID of the backing snapshot of a shallow CephFS volume
+	BackingSnapshotID string // ID of the snapshot on which the CephFS snapshot-backed volume is based
 }
 
 // GetImageAttributes fetches all keys and their values, from a UUID directory, returning ImageAttributes structure.