diff --git a/internal/cephfs/controllerserver.go b/internal/cephfs/controllerserver.go index 2f28ba0df..f638f3ef6 100644 --- a/internal/cephfs/controllerserver.go +++ b/internal/cephfs/controllerserver.go @@ -29,6 +29,7 @@ import ( "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/k8s" "github.com/ceph/ceph-csi/internal/util/log" + rterrors "github.com/ceph/ceph-csi/internal/util/reftracker/errors" "github.com/container-storage-interface/spec/lib/go/csi" "github.com/golang/protobuf/ptypes/timestamp" @@ -65,47 +66,77 @@ func (cs *ControllerServer) createBackingVolume( volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID) if sID != nil { - if err = cs.OperationLocks.GetRestoreLock(sID.SnapshotID); err != nil { - log.ErrorLog(ctx, err.Error()) - - return status.Error(codes.Aborted, err.Error()) - } - defer cs.OperationLocks.ReleaseRestoreLock(sID.SnapshotID) - snap := core.Snapshot{ - SnapshotID: sID.FsSnapshotName, - SubVolume: &parentVolOpt.SubVolume, - } - - err = volClient.CreateCloneFromSnapshot(ctx, snap) - if err != nil { - log.ErrorLog(ctx, "failed to create clone from snapshot %s: %v", sID.FsSnapshotName, err) - - return err - } - - return err + return cs.createBackingVolumeFromSnapshotSource(ctx, volOptions, parentVolOpt, volClient, sID) } - if parentVolOpt != nil { - if err = cs.OperationLocks.GetCloneLock(pvID.VolumeID); err != nil { - log.ErrorLog(ctx, err.Error()) - return status.Error(codes.Aborted, err.Error()) - } - defer cs.OperationLocks.ReleaseCloneLock(pvID.VolumeID) - err = volClient.CreateCloneFromSubvolume( - ctx, &parentVolOpt.SubVolume) - if err != nil { - log.ErrorLog(ctx, "failed to create clone from subvolume %s: %v", fsutil.VolumeID(pvID.FsSubvolName), err) + if parentVolOpt != nil { + return cs.createBackingVolumeFromVolumeSource(ctx, parentVolOpt, volClient, pvID) + } + + if err = volClient.CreateVolume(ctx); err != nil { + log.ErrorLog(ctx, "failed to create volume %s: %v", volOptions.RequestName, err) + + return status.Error(codes.Internal, err.Error()) + } + + return nil +} + +func (cs *ControllerServer) createBackingVolumeFromSnapshotSource( + ctx context.Context, + volOptions *store.VolumeOptions, + parentVolOpt *store.VolumeOptions, + volClient core.SubVolumeClient, + sID *store.SnapshotIdentifier, +) error { + if err := cs.OperationLocks.GetRestoreLock(sID.SnapshotID); err != nil { + log.ErrorLog(ctx, err.Error()) + + return status.Error(codes.Aborted, err.Error()) + } + defer cs.OperationLocks.ReleaseRestoreLock(sID.SnapshotID) + + if volOptions.BackingSnapshot { + if err := store.AddSnapshotBackedVolumeRef(ctx, volOptions); err != nil { + log.ErrorLog(ctx, "failed to create snapshot-backed volume from snapshot %s: %v", + sID.FsSnapshotName, err) return err } return nil } - if err = volClient.CreateVolume(ctx); err != nil { - log.ErrorLog(ctx, "failed to create volume %s: %v", volOptions.RequestName, err) - return status.Error(codes.Internal, err.Error()) + err := volClient.CreateCloneFromSnapshot(ctx, core.Snapshot{ + SnapshotID: sID.FsSnapshotName, + SubVolume: &parentVolOpt.SubVolume, + }) + if err != nil { + log.ErrorLog(ctx, "failed to create clone from snapshot %s: %v", sID.FsSnapshotName, err) + + return err + } + + return nil +} + +func (cs *ControllerServer) createBackingVolumeFromVolumeSource( + ctx context.Context, + parentVolOpt *store.VolumeOptions, + volClient core.SubVolumeClient, + pvID *store.VolumeIdentifier, +) error { + if err := 
cs.OperationLocks.GetCloneLock(pvID.VolumeID); err != nil { + log.ErrorLog(ctx, err.Error()) + + return status.Error(codes.Aborted, err.Error()) + } + defer cs.OperationLocks.ReleaseCloneLock(pvID.VolumeID) + + if err := volClient.CreateCloneFromSubvolume(ctx, &parentVolOpt.SubVolume); err != nil { + log.ErrorLog(ctx, "failed to create clone from subvolume %s: %v", fsutil.VolumeID(pvID.FsSubvolName), err) + + return err } return nil @@ -158,6 +189,7 @@ func checkValidCreateVolumeRequest( parentVol *store.VolumeOptions, pvID *store.VolumeIdentifier, sID *store.SnapshotIdentifier, + req *csi.CreateVolumeRequest, ) error { switch { case pvID != nil: @@ -168,6 +200,10 @@ func checkValidCreateVolumeRequest( parentVol.Size, vol.Size) } + + if vol.BackingSnapshot { + return errors.New("cloning snapshot-backed volumes is currently not supported") + } case sID != nil: if vol.Size < parentVol.Size { return fmt.Errorf( @@ -176,6 +212,25 @@ func checkValidCreateVolumeRequest( parentVol.Size, vol.Size) } + + if vol.BackingSnapshot { + if vol.Size != parentVol.Size { + return fmt.Errorf( + "cannot create snapshot-backed volume of different size: expected %d bytes, got %d bytes", + parentVol.Size, + vol.Size, + ) + } + + volCaps := req.GetVolumeCapabilities() + for _, volCap := range volCaps { + mode := volCap.AccessMode.Mode + if mode != csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY && + mode != csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY { + return errors.New("backingSnapshot may be used only with read-only access modes") + } + } + } } return nil @@ -233,7 +288,7 @@ func (cs *ControllerServer) CreateVolume( defer parentVol.Destroy() } - err = checkValidCreateVolumeRequest(volOptions, parentVol, pvID, sID) + err = checkValidCreateVolumeRequest(volOptions, parentVol, pvID, sID, req) if err != nil { return nil, status.Error(codes.InvalidArgument, err.Error()) } @@ -249,7 +304,7 @@ func (cs *ControllerServer) CreateVolume( // TODO return error message if requested vol size greater than found volume return error if vID != nil { - if sID != nil || pvID != nil { + if sID != nil || pvID != nil && !volOptions.BackingSnapshot { volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID) err = volClient.ExpandVolume(ctx, volOptions.Size) if err != nil { @@ -321,26 +376,32 @@ func (cs *ControllerServer) CreateVolume( return nil, err } - volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID) - volOptions.RootPath, err = volClient.GetVolumeRootPathCeph(ctx) - if err != nil { - purgeErr := volClient.PurgeVolume(ctx, true) - if purgeErr != nil { - log.ErrorLog(ctx, "failed to delete volume %s: %v", vID.FsSubvolName, purgeErr) - // All errors other than ErrVolumeNotFound should return an error back to the caller - if !errors.Is(purgeErr, cerrors.ErrVolumeNotFound) { - // If the subvolume deletion is failed, we should not cleanup - // the OMAP entry it will stale subvolume in cluster. - // set err=nil so that when we get the request again we can get - // the subvolume info. - err = nil + if !volOptions.BackingSnapshot { + // Get root path for the created subvolume. + // Note that root path for snapshot-backed volumes has been already set when + // building VolumeOptions. 
- return nil, status.Error(codes.Internal, purgeErr.Error()) + volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID) + volOptions.RootPath, err = volClient.GetVolumeRootPathCeph(ctx) + if err != nil { + purgeErr := volClient.PurgeVolume(ctx, true) + if purgeErr != nil { + log.ErrorLog(ctx, "failed to delete volume %s: %v", vID.FsSubvolName, purgeErr) + // All errors other than ErrVolumeNotFound should return an error back to the caller + if !errors.Is(purgeErr, cerrors.ErrVolumeNotFound) { + // If the subvolume deletion is failed, we should not cleanup + // the OMAP entry it will stale subvolume in cluster. + // set err=nil so that when we get the request again we can get + // the subvolume info. + err = nil + + return nil, status.Error(codes.Internal, purgeErr.Error()) + } } - } - log.ErrorLog(ctx, "failed to get subvolume path %s: %v", vID.FsSubvolName, err) + log.ErrorLog(ctx, "failed to get subvolume path %s: %v", vID.FsSubvolName, err) - return nil, status.Error(codes.Internal, err.Error()) + return nil, status.Error(codes.Internal, err.Error()) + } } log.DebugLog(ctx, "cephfs: successfully created backing volume named %s for request name %s", @@ -452,16 +513,8 @@ func (cs *ControllerServer) DeleteVolume( } defer cr.DeleteCredentials() - volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID) - if err = volClient.PurgeVolume(ctx, false); err != nil { - log.ErrorLog(ctx, "failed to delete volume %s: %v", volID, err) - if errors.Is(err, cerrors.ErrVolumeHasSnapshots) { - return nil, status.Error(codes.FailedPrecondition, err.Error()) - } - - if !errors.Is(err, cerrors.ErrVolumeNotFound) { - return nil, status.Error(codes.Internal, err.Error()) - } + if err := cleanUpBackingVolume(ctx, volOptions, vID, cr); err != nil { + return nil, err } if err := store.UndoVolReservation(ctx, volOptions, *vID, secrets); err != nil { @@ -473,6 +526,84 @@ func (cs *ControllerServer) DeleteVolume( return &csi.DeleteVolumeResponse{}, nil } +func cleanUpBackingVolume( + ctx context.Context, + volOptions *store.VolumeOptions, + volID *store.VolumeIdentifier, + cr *util.Credentials, +) error { + if !volOptions.BackingSnapshot { + // Regular volumes need to be purged. + + volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID) + if err := volClient.PurgeVolume(ctx, false); err != nil { + log.ErrorLog(ctx, "failed to delete volume %s: %v", volID, err) + if errors.Is(err, cerrors.ErrVolumeHasSnapshots) { + return status.Error(codes.FailedPrecondition, err.Error()) + } + + if !errors.Is(err, cerrors.ErrVolumeNotFound) { + return status.Error(codes.Internal, err.Error()) + } + } + + return nil + } + + // Snapshot-backed volumes need to un-reference the backing snapshot, and + // the snapshot itself may need to be deleted if its reftracker doesn't + // hold any references anymore. 
+ + backingSnapNeedsDelete, err := store.UnrefSnapshotBackedVolume(ctx, volOptions) + if err != nil { + if errors.Is(err, rterrors.ErrObjectOutOfDate) { + return status.Error(codes.Aborted, err.Error()) + } + + return status.Error(codes.Internal, err.Error()) + } + + if !backingSnapNeedsDelete { + return nil + } + + snapParentVolOptions, _, snapID, err := store.NewSnapshotOptionsFromID(ctx, volOptions.BackingSnapshotID, cr) + if err != nil { + absorbErrs := []error{ + util.ErrPoolNotFound, + util.ErrKeyNotFound, + cerrors.ErrSnapNotFound, + cerrors.ErrVolumeNotFound, + } + + fatalErr := true + for i := range absorbErrs { + if errors.Is(err, absorbErrs[i]) { + fatalErr = false + + break + } + } + + if fatalErr { + return status.Error(codes.Internal, err.Error()) + } + } else { + snapClient := core.NewSnapshot( + snapParentVolOptions.GetConnection(), + snapID.FsSnapshotName, + &snapParentVolOptions.SubVolume, + ) + + err = deleteSnapshotAndUndoReservation(ctx, snapClient, snapParentVolOptions, snapID, cr) + if err != nil { + return status.Error(codes.Internal, err.Error()) + } + } + + return nil +} + // ValidateVolumeCapabilities checks whether the volume capabilities requested // are supported. func (cs *ControllerServer) ValidateVolumeCapabilities( @@ -537,6 +668,10 @@ func (cs *ControllerServer) ControllerExpandVolume( } defer volOptions.Destroy() + if volOptions.BackingSnapshot { + return nil, status.Error(codes.InvalidArgument, "cannot expand snapshot-backed volume") + } + RoundOffSize := util.RoundOffBytes(req.GetCapacityRange().GetRequiredBytes()) volClient := core.NewSubVolume(volOptions.GetConnection(), &volOptions.SubVolume, volOptions.ClusterID) if err = volClient.ResizeVolume(ctx, RoundOffSize); err != nil { @@ -615,6 +750,10 @@ func (cs *ControllerServer) CreateSnapshot( parentVolOptions.ClusterID) } + if parentVolOptions.BackingSnapshot { + return nil, status.Error(codes.InvalidArgument, "cannot snapshot a snapshot-backed volume") + } + cephfsSnap, genSnapErr := store.GenSnapFromOptions(ctx, req) if genSnapErr != nil { return nil, status.Error(codes.Internal, genSnapErr.Error()) @@ -780,6 +919,7 @@ func (cs *ControllerServer) validateSnapshotReq(ctx context.Context, req *csi.Cr // DeleteSnapshot deletes the snapshot in backend and removes the // snapshot metadata from store. 
+// nolint:gocyclo,cyclop // TODO: reduce complexity func (cs *ControllerServer) DeleteSnapshot( ctx context.Context, req *csi.DeleteSnapshotRequest, @@ -878,17 +1018,51 @@ func (cs *ControllerServer) DeleteSnapshot( return nil, status.Error(codes.Internal, err.Error()) } } - err = snapClient.DeleteSnapshot(ctx) + + needsDelete, err := store.UnrefSelfInSnapshotBackedVolumes(ctx, volOpt, sid.SnapshotID) if err != nil { - return nil, status.Error(codes.Internal, err.Error()) - } - err = store.UndoSnapReservation(ctx, volOpt, *sid, sid.RequestName, cr) - if err != nil { - log.ErrorLog(ctx, "failed to remove reservation for snapname (%s) with backing snap (%s) (%s)", - sid.RequestName, sid.FsSnapshotName, err) + if errors.Is(err, rterrors.ErrObjectOutOfDate) { + return nil, status.Error(codes.Aborted, err.Error()) + } return nil, status.Error(codes.Internal, err.Error()) } + if needsDelete { + err = deleteSnapshotAndUndoReservation( + ctx, + snapClient, + volOpt, + sid, + cr, + ) + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + } + return &csi.DeleteSnapshotResponse{}, nil } + +func deleteSnapshotAndUndoReservation( + ctx context.Context, + snapClient core.SnapshotClient, + parentVolOptions *store.VolumeOptions, + snapID *store.SnapshotIdentifier, + cr *util.Credentials, +) error { + err := snapClient.DeleteSnapshot(ctx) + if err != nil { + return err + } + + err = store.UndoSnapReservation(ctx, parentVolOptions, *snapID, snapID.RequestName, cr) + if err != nil { + log.ErrorLog(ctx, "failed to remove reservation for snapname (%s) with backing snap (%s) (%s)", + snapID.RequestName, snapID.RequestName, err) + + return err + } + + return nil +} diff --git a/internal/cephfs/fuserecovery.go b/internal/cephfs/fuserecovery.go index 32018a328..e7f25aa6a 100644 --- a/internal/cephfs/fuserecovery.go +++ b/internal/cephfs/fuserecovery.go @@ -192,7 +192,7 @@ func (ns *NodeServer) tryRestoreFuseMountsInNodePublish( // Unmount and mount the volume. if stagingTargetMs != msMounted { - if err := mounter.UnmountVolume(ctx, stagingTargetPath); err != nil { + if err := mounter.UnmountAll(ctx, stagingTargetPath); err != nil { return err } @@ -269,5 +269,5 @@ func (ns *NodeServer) tryRestoreFuseMountInNodeStage( // Restoration here means only unmounting the volume. // NodeStageVolume should take care of the rest. 
- return mounter.UnmountVolume(ctx, stagingTargetPath) + return mounter.UnmountAll(ctx, stagingTargetPath) } diff --git a/internal/cephfs/mounter/fuse.go b/internal/cephfs/mounter/fuse.go index fdfee6eae..606976ec9 100644 --- a/internal/cephfs/mounter/fuse.go +++ b/internal/cephfs/mounter/fuse.go @@ -118,8 +118,8 @@ func (m *FuseMounter) Mount( func (m *FuseMounter) Name() string { return "Ceph FUSE driver" } -func UnmountVolume(ctx context.Context, mountPoint string) error { - if _, stderr, err := util.ExecCommand(ctx, "umount", mountPoint); err != nil { +func UnmountVolume(ctx context.Context, mountPoint string, opts ...string) error { + if _, stderr, err := util.ExecCommand(ctx, "umount", append([]string{mountPoint}, opts...)...); err != nil { err = fmt.Errorf("%w stderr: %s", err, stderr) if strings.Contains(err.Error(), fmt.Sprintf("umount: %s: not mounted", mountPoint)) || strings.Contains(err.Error(), "No such file or directory") { @@ -149,3 +149,7 @@ func UnmountVolume(ctx context.Context, mountPoint string) error { return nil } + +func UnmountAll(ctx context.Context, mountPoint string) error { + return UnmountVolume(ctx, mountPoint, "--all-targets") +} diff --git a/internal/cephfs/nodeserver.go b/internal/cephfs/nodeserver.go index 2c675aa18..c6a2eb691 100644 --- a/internal/cephfs/nodeserver.go +++ b/internal/cephfs/nodeserver.go @@ -21,6 +21,7 @@ import ( "errors" "fmt" "os" + "path" "strings" cerrors "github.com/ceph/ceph-csi/internal/cephfs/errors" @@ -101,6 +102,20 @@ func (ns *NodeServer) getVolumeOptions( return volOptions, nil } +func validateSnapshotBackedVolCapability(volCap *csi.VolumeCapability) error { + // Snapshot-backed volumes may be used with read-only volume access modes only. + + mode := volCap.AccessMode.Mode + + if mode != csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY && + mode != csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY { + return status.Error(codes.InvalidArgument, + "snapshot-backed volume supports only read-only access mode") + } + + return nil +} + // NodeStageVolume mounts the volume to a staging path on the node. func (ns *NodeServer) NodeStageVolume( ctx context.Context, @@ -140,6 +155,12 @@ func (ns *NodeServer) NodeStageVolume( } } + if volOptions.BackingSnapshot { + if err = validateSnapshotBackedVolCapability(req.GetVolumeCapability()); err != nil { + return nil, err + } + } + mnt, err := mounter.New(volOptions) if err != nil { log.ErrorLog(ctx, "failed to create mounter for volume %s: %v", volID, err) @@ -192,7 +213,7 @@ func (ns *NodeServer) NodeStageVolume( log.ErrorLog(ctx, "cephfs: failed to write NodeStageMountinfo for volume %s: %v", volID, err) // Try to clean node stage mount. 
-		if unmountErr := mounter.UnmountVolume(ctx, stagingTargetPath); unmountErr != nil {
+		if unmountErr := mounter.UnmountAll(ctx, stagingTargetPath); unmountErr != nil {
 			log.ErrorLog(ctx, "cephfs: failed to unmount %s in WriteNodeStageMountinfo clean up: %v",
 				stagingTargetPath, unmountErr)
 		}
@@ -223,7 +244,7 @@ func (*NodeServer) mount(
 
 	log.DebugLog(ctx, "cephfs: mounting volume %s with %s", volID, mnt.Name())
 
-	readOnly := "ro"
+	const readOnly = "ro"
 
 	if volCap.AccessMode.Mode == csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY ||
 		volCap.AccessMode.Mode == csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY {
@@ -248,9 +269,112 @@ func (*NodeServer) mount(
 		return status.Error(codes.Internal, err.Error())
 	}
 
+	defer func() {
+		if err == nil {
+			return
+		}
+
+		unmountErr := mounter.UnmountAll(ctx, stagingTargetPath)
+		if unmountErr != nil {
+			log.ErrorLog(ctx, "failed to clean up mounts in rollback procedure: %v", unmountErr)
+		}
+	}()
+
+	if volOptions.BackingSnapshot {
+		snapshotRoot, err := getBackingSnapshotRoot(ctx, volOptions, stagingTargetPath)
+		if err != nil {
+			return err
+		}
+
+		absoluteSnapshotRoot := path.Join(stagingTargetPath, snapshotRoot)
+		err = mounter.BindMount(
+			ctx,
+			absoluteSnapshotRoot,
+			stagingTargetPath,
+			true,
+			[]string{"bind", "_netdev"},
+		)
+
+		if err != nil {
+			log.ErrorLog(ctx,
+				"failed to bind mount snapshot root %s: %v", absoluteSnapshotRoot, err)
+
+			return status.Error(codes.Internal, err.Error())
+		}
+	}
+
 	return nil
 }
 
+func getBackingSnapshotRoot(
+	ctx context.Context,
+	volOptions *store.VolumeOptions,
+	stagingTargetPath string,
+) (string, error) {
+	if volOptions.ProvisionVolume {
+		// Provisioned snapshot-backed volumes should have their BackingSnapshotRoot
+		// already populated.
+		return volOptions.BackingSnapshotRoot, nil
+	}
+
+	// Pre-provisioned snapshot-backed volumes are more involved:
+	//
+	// Snapshots created with `ceph fs subvolume snapshot create` have the following
+	// snap directory name format inside <root path>/.snap:
+	//
+	//   _<snapshot>_<snapshot inode number>
+	//
+	// We don't know what <snapshot inode number> is, and so <root path>/.snap
+	// needs to be traversed in order to determine the full snapshot directory name.
+
+	snapshotsBase := path.Join(stagingTargetPath, ".snap")
+
+	dir, err := os.Open(snapshotsBase)
+	if err != nil {
+		log.ErrorLog(ctx, "failed to open %s when searching for snapshot root: %v", snapshotsBase, err)
+
+		return "", status.Errorf(codes.Internal, err.Error())
+	}
+
+	// Read the contents of <root path>/.snap directory into a string slice.
+
+	contents, err := dir.Readdirnames(0)
+	if err != nil {
+		log.ErrorLog(ctx, "failed to read %s when searching for snapshot root: %v", snapshotsBase, err)
+
+		return "", status.Errorf(codes.Internal, err.Error())
+	}
+
+	var (
+		found           bool
+		snapshotDirName string
+	)
+
+	// Look through the directory's contents and try to find the correct snapshot
+	// dir name. The search must be exhaustive to catch possible ambiguous results.
+ + for i := range contents { + if !strings.Contains(contents[i], volOptions.BackingSnapshotID) { + continue + } + + if !found { + found = true + snapshotDirName = contents[i] + } else { + return "", status.Errorf(codes.InvalidArgument, "ambiguous backingSnapshotID %s in %s", + volOptions.BackingSnapshotID, snapshotsBase) + } + } + + if !found { + return "", status.Errorf(codes.InvalidArgument, "no snapshot with backingSnapshotID %s found in %s", + volOptions.BackingSnapshotID, snapshotsBase) + } + + return path.Join(".snap", snapshotDirName), nil +} + // NodePublishVolume mounts the volume mounted to the staging path to the target // path. func (ns *NodeServer) NodePublishVolume( @@ -444,7 +568,7 @@ func (ns *NodeServer) NodeUnstageVolume( return &csi.NodeUnstageVolumeResponse{}, nil } // Unmount the volume - if err = mounter.UnmountVolume(ctx, stagingTargetPath); err != nil { + if err = mounter.UnmountAll(ctx, stagingTargetPath); err != nil { return nil, status.Error(codes.Internal, err.Error()) } diff --git a/internal/cephfs/store/backingsnapshot.go b/internal/cephfs/store/backingsnapshot.go new file mode 100644 index 000000000..2c22839d3 --- /dev/null +++ b/internal/cephfs/store/backingsnapshot.go @@ -0,0 +1,168 @@ +/* +Copyright 2022 The Ceph-CSI Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package store + +import ( + "context" + "fmt" + + fsutil "github.com/ceph/ceph-csi/internal/cephfs/util" + "github.com/ceph/ceph-csi/internal/util/log" + "github.com/ceph/ceph-csi/internal/util/reftracker" + "github.com/ceph/ceph-csi/internal/util/reftracker/radoswrapper" + "github.com/ceph/ceph-csi/internal/util/reftracker/reftype" +) + +func fmtBackingSnapshotReftrackerName(backingSnapID string) string { + return fmt.Sprintf("rt-backingsnapshot-%s", backingSnapID) +} + +func AddSnapshotBackedVolumeRef( + ctx context.Context, + volOptions *VolumeOptions, +) error { + ioctx, err := volOptions.conn.GetIoctx(volOptions.MetadataPool) + if err != nil { + log.ErrorLog(ctx, "failed to create RADOS ioctx: %s", err) + + return err + } + defer ioctx.Destroy() + + ioctx.SetNamespace(fsutil.RadosNamespace) + + var ( + backingSnapID = volOptions.BackingSnapshotID + ioctxW = radoswrapper.NewIOContext(ioctx) + ) + + created, err := reftracker.Add( + ioctxW, + fmtBackingSnapshotReftrackerName(backingSnapID), + map[string]struct{}{ + backingSnapID: {}, + volOptions.VolID: {}, + }, + ) + if err != nil { + log.ErrorLog(ctx, "failed to add refs for backing snapshot %s: %v", + backingSnapID, err) + + return err + } + + defer func() { + if err == nil { + return + } + + // Clean up after failure. 
+ + var deleted bool + deleted, err = reftracker.Remove( + ioctxW, + fmtBackingSnapshotReftrackerName(backingSnapID), + map[string]reftype.RefType{ + backingSnapID: reftype.Normal, + volOptions.VolID: reftype.Normal, + }, + ) + if err != nil { + log.ErrorLog(ctx, "failed to remove refs in cleanup procedure for backing snapshot %s: %v", + backingSnapID, err) + } + + if created && !deleted { + log.ErrorLog(ctx, "orphaned reftracker object %s (pool %s, namespace %s)", + backingSnapID, volOptions.MetadataPool, fsutil.RadosNamespace) + } + }() + + // There may have been a race between adding a ref to the reftracker and + // deleting the backing snapshot. Make sure the snapshot still exists by + // trying to retrieve it again. + _, _, _, err = NewSnapshotOptionsFromID(ctx, volOptions.BackingSnapshotID, volOptions.conn.Creds) + if err != nil { + log.ErrorLog(ctx, "failed to get backing snapshot %s: %v", volOptions.BackingSnapshotID, err) + } + + return err +} + +func UnrefSnapshotBackedVolume( + ctx context.Context, + volOptions *VolumeOptions, +) (bool, error) { + ioctx, err := volOptions.conn.GetIoctx(volOptions.MetadataPool) + if err != nil { + log.ErrorLog(ctx, "failed to create RADOS ioctx: %s", err) + + return false, err + } + defer ioctx.Destroy() + + ioctx.SetNamespace(fsutil.RadosNamespace) + + var ( + backingSnapID = volOptions.BackingSnapshotID + ioctxW = radoswrapper.NewIOContext(ioctx) + ) + + deleted, err := reftracker.Remove( + ioctxW, + fmtBackingSnapshotReftrackerName(backingSnapID), + map[string]reftype.RefType{ + volOptions.VolID: reftype.Normal, + }, + ) + if err != nil { + log.ErrorLog(ctx, "failed to remove refs for backing snapshot %s: %v", + backingSnapID, err) + + return false, err + } + + return deleted, err +} + +// UnrefSelfInSnapshotBackedVolumes removes (masks) snapshot ID in the +// reftracker for volumes backed by this snapshot. The returned boolean +// value signals whether the snapshot is not referenced by any such volumes +// and needs to be removed. 
+func UnrefSelfInSnapshotBackedVolumes( + ctx context.Context, + snapParentVolOptions *VolumeOptions, + snapshotID string, +) (bool, error) { + ioctx, err := snapParentVolOptions.conn.GetIoctx(snapParentVolOptions.MetadataPool) + if err != nil { + log.ErrorLog(ctx, "failed to create RADOS ioctx: %s", err) + + return false, err + } + defer ioctx.Destroy() + + ioctx.SetNamespace(fsutil.RadosNamespace) + + return reftracker.Remove( + radoswrapper.NewIOContext(ioctx), + fmtBackingSnapshotReftrackerName(snapshotID), + map[string]reftype.RefType{ + snapshotID: reftype.Mask, + }, + ) +} diff --git a/internal/cephfs/store/volumeoptions.go b/internal/cephfs/store/volumeoptions.go index e0380f149..b1765d2f1 100644 --- a/internal/cephfs/store/volumeoptions.go +++ b/internal/cephfs/store/volumeoptions.go @@ -20,6 +20,7 @@ import ( "context" "errors" "fmt" + "path" "strconv" "strings" @@ -34,27 +35,31 @@ import ( type VolumeOptions struct { core.SubVolume - TopologyPools *[]util.TopologyConstrainedPool - TopologyRequirement *csi.TopologyRequirement - Topology map[string]string - RequestName string - NamePrefix string - ClusterID string - FscID int64 - MetadataPool string + + RequestName string + NamePrefix string + ClusterID string + MetadataPool string // ReservedID represents the ID reserved for a subvolume - ReservedID string - Monitors string `json:"monitors"` - RootPath string `json:"rootPath"` - Mounter string `json:"mounter"` - ProvisionVolume bool `json:"provisionVolume"` - KernelMountOptions string `json:"kernelMountOptions"` - FuseMountOptions string `json:"fuseMountOptions"` - // Network namespace file path to execute nsenter command + ReservedID string + Monitors string `json:"monitors"` + RootPath string `json:"rootPath"` + Mounter string `json:"mounter"` + BackingSnapshotRoot string // Snapshot root relative to RootPath. + BackingSnapshotID string + KernelMountOptions string `json:"kernelMountOptions"` + FuseMountOptions string `json:"fuseMountOptions"` NetNamespaceFilePath string + TopologyPools *[]util.TopologyConstrainedPool + TopologyRequirement *csi.TopologyRequirement + Topology map[string]string + FscID int64 // conn is a connection to the Ceph cluster obtained from a ConnPool conn *util.ClusterConnection + + ProvisionVolume bool `json:"provisionVolume"` + BackingSnapshot bool `json:"backingSnapshot"` } // Connect a CephFS volume to the Ceph cluster. @@ -184,14 +189,20 @@ func (vo *VolumeOptions) GetConnection() *util.ClusterConnection { return vo.conn } +func fmtBackingSnapshotOptionMismatch(optName, expected, actual string) error { + return fmt.Errorf("%s option mismatch with backing snapshot: got %s, expected %s", + optName, actual, expected) +} + // NewVolumeOptions generates a new instance of volumeOptions from the provided // CSI request parameters. 
 func NewVolumeOptions(ctx context.Context, requestName string, req *csi.CreateVolumeRequest,
 	cr *util.Credentials,
 ) (*VolumeOptions, error) {
 	var (
-		opts VolumeOptions
-		err  error
+		opts                VolumeOptions
+		backingSnapshotBool string
+		err                 error
 	)
 
 	volOptions := req.GetParameters()
@@ -228,6 +239,16 @@ func NewVolumeOptions(ctx context.Context, requestName string, req *csi.CreateVo
 		return nil, err
 	}
 
+	if err = extractOptionalOption(&backingSnapshotBool, "backingSnapshot", volOptions); err != nil {
+		return nil, err
+	}
+
+	if backingSnapshotBool != "" {
+		if opts.BackingSnapshot, err = strconv.ParseBool(backingSnapshotBool); err != nil {
+			return nil, fmt.Errorf("failed to parse backingSnapshot: %w", err)
+		}
+	}
+
 	opts.RequestName = requestName
 
 	err = opts.Connect(cr)
@@ -261,6 +282,19 @@ func NewVolumeOptions(ctx context.Context, requestName string, req *csi.CreateVo
 
 	opts.ProvisionVolume = true
 
+	if opts.BackingSnapshot {
+		if req.GetVolumeContentSource() == nil || req.GetVolumeContentSource().GetSnapshot() == nil {
+			return nil, errors.New("backingSnapshot option requires snapshot volume source")
+		}
+
+		opts.BackingSnapshotID = req.GetVolumeContentSource().GetSnapshot().GetSnapshotId()
+
+		err = opts.populateVolumeOptionsFromBackingSnapshot(ctx, cr)
+		if err != nil {
+			return nil, err
+		}
+	}
+
 	return &opts, nil
 }
 
@@ -364,23 +398,109 @@ func NewVolumeOptionsFromVolID(
 		}
 	}
 
-	volOptions.ProvisionVolume = true
-	volOptions.SubVolume.VolID = vid.FsSubvolName
-	vol := core.NewSubVolume(volOptions.conn, &volOptions.SubVolume, volOptions.ClusterID)
-	info, err := vol.GetSubVolumeInfo(ctx)
-	if err == nil {
-		volOptions.RootPath = info.Path
-		volOptions.Features = info.Features
-		volOptions.Size = info.BytesQuota
+	if imageAttributes.BackingSnapshotID != "" || volOptions.BackingSnapshotID != "" {
+		volOptions.BackingSnapshot = true
+		volOptions.BackingSnapshotID = imageAttributes.BackingSnapshotID
 	}
 
-	if errors.Is(err, cerrors.ErrInvalidCommand) {
-		volOptions.RootPath, err = vol.GetVolumeRootPathCeph(ctx)
+	volOptions.ProvisionVolume = true
+	volOptions.SubVolume.VolID = vid.FsSubvolName
+
+	if volOptions.BackingSnapshot {
+		err = volOptions.populateVolumeOptionsFromBackingSnapshot(ctx, cr)
+	} else {
+		err = volOptions.populateVolumeOptionsFromSubvolume(ctx)
 	}
 
 	return &volOptions, &vid, err
 }
 
+func (vo *VolumeOptions) populateVolumeOptionsFromSubvolume(ctx context.Context) error {
+	vol := core.NewSubVolume(vo.conn, &vo.SubVolume, vo.ClusterID)
+
+	var info *core.Subvolume
+	info, err := vol.GetSubVolumeInfo(ctx)
+	if err == nil {
+		vo.RootPath = info.Path
+		vo.Features = info.Features
+		vo.Size = info.BytesQuota
+	}
+
+	if errors.Is(err, cerrors.ErrInvalidCommand) {
+		vo.RootPath, err = vol.GetVolumeRootPathCeph(ctx)
+	}
+
+	return err
+}
+
+func (vo *VolumeOptions) populateVolumeOptionsFromBackingSnapshot(
+	ctx context.Context,
+	cr *util.Credentials,
+) error {
+	// As of CephFS snapshot v2 API, snapshots may be found in two locations:
+	//
+	// (a) /volumes/<volume group>/<subvolume>/.snap/<snapshot>/<subvolume UUID>
+	// (b) /volumes/<volume group>/<subvolume>/<subvolume UUID>/.snap/_<snapshot>_<snapshot inode number>
+
+	if !vo.ProvisionVolume {
+		// Case (b)
+		//
+		// If the volume is not provisioned by us, we assume that we have access only
+		// to the snapshot's parent volume root. In this case, vo.RootPath is expected
+		// to have already been set in the volume context.

+		// BackingSnapshotRoot cannot be determined at this stage, because the
+		// full directory name is not known (see snapshot path format for case
+		// (b) above). RootPath/.snap must be traversed in order to find out
+		// the snapshot directory name.
+
+		return nil
+	}
+
+	parentBackingSnapVolOpts, _, snapID, err := NewSnapshotOptionsFromID(ctx, vo.BackingSnapshotID, cr)
+	if err != nil {
+		return fmt.Errorf("failed to retrieve backing snapshot %s: %w", vo.BackingSnapshotID, err)
+	}
+
+	// Ensure that the backing snapshot parent's volume options match the volume context.
+	// A snapshot-backed volume inherits all of its parent's (the snapshot's parent) options.
+
+	if vo.ClusterID != parentBackingSnapVolOpts.ClusterID {
+		return fmtBackingSnapshotOptionMismatch("clusterID", vo.ClusterID, parentBackingSnapVolOpts.ClusterID)
+	}
+
+	if vo.Pool != "" {
+		return errors.New("cannot set pool for snapshot-backed volume")
+	}
+
+	if vo.MetadataPool != parentBackingSnapVolOpts.MetadataPool {
+		return fmtBackingSnapshotOptionMismatch("MetadataPool", vo.MetadataPool, parentBackingSnapVolOpts.MetadataPool)
+	}
+
+	if vo.FsName != parentBackingSnapVolOpts.FsName {
+		return fmtBackingSnapshotOptionMismatch("fsName", vo.FsName, parentBackingSnapVolOpts.FsName)
+	}
+
+	if vo.SubvolumeGroup != parentBackingSnapVolOpts.SubvolumeGroup {
+		return fmtBackingSnapshotOptionMismatch("SubvolumeGroup", vo.SubvolumeGroup, parentBackingSnapVolOpts.SubvolumeGroup)
+	}
+
+	vo.Features = parentBackingSnapVolOpts.Features
+	vo.Size = parentBackingSnapVolOpts.Size
+
+	// For case (a) (vo.ProvisionVolume == true is assumed), the snapshot root path
+	// can be built out of the subvolume root path, which has the following format:
+	//
+	//   /volumes/<volume group>/<subvolume>/<subvolume UUID>
+
+	subvolRoot, subvolUUID := path.Split(parentBackingSnapVolOpts.RootPath)
+
+	vo.RootPath = subvolRoot
+	vo.BackingSnapshotRoot = path.Join(".snap", snapID.FsSnapshotName, subvolUUID)
+
+	return nil
+}
+
 // NewVolumeOptionsFromMonitorList generates a new instance of VolumeOptions and
 // VolumeIdentifier from the provided CSI volume context.
 func NewVolumeOptionsFromMonitorList(
@@ -438,9 +558,17 @@ func NewVolumeOptionsFromMonitorList(
 		return nil, nil, err
 	}
 
+	if err = extractOptionalOption(&opts.BackingSnapshotID, "backingSnapshotID", options); err != nil {
+		return nil, nil, err
+	}
+
 	vid.FsSubvolName = volID
 	vid.VolumeID = volID
 
+	if opts.BackingSnapshotID != "" {
+		opts.BackingSnapshot = true
+	}
+
 	return &opts, &vid, nil
 }
 
@@ -511,6 +639,10 @@ func NewVolumeOptionsFromStaticVolume(
 	vid.FsSubvolName = opts.RootPath
 	vid.VolumeID = volID
 
+	if opts.BackingSnapshotID != "" {
+		opts.BackingSnapshot = true
+	}
+
 	return &opts, &vid, nil
 }
 
@@ -599,6 +731,7 @@ func NewSnapshotOptionsFromID(
 	}
 	volOptions.Features = subvolInfo.Features
 	volOptions.Size = subvolInfo.BytesQuota
+	volOptions.RootPath = subvolInfo.Path
 	snap := core.NewSnapshot(volOptions.conn, sid.FsSnapshotName, &volOptions.SubVolume)
 	info, err := snap.GetSnapshotInfo(ctx)
 	if err != nil {
diff --git a/internal/journal/voljournal.go b/internal/journal/voljournal.go
index c31073f19..6cbf2f519 100644
--- a/internal/journal/voljournal.go
+++ b/internal/journal/voljournal.go
@@ -152,7 +152,7 @@ type Config struct {
 	// ownerKey is used to identify the owner of the volume, can be used with some KMS configurations
 	ownerKey string
 
-	// backingSnapshotIDKey is snapshot ID on which a shallow CephFS volume is based.
+ // backingSnapshotIDKey ID of the snapshot on which the CephFS snapshot-backed volume is based backingSnapshotIDKey string // commonPrefix is the prefix common to all omap keys for this Config @@ -532,7 +532,7 @@ Input arguments: - kmsConf: Name of the key management service used to encrypt the image (optional) - volUUID: UUID need to be reserved instead of auto-generating one (this is useful for mirroring and metro-DR) - owner: the owner of the volume (optional) - - backingSnapshotID: (optional) + - backingSnapshotID: ID of the snapshot on which the CephFS snapshot-backed volume is based (optional) Return values: - string: Contains the UUID that was reserved for the passed in reqName @@ -645,7 +645,7 @@ func (conn *Connection) ReserveName(ctx context.Context, omapValues[cj.cephSnapSourceKey] = parentName } - // Update backing snapshot ID for shallow CephFS volume + // Update backing snapshot ID for snapshot-backed CephFS volume if backingSnapshotID != "" { omapValues[cj.backingSnapshotIDKey] = backingSnapshotID } @@ -667,7 +667,7 @@ type ImageAttributes struct { Owner string // Contains the owner to be used in combination with KmsID (for some KMS) ImageID string // Contains the image id JournalPoolID int64 // Pool ID of the CSI journal pool, stored in big endian format (on-disk data) - BackingSnapshotID string // ID of the backing snapshot of a shallow CephFS volume + BackingSnapshotID string // ID of the snapshot on which the CephFS snapshot-backed volume is based } // GetImageAttributes fetches all keys and their values, from a UUID directory, returning ImageAttributes structure.
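Taken together, the CreateVolume and NodeStageVolume changes enforce three rules for the new backingSnapshot volume parameter: the volume must be created from a snapshot content source, it must keep the size of the volume the snapshot was taken from, and it may only be requested with read-only access modes. The sketch below restates those checks against the CSI spec types alone; it is an illustration rather than the ceph-csi code itself, and the helper names are made up.

package example

import (
	"errors"

	"github.com/container-storage-interface/spec/lib/go/csi"
)

// isReadOnlyAccessMode reports whether the requested access mode is one of the
// two read-only modes that snapshot-backed volumes are restricted to.
func isReadOnlyAccessMode(mode csi.VolumeCapability_AccessMode_Mode) bool {
	return mode == csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY ||
		mode == csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY
}

// validateBackingSnapshotRequest (hypothetical helper) mirrors the checks added
// to checkValidCreateVolumeRequest and validateSnapshotBackedVolCapability.
func validateBackingSnapshotRequest(req *csi.CreateVolumeRequest, backingSnapSizeBytes int64) error {
	if req.GetVolumeContentSource().GetSnapshot() == nil {
		return errors.New("backingSnapshot option requires snapshot volume source")
	}

	// The requested size must match the backing snapshot's size, since the
	// volume is only a read-only view of the snapshot.
	if req.GetCapacityRange().GetRequiredBytes() != backingSnapSizeBytes {
		return errors.New("cannot create snapshot-backed volume of different size")
	}

	for _, volCap := range req.GetVolumeCapabilities() {
		if !isReadOnlyAccessMode(volCap.GetAccessMode().GetMode()) {
			return errors.New("backingSnapshot may be used only with read-only access modes")
		}
	}

	return nil
}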
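The delete paths hinge on the reference counting that internal/util/reftracker provides: AddSnapshotBackedVolumeRef seeds a per-snapshot tracker object and records the volume's reference, UnrefSnapshotBackedVolume drops that reference in DeleteVolume, and UnrefSelfInSnapshotBackedVolumes masks the snapshot's own reference in DeleteSnapshot, so the CephFS snapshot is removed only after the last snapshot-backed volume is gone. The toy, in-memory stand-in below illustrates that scheme only; the real tracker lives in a RADOS object, distinguishes normal from masked reference types, and detects concurrent updates (rterrors.ErrObjectOutOfDate). All names here are illustrative.

package example

// backingSnapshotRefs is a simplified model of the per-snapshot reftracker
// object named rt-backingsnapshot-<snapshot ID>.
type backingSnapshotRefs struct {
	refs map[string]struct{}
}

// newBackingSnapshotRefs corresponds to the reftracker.Add call in
// AddSnapshotBackedVolumeRef, which seeds the tracker with the snapshot's own
// ID and the first volume's ID.
func newBackingSnapshotRefs(snapshotID, volumeID string) *backingSnapshotRefs {
	return &backingSnapshotRefs{refs: map[string]struct{}{
		snapshotID: {},
		volumeID:   {},
	}}
}

// addVolumeRef records another snapshot-backed volume (CreateVolume path).
func (r *backingSnapshotRefs) addVolumeRef(volumeID string) {
	r.refs[volumeID] = struct{}{}
}

// removeRef drops a reference and reports whether none remain, i.e. whether
// the backing CephFS snapshot must now be deleted. DeleteVolume calls it with
// the volume ID; DeleteSnapshot calls it with the snapshot's own ID (a masked
// removal in the real implementation).
func (r *backingSnapshotRefs) removeRef(id string) (needsDelete bool) {
	delete(r.refs, id)

	return len(r.refs) == 0
}

With this model, DeleteSnapshot on a snapshot that still backs volumes succeeds without touching the CephFS snapshot, and the final cleanup happens in whichever DeleteVolume call drops the last reference; that is the case cleanUpBackingVolume handles when UnrefSnapshotBackedVolume reports the tracker became empty, after which it resolves the snapshot via NewSnapshotOptionsFromID and calls deleteSnapshotAndUndoReservation.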
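For provisioned volumes, populateVolumeOptionsFromBackingSnapshot derives both the mount root and the bind-mount source from the parent subvolume's root path alone. A small, stdlib-only sketch of that path arithmetic (the function name is illustrative):

package example

import "path"

// snapshotBackedRootPaths mirrors the path handling at the end of
// populateVolumeOptionsFromBackingSnapshot: given the snapshot's parent
// subvolume root, e.g. /volumes/<volume group>/<subvolume>/<subvolume UUID>,
// the volume's RootPath becomes the parent directory (everything up to the
// UUID component) and BackingSnapshotRoot becomes
// .snap/<snapshot>/<subvolume UUID> relative to it.
func snapshotBackedRootPaths(parentSubvolRootPath, fsSnapshotName string) (rootPath, backingSnapshotRoot string) {
	subvolRoot, subvolUUID := path.Split(parentSubvolRootPath)

	return subvolRoot, path.Join(".snap", fsSnapshotName, subvolUUID)
}

NodeStageVolume then mounts RootPath as usual and bind-mounts the resolved snapshot directory over the staging path, which is what the new backing-snapshot branch in (*NodeServer).mount does; for pre-provisioned volumes the snapshot directory name is discovered at stage time by getBackingSnapshotRoot instead.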