From 8dcb6a6105f5f890c5f4e86e1208774262847070 Mon Sep 17 00:00:00 2001
From: Madhu Rajanna
Date: Fri, 31 Jan 2020 14:19:11 +0530
Subject: [PATCH] Handle Delete operation if pool not found

If the backend rbd or cephfs pool is already deleted, we need to
return success to the DeleteVolume RPC call to make it idempotent.

Signed-off-by: Madhu Rajanna
---
 e2e/cephfs.go                  |  9 ++++
 e2e/rbd.go                     |  9 ++++
 e2e/utils.go                   | 79 ++++++++++++++++++++++++++++++++--
 pkg/cephfs/cephfs_util.go      |  6 +--
 pkg/cephfs/controllerserver.go |  6 +++
 pkg/rbd/controllerserver.go    | 30 +++++++++++++
 pkg/util/cephcmds.go           |  2 +-
 pkg/util/errors.go             | 10 +++++
 8 files changed, 144 insertions(+), 7 deletions(-)

diff --git a/e2e/cephfs.go b/e2e/cephfs.go
index d5c365035..f717e0e69 100644
--- a/e2e/cephfs.go
+++ b/e2e/cephfs.go
@@ -207,6 +207,15 @@ var _ = Describe("cephfs", func() {
 		})
 
+		// Make sure this is the last test case in this file,
+		// because it deletes the backend pool.
+		By("Create a PVC and Delete PVC when backend pool deleted", func() {
+			err := pvcDeleteWhenPoolNotFound(pvcPath, true, f)
+			if err != nil {
+				Fail(err.Error())
+			}
+		})
+
 	})
 })
diff --git a/e2e/rbd.go b/e2e/rbd.go
index b6d2cf0fc..47da50601 100644
--- a/e2e/rbd.go
+++ b/e2e/rbd.go
@@ -338,6 +338,15 @@ var _ = Describe("RBD", func() {
 				Fail(err.Error())
 			}
 		})
+
+		// Make sure this is the last test case in this file,
+		// because it deletes the backend pool.
+		By("Create a PVC and Delete PVC when backend pool deleted", func() {
+			err := pvcDeleteWhenPoolNotFound(pvcPath, false, f)
+			if err != nil {
+				Fail(err.Error())
+			}
+		})
 	})
 })
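
The two tests above drive the behaviour described in the commit message:
deleting a volume whose backend pool has vanished must still succeed. A
minimal self-contained sketch of that idempotent-delete pattern follows;
poolNotFoundError, resolveVolume and deleteVolume are illustrative
stand-ins, not ceph-csi APIs:

package main

import (
	"errors"
	"fmt"
)

// poolNotFoundError mirrors the shape of util.ErrPoolNotFound introduced
// at the end of this patch.
type poolNotFoundError struct {
	Pool string
	Err  error
}

func (e poolNotFoundError) Error() string { return e.Err.Error() }

// deleteVolume treats "backend pool missing" as a successful delete, the
// same way the CSI DeleteVolume handlers below do.
func deleteVolume(resolveVolume func() error) error {
	if err := resolveVolume(); err != nil {
		if _, ok := err.(poolNotFoundError); ok {
			// the pool is gone, so the volume and its omap data are too
			return nil
		}
		return err
	}
	// ... delete the image/subvolume and its omap entries ...
	return nil
}

func main() {
	resolve := func() error {
		return poolNotFoundError{Pool: "replicapool", Err: errors.New("pool not found")}
	}
	fmt.Println(deleteVolume(resolve)) // <nil>: the delete is idempotent
}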
diff --git a/e2e/utils.go b/e2e/utils.go
index 9ef006094..e2b30f917 100644
--- a/e2e/utils.go
+++ b/e2e/utils.go
@@ -572,7 +572,7 @@ func validatePVCAndAppBinding(pvcPath, appPath string, f *framework.Framework) {
 	}
 }
 
-func getRBDImageIds(pvcNamespace, pvcName string, f *framework.Framework) (string, string, error) {
+func getImageInfoFromPVC(pvcNamespace, pvcName string, f *framework.Framework) (string, string, error) {
 	c := f.ClientSet.CoreV1()
 	pvc, err := c.PersistentVolumeClaims(pvcNamespace).Get(pvcName, metav1.GetOptions{})
 	if err != nil {
@@ -634,7 +634,7 @@ func readVaultSecret(key string, f *framework.Framework) (string, string) {
 
 func validateEncryptedPVCAndAppBinding(pvcPath, appPath, kms string, f *framework.Framework) {
 	pvc, app := createPVCAndAppBinding(pvcPath, appPath, f)
-	rbdImageID, rbdImageHandle, err := getRBDImageIds(pvc.Namespace, pvc.Name, f)
+	rbdImageID, rbdImageHandle, err := getImageInfoFromPVC(pvc.Namespace, pvc.Name, f)
 	if err != nil {
 		Fail(err.Error())
 	}
@@ -826,7 +826,7 @@ func validateNormalUserPVCAccess(pvcPath string, f *framework.Framework) {
 // }
 
 func deleteBackingCephFSVolume(f *framework.Framework, pvc *v1.PersistentVolumeClaim) error {
-	volname, _, err := getRBDImageIds(pvc.Namespace, pvc.Name, f)
+	volname, _, err := getImageInfoFromPVC(pvc.Namespace, pvc.Name, f)
 	if err != nil {
 		return err
 	}
@@ -919,3 +919,76 @@ func checkDataPersist(pvcPath, appPath string, f *framework.Framework) error {
 	err = deletePVCAndApp("", f, pvc, app)
 	return err
 }
+
+func deleteBackingRBDImage(f *framework.Framework, pvc *v1.PersistentVolumeClaim) error {
+	rbdImage, _, err := getImageInfoFromPVC(pvc.Namespace, pvc.Name, f)
+	if err != nil {
+		return err
+	}
+
+	opt := metav1.ListOptions{
+		LabelSelector: "app=rook-ceph-tools",
+	}
+
+	cmd := fmt.Sprintf("rbd rm %s --pool=replicapool", rbdImage)
+	execCommandInPod(f, cmd, rookNS, &opt)
+	return nil
+}
+
+func deletePool(name string, cephfs bool, f *framework.Framework) {
+	opt := metav1.ListOptions{
+		LabelSelector: "app=rook-ceph-tools",
+	}
+	var cmds = []string{}
+	if cephfs {
+		// ceph fs fail myfs
+		// ceph fs rm myfs --yes-i-really-mean-it
+		// ceph osd pool delete myfs-metadata myfs-metadata
+		// --yes-i-really-really-mean-it
+		// ceph osd pool delete myfs-data0 myfs-data0
+		// --yes-i-really-really-mean-it
+		cmds = append(cmds, fmt.Sprintf("ceph fs fail %s", name),
+			fmt.Sprintf("ceph fs rm %s --yes-i-really-mean-it", name),
+			fmt.Sprintf("ceph osd pool delete %s-metadata %s-metadata --yes-i-really-really-mean-it", name, name),
+			fmt.Sprintf("ceph osd pool delete %s-data0 %s-data0 --yes-i-really-really-mean-it", name, name))
+	} else {
+		// ceph osd pool delete replicapool replicapool
+		// --yes-i-really-really-mean-it
+		cmds = append(cmds, fmt.Sprintf("ceph osd pool delete %s %s --yes-i-really-really-mean-it", name, name))
+	}
+
+	for _, cmd := range cmds {
+		// discard stderr, as some of these commands print warnings there
+		execCommandInPod(f, cmd, rookNS, &opt)
+	}
+}
+
+func pvcDeleteWhenPoolNotFound(pvcPath string, cephfs bool, f *framework.Framework) error {
+	pvc, err := loadPVC(pvcPath)
+	if err != nil {
+		return err
+	}
+	pvc.Namespace = f.UniqueName
+
+	err = createPVCAndvalidatePV(f.ClientSet, pvc, deployTimeout)
+	if err != nil {
+		return err
+	}
+	if cephfs {
+		err = deleteBackingCephFSVolume(f, pvc)
+		if err != nil {
+			return err
+		}
+		// delete the cephfs filesystem
+		deletePool("myfs", cephfs, f)
+	} else {
+		err = deleteBackingRBDImage(f, pvc)
+		if err != nil {
+			return err
+		}
+		// delete the rbd pool
+		deletePool("replicapool", cephfs, f)
+	}
+	err = deletePVCAndValidatePV(f.ClientSet, pvc, deployTimeout)
+	return err
+}
diff --git a/pkg/cephfs/cephfs_util.go b/pkg/cephfs/cephfs_util.go
index 0c0e805ac..7dba08215 100644
--- a/pkg/cephfs/cephfs_util.go
+++ b/pkg/cephfs/cephfs_util.go
@@ -84,10 +84,10 @@ func getMetadataPool(ctx context.Context, monitors string, cr *util.Credentials, fsName string) (string, error) {
 		}
 	}
 
-	return "", fmt.Errorf("fsName (%s) not found in Ceph cluster", fsName)
+	return "", util.ErrPoolNotFound{Pool: fsName, Err: fmt.Errorf("fsName (%s) not found in Ceph cluster", fsName)}
 }
 
-// CephFilesystemDetails is a representation of the main json structure returned by 'ceph fs dump'
+// CephFilesystemDump is a representation of the main json structure returned by 'ceph fs dump'
 type CephFilesystemDump struct {
 	Filesystems []CephFilesystemDetails `json:"filesystems"`
 }
@@ -114,5 +114,5 @@ func getFsName(ctx context.Context, monitors string, cr *util.Credentials, fscID int64) (string, error) {
 		}
 	}
 
-	return "", fmt.Errorf("fscID (%d) not found in Ceph cluster", fscID)
+	return "", util.ErrPoolNotFound{Pool: fmt.Sprintf("%d", fscID), Err: fmt.Errorf("fscID (%d) not found in Ceph cluster", fscID)}
 }
diff --git a/pkg/cephfs/controllerserver.go b/pkg/cephfs/controllerserver.go
index 7aa08d256..3a0fa3a47 100644
--- a/pkg/cephfs/controllerserver.go
+++ b/pkg/cephfs/controllerserver.go
@@ -231,6 +231,12 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) (*csi.DeleteVolumeResponse, error) {
 	// Find the volume using the provided VolumeID
 	volOptions, vID, err := newVolumeOptionsFromVolID(ctx, string(volID), nil, secrets)
 	if err != nil {
+		// if the error is ErrPoolNotFound, the pool is already deleted and we
+		// don't need to delete the subvolume or omap data; return success
+		if _, ok := err.(util.ErrPoolNotFound); ok {
+			klog.Warningf(util.Log(ctx, "failed to get backend volume for %s: %v"), string(volID), err)
+			return &csi.DeleteVolumeResponse{}, nil
+		}
 		// if error is ErrKeyNotFound, then a previous attempt at deletion was complete
 		// or partially complete (subvolume and imageOMap are garbage collected already), hence
 		// return success as deletion is complete
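
The controller servers detect the new error with a plain type assertion,
which works because nothing wraps the error between GetPoolName and its
callers. For comparison, a sketch of the equivalent errors.As form, which
would also match if the error were later wrapped with fmt.Errorf and %w
(assumes Go 1.13+; this is not what the patch uses):

package main

import (
	"errors"
	"fmt"
)

// ErrPoolNotFound as defined by this patch.
type ErrPoolNotFound struct {
	Pool string
	Err  error
}

func (e ErrPoolNotFound) Error() string { return e.Err.Error() }

func main() {
	err := error(ErrPoolNotFound{Pool: "myfs", Err: errors.New("fsName (myfs) not found in Ceph cluster")})

	// the patch's style: assert on the concrete type
	if _, ok := err.(ErrPoolNotFound); ok {
		fmt.Println("type assertion: pool missing")
	}

	// equivalent errors.As form; also finds the type through %w wrappers
	var pnf ErrPoolNotFound
	if errors.As(err, &pnf) {
		fmt.Println("errors.As: pool missing:", pnf.Pool)
	}
}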
diff --git a/pkg/rbd/controllerserver.go b/pkg/rbd/controllerserver.go
index 477259d33..2fecb8517 100644
--- a/pkg/rbd/controllerserver.go
+++ b/pkg/rbd/controllerserver.go
@@ -258,6 +258,12 @@ func (cs *ControllerServer) checkSnapshot(ctx context.Context, req *csi.CreateVolumeRequest, rbdVol *rbdVolume) error {
+		// check ErrPoolNotFound first; the catch-all below would otherwise shadow it
+		if _, ok := err.(util.ErrPoolNotFound); ok {
+			klog.Errorf(util.Log(ctx, "failed to get backend snapshot for %s: %v"), snapshotID, err)
+			return status.Error(codes.InvalidArgument, err.Error())
+		}
+
 		if _, ok := err.(ErrSnapNotFound); !ok {
 			return status.Error(codes.Internal, err.Error())
 		}
 
 		return status.Error(codes.InvalidArgument, "missing requested Snapshot ID")
 	}
@@ -344,6 +350,11 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) (*csi.DeleteVolumeResponse, error) {
 
 	rbdVol := &rbdVolume{}
 	if err = genVolFromVolID(ctx, rbdVol, volumeID, cr, req.GetSecrets()); err != nil {
+		if _, ok := err.(util.ErrPoolNotFound); ok {
+			klog.Warningf(util.Log(ctx, "failed to get backend volume for %s: %v"), volumeID, err)
+			return &csi.DeleteVolumeResponse{}, nil
+		}
+
 		// If error is ErrInvalidVolID it could be a version 1.0.0 or lower volume, attempt
 		// to process it as such
 		if _, ok := err.(ErrInvalidVolID); ok {
@@ -360,6 +371,7 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) (*csi.DeleteVolumeResponse, error) {
 		// if error is ErrKeyNotFound, then a previous attempt at deletion was complete
 		// or partially complete (image and imageOMap are garbage collected already), hence return
 		// success as deletion is complete
 		if _, ok := err.(util.ErrKeyNotFound); ok {
+			klog.Warningf(util.Log(ctx, "failed to get volume options for %s: %v"), volumeID, err)
 			return &csi.DeleteVolumeResponse{}, nil
 		}
@@ -458,6 +470,11 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequest) (*csi.CreateSnapshotResponse, error) {
 		if _, ok := err.(ErrImageNotFound); ok {
 			return nil, status.Errorf(codes.NotFound, "source Volume ID %s not found", req.GetSourceVolumeId())
 		}
+
+		if _, ok := err.(util.ErrPoolNotFound); ok {
+			klog.Errorf(util.Log(ctx, "failed to get backend volume for %s: %v"), req.GetSourceVolumeId(), err)
+			return nil, status.Errorf(codes.Internal, err.Error())
+		}
 		return nil, status.Errorf(codes.Internal, err.Error())
 	}
@@ -625,6 +642,13 @@ func (cs *ControllerServer) DeleteSnapshot(ctx context.Context, req *csi.DeleteSnapshotRequest) (*csi.DeleteSnapshotResponse, error) {
 
 	rbdSnap := &rbdSnapshot{}
 	if err = genSnapFromSnapID(ctx, rbdSnap, snapshotID, cr); err != nil {
+		// if the error is ErrPoolNotFound, the pool is already deleted and we
+		// don't need to delete the snapshot or omap data; return success
+		if _, ok := err.(util.ErrPoolNotFound); ok {
+			klog.Warningf(util.Log(ctx, "failed to get backend snapshot for %s: %v"), snapshotID, err)
+			return &csi.DeleteSnapshotResponse{}, nil
+		}
+
 		// if error is ErrKeyNotFound, then a previous attempt at deletion was complete
 		// or partially complete (snap and snapOMap are garbage collected already), hence return
 		// success as deletion is complete
@@ -715,6 +739,12 @@ func (cs *ControllerServer) ControllerExpandVolume(ctx context.Context, req *csi.ControllerExpandVolumeRequest) (*csi.ControllerExpandVolumeResponse, error) {
 		if _, ok := err.(ErrImageNotFound); ok {
 			return nil, status.Errorf(codes.NotFound, "volume ID %s not found", volID)
 		}
+
+		if _, ok := err.(util.ErrPoolNotFound); ok {
+			klog.Errorf(util.Log(ctx, "failed to get backend volume for %s: %v"), volID, err)
+			return nil, status.Errorf(codes.InvalidArgument, err.Error())
+		}
+
 		return nil, status.Errorf(codes.Internal, err.Error())
 	}
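
A note on populating ErrPoolNotFound.Pool from a numeric ID: a plain
string(poolID) conversion yields the Unicode code point, not the decimal
digits, which is why the getFsName hunk above and the GetPoolName hunk
below format the ID with fmt.Sprintf. A standalone illustration:

package main

import (
	"fmt"
	"strconv"
)

func main() {
	var poolID int64 = 65
	fmt.Println(string(rune(poolID)))          // "A": code point 65, not the digits
	fmt.Println(fmt.Sprintf("%d", poolID))     // "65": what Pool should contain
	fmt.Println(strconv.FormatInt(poolID, 10)) // "65": equivalent alternative
}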
diff --git a/pkg/util/cephcmds.go b/pkg/util/cephcmds.go
index edbc053d4..387bbf3f7 100644
--- a/pkg/util/cephcmds.go
+++ b/pkg/util/cephcmds.go
@@ -114,7 +114,7 @@ func GetPoolName(ctx context.Context, monitors string, cr *Credentials, poolID int64) (string, error) {
 		}
 	}
 
-	return "", fmt.Errorf("pool ID (%d) not found in Ceph cluster", poolID)
+	return "", ErrPoolNotFound{fmt.Sprintf("%d", poolID), fmt.Errorf("pool ID (%d) not found in Ceph cluster", poolID)}
 }
 
 // SetOMapKeyValue sets the given key and value into the provided Ceph omap name
diff --git a/pkg/util/errors.go b/pkg/util/errors.go
index afea1b601..ca15c5768 100644
--- a/pkg/util/errors.go
+++ b/pkg/util/errors.go
@@ -56,3 +56,13 @@ type ErrSnapNameConflict struct {
 
 func (e ErrSnapNameConflict) Error() string {
 	return e.err.Error()
 }
+
+// ErrPoolNotFound is returned when the requested pool is not found
+type ErrPoolNotFound struct {
+	Pool string
+	Err  error
+}
+
+func (e ErrPoolNotFound) Error() string {
+	return e.Err.Error()
+}
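
Since ErrPoolNotFound carries an inner error, a possible follow-up (a
sketch only, not part of this patch) is an Unwrap method so that
errors.Is and errors.As can see through it to the underlying cause:

package main

import (
	"errors"
	"fmt"
)

var errNotFound = errors.New("not found in Ceph cluster")

type ErrPoolNotFound struct {
	Pool string
	Err  error
}

func (e ErrPoolNotFound) Error() string { return e.Err.Error() }

// Unwrap is the hypothetical addition; it exposes the wrapped cause.
func (e ErrPoolNotFound) Unwrap() error { return e.Err }

func main() {
	err := error(ErrPoolNotFound{Pool: "replicapool", Err: fmt.Errorf("pool ID (2) %w", errNotFound)})
	fmt.Println(errors.Is(err, errNotFound)) // true, thanks to Unwrap
}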