diff --git a/e2e/cephfs.go b/e2e/cephfs.go
index d5c365035..f717e0e69 100644
--- a/e2e/cephfs.go
+++ b/e2e/cephfs.go
@@ -207,6 +207,15 @@ var _ = Describe("cephfs", func() {
 		})
 
+		// Make sure this is the last test case in this file, because
+		// it deletes the backend pool
+		By("Create a PVC and Delete PVC when backend pool deleted", func() {
+			err := pvcDeleteWhenPoolNotFound(pvcPath, true, f)
+			if err != nil {
+				Fail(err.Error())
+			}
+		})
+
 	})
 })
diff --git a/e2e/rbd.go b/e2e/rbd.go
index b6d2cf0fc..47da50601 100644
--- a/e2e/rbd.go
+++ b/e2e/rbd.go
@@ -338,6 +338,15 @@ var _ = Describe("RBD", func() {
 				Fail(err.Error())
 			}
 		})
+
+		// Make sure this is the last test case in this file, because
+		// it deletes the backend pool
+		By("Create a PVC and Delete PVC when backend pool deleted", func() {
+			err := pvcDeleteWhenPoolNotFound(pvcPath, false, f)
+			if err != nil {
+				Fail(err.Error())
+			}
+		})
 	})
 })
diff --git a/e2e/utils.go b/e2e/utils.go
index 9ef006094..e2b30f917 100644
--- a/e2e/utils.go
+++ b/e2e/utils.go
@@ -572,7 +572,7 @@ func validatePVCAndAppBinding(pvcPath, appPath string, f *framework.Framework) {
 	}
 }
 
-func getRBDImageIds(pvcNamespace, pvcName string, f *framework.Framework) (string, string, error) {
+func getImageInfoFromPVC(pvcNamespace, pvcName string, f *framework.Framework) (string, string, error) {
 	c := f.ClientSet.CoreV1()
 	pvc, err := c.PersistentVolumeClaims(pvcNamespace).Get(pvcName, metav1.GetOptions{})
 	if err != nil {
@@ -634,7 +634,7 @@ func readVaultSecret(key string, f *framework.Framework) (string, string) {
 func validateEncryptedPVCAndAppBinding(pvcPath, appPath, kms string, f *framework.Framework) {
 	pvc, app := createPVCAndAppBinding(pvcPath, appPath, f)
 
-	rbdImageID, rbdImageHandle, err := getRBDImageIds(pvc.Namespace, pvc.Name, f)
+	rbdImageID, rbdImageHandle, err := getImageInfoFromPVC(pvc.Namespace, pvc.Name, f)
 	if err != nil {
 		Fail(err.Error())
 	}
@@ -826,7 +826,7 @@ func validateNormalUserPVCAccess(pvcPath string, f *framework.Framework) {
 // }
 
 func deleteBackingCephFSVolume(f *framework.Framework, pvc *v1.PersistentVolumeClaim) error {
-	volname, _, err := getRBDImageIds(pvc.Namespace, pvc.Name, f)
+	volname, _, err := getImageInfoFromPVC(pvc.Namespace, pvc.Name, f)
 	if err != nil {
 		return err
 	}
@@ -919,3 +919,76 @@ func checkDataPersist(pvcPath, appPath string, f *framework.Framework) error {
 	err = deletePVCAndApp("", f, pvc, app)
 	return err
 }
+
+func deleteBackingRBDImage(f *framework.Framework, pvc *v1.PersistentVolumeClaim) error {
+	rbdImage, _, err := getImageInfoFromPVC(pvc.Namespace, pvc.Name, f)
+	if err != nil {
+		return err
+	}
+
+	opt := metav1.ListOptions{
+		LabelSelector: "app=rook-ceph-tools",
+	}
+
+	cmd := fmt.Sprintf("rbd rm %s --pool=replicapool", rbdImage)
+	execCommandInPod(f, cmd, rookNS, &opt)
+	return nil
+}
+
+func deletePool(name string, cephfs bool, f *framework.Framework) {
+	opt := metav1.ListOptions{
+		LabelSelector: "app=rook-ceph-tools",
+	}
+	var cmds = []string{}
+	if cephfs {
+		// ceph fs fail myfs
+		// ceph fs rm myfs --yes-i-really-mean-it
+		// ceph osd pool delete myfs-metadata myfs-metadata
+		// --yes-i-really-really-mean-it
+		// ceph osd pool delete myfs-data0 myfs-data0
+		// --yes-i-really-really-mean-it
+		cmds = append(cmds, fmt.Sprintf("ceph fs fail %s", name),
+			fmt.Sprintf("ceph fs rm %s --yes-i-really-mean-it", name),
+			fmt.Sprintf("ceph osd pool delete %s-metadata %s-metadata --yes-i-really-really-mean-it", name, name),
+			fmt.Sprintf("ceph osd pool delete %s-data0 %s-data0 --yes-i-really-really-mean-it", name, name))
+	} else {
+		// ceph osd pool delete replicapool replicapool
+		// --yes-i-really-really-mean-it
+		cmds = append(cmds, fmt.Sprintf("ceph osd pool delete %s %s --yes-i-really-really-mean-it", name, name))
+	}
+
+	for _, cmd := range cmds {
+		// discard stderr, as some of these commands print warnings to stderr
+		execCommandInPod(f, cmd, rookNS, &opt)
+	}
+}
+
+func pvcDeleteWhenPoolNotFound(pvcPath string, cephfs bool, f *framework.Framework) error {
+	pvc, err := loadPVC(pvcPath)
+	if err != nil {
+		return err
+	}
+	pvc.Namespace = f.UniqueName
+
+	err = createPVCAndvalidatePV(f.ClientSet, pvc, deployTimeout)
+	if err != nil {
+		return err
+	}
+	if cephfs {
+		err = deleteBackingCephFSVolume(f, pvc)
+		if err != nil {
+			return err
+		}
+		// delete the cephfs filesystem
+		deletePool("myfs", cephfs, f)
+	} else {
+		err = deleteBackingRBDImage(f, pvc)
+		if err != nil {
+			return err
+		}
+		// delete the rbd pool
+		deletePool("replicapool", cephfs, f)
+	}
+	err = deletePVCAndValidatePV(f.ClientSet, pvc, deployTimeout)
+	return err
+}
diff --git a/pkg/cephfs/cephfs_util.go b/pkg/cephfs/cephfs_util.go
index 0c0e805ac..7dba08215 100644
--- a/pkg/cephfs/cephfs_util.go
+++ b/pkg/cephfs/cephfs_util.go
@@ -84,10 +84,10 @@ func getMetadataPool(ctx context.Context, monitors string, cr *util.Credentials,
 		}
 	}
 
-	return "", fmt.Errorf("fsName (%s) not found in Ceph cluster", fsName)
+	return "", util.ErrPoolNotFound{Pool: fsName, Err: fmt.Errorf("fsName (%s) not found in Ceph cluster", fsName)}
 }
 
-// CephFilesystemDetails is a representation of the main json structure returned by 'ceph fs dump'
+// CephFilesystemDump is a representation of the main json structure returned by 'ceph fs dump'
 type CephFilesystemDump struct {
 	Filesystems []CephFilesystemDetails `json:"filesystems"`
 }
@@ -114,5 +114,5 @@ func getFsName(ctx context.Context, monitors string, cr *util.Credentials, fscID
 		}
 	}
 
-	return "", fmt.Errorf("fscID (%d) not found in Ceph cluster", fscID)
+	return "", util.ErrPoolNotFound{Pool: fmt.Sprintf("%d", fscID), Err: fmt.Errorf("fscID (%d) not found in Ceph cluster", fscID)}
 }
diff --git a/pkg/cephfs/controllerserver.go b/pkg/cephfs/controllerserver.go
index 7aa08d256..3a0fa3a47 100644
--- a/pkg/cephfs/controllerserver.go
+++ b/pkg/cephfs/controllerserver.go
@@ -231,6 +231,12 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
 	// Find the volume using the provided VolumeID
 	volOptions, vID, err := newVolumeOptionsFromVolID(ctx, string(volID), nil, secrets)
 	if err != nil {
+		// if the error is ErrPoolNotFound, the pool is already deleted, so there
+		// is no subvolume or omap data left to clean up; return success
+		if _, ok := err.(util.ErrPoolNotFound); ok {
+			klog.Warningf(util.Log(ctx, "failed to get backend volume for %s: %v"), string(volID), err)
+			return &csi.DeleteVolumeResponse{}, nil
+		}
 		// if error is ErrKeyNotFound, then a previous attempt at deletion was complete
 		// or partially complete (subvolume and imageOMap are garbage collected already), hence
 		// return success as deletion is complete
diff --git a/pkg/rbd/controllerserver.go b/pkg/rbd/controllerserver.go
index 477259d33..2fecb8517 100644
--- a/pkg/rbd/controllerserver.go
+++ b/pkg/rbd/controllerserver.go
@@ -258,6 +258,12 @@ func (cs *ControllerServer) checkSnapshot(ctx context.Context, req *csi.CreateVo
 		if _, ok := err.(ErrSnapNotFound); !ok {
+			// check for ErrPoolNotFound before returning Internal: a missing pool
+			// means the request referenced a snapshot that cannot exist
+			if _, ok := err.(util.ErrPoolNotFound); ok {
+				klog.Errorf(util.Log(ctx, "failed to get backend snapshot for %s: %v"), snapshotID, err)
+				return status.Error(codes.InvalidArgument, err.Error())
+			}
 			return status.Error(codes.Internal, err.Error())
 		}
 
 		return status.Error(codes.InvalidArgument, "missing requested Snapshot ID")
 	}
@@ -344,6 +350,11 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
 
 	rbdVol := &rbdVolume{}
 	if err = genVolFromVolID(ctx, rbdVol, volumeID, cr, req.GetSecrets()); err != nil {
+		if _, ok := err.(util.ErrPoolNotFound); ok {
+			klog.Warningf(util.Log(ctx, "failed to get backend volume for %s: %v"), volumeID, err)
+			return &csi.DeleteVolumeResponse{}, nil
+		}
+
 		// If error is ErrInvalidVolID it could be a version 1.0.0 or lower volume, attempt
 		// to process it as such
 		if _, ok := err.(ErrInvalidVolID); ok {
@@ -360,6 +371,7 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
 	// or partially complete (image and imageOMap are garbage collected already), hence return
 	// success as deletion is complete
 	if _, ok := err.(util.ErrKeyNotFound); ok {
+		klog.Warningf(util.Log(ctx, "failed to get volume options for %s: %v"), volumeID, err)
 		return &csi.DeleteVolumeResponse{}, nil
 	}
@@ -458,6 +470,11 @@ func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateS
 		if _, ok := err.(ErrImageNotFound); ok {
 			return nil, status.Errorf(codes.NotFound, "source Volume ID %s not found", req.GetSourceVolumeId())
 		}
+
+		if _, ok := err.(util.ErrPoolNotFound); ok {
+			klog.Errorf(util.Log(ctx, "failed to get backend volume for %s: %v"), req.GetSourceVolumeId(), err)
+			return nil, status.Errorf(codes.Internal, err.Error())
+		}
 		return nil, status.Errorf(codes.Internal, err.Error())
 	}
@@ -625,6 +642,13 @@ func (cs *ControllerServer) DeleteSnapshot(ctx context.Context, req *csi.DeleteS
 
 	rbdSnap := &rbdSnapshot{}
 	if err = genSnapFromSnapID(ctx, rbdSnap, snapshotID, cr); err != nil {
+		// if the error is ErrPoolNotFound, the pool is already deleted, so there
+		// is no snapshot or omap data left to clean up; return success
+		if _, ok := err.(util.ErrPoolNotFound); ok {
+			klog.Warningf(util.Log(ctx, "failed to get backend snapshot for %s: %v"), snapshotID, err)
+			return &csi.DeleteSnapshotResponse{}, nil
+		}
+
 		// if error is ErrKeyNotFound, then a previous attempt at deletion was complete
 		// or partially complete (snap and snapOMap are garbage collected already), hence return
 		// success as deletion is complete
@@ -715,6 +739,12 @@ func (cs *ControllerServer) ControllerExpandVolume(ctx context.Context, req *csi
 		if _, ok := err.(ErrImageNotFound); ok {
 			return nil, status.Errorf(codes.NotFound, "volume ID %s not found", volID)
 		}
+
+		if _, ok := err.(util.ErrPoolNotFound); ok {
+			klog.Errorf(util.Log(ctx, "failed to get backend volume for %s: %v"), volID, err)
+			return nil, status.Errorf(codes.InvalidArgument, err.Error())
+		}
+
 		return nil, status.Errorf(codes.Internal, err.Error())
 	}
diff --git a/pkg/util/cephcmds.go b/pkg/util/cephcmds.go
index edbc053d4..387bbf3f7 100644
--- a/pkg/util/cephcmds.go
+++ b/pkg/util/cephcmds.go
@@ -114,7 +114,7 @@ func GetPoolName(ctx context.Context, monitors string, cr *Credentials, poolID i
 		}
 	}
 
-	return "", fmt.Errorf("pool ID (%d) not found in Ceph cluster", poolID)
+	return "", ErrPoolNotFound{Pool: fmt.Sprintf("%d", poolID), Err: fmt.Errorf("pool ID (%d) not found in Ceph cluster", poolID)}
 }
 
 // SetOMapKeyValue sets the given key and value into the provided Ceph omap name
diff --git a/pkg/util/errors.go b/pkg/util/errors.go
index afea1b601..ca15c5768 100644
--- a/pkg/util/errors.go
+++ b/pkg/util/errors.go
@@ -56,3 +56,13 @@ type ErrSnapNameConflict struct {
 
 func (e ErrSnapNameConflict) Error() string {
 	return e.err.Error()
 }
+
+// ErrPoolNotFound is returned when the requested pool is not found in the Ceph cluster
+type ErrPoolNotFound struct {
+	Pool string
+	Err  error
+}
+
+func (e ErrPoolNotFound) Error() string {
+	return e.Err.Error()
+}
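
A note for reviewers: below is a minimal, self-contained sketch of how the new util.ErrPoolNotFound type is meant to be consumed. The lookupPool helper is hypothetical; in this change the error actually originates from GetPoolName, getMetadataPool, and getFsName, and is handled by the type assertions shown in the controller servers above.

package main

import "fmt"

// ErrPoolNotFound mirrors the type added in pkg/util/errors.go.
type ErrPoolNotFound struct {
	Pool string
	Err  error
}

func (e ErrPoolNotFound) Error() string {
	return e.Err.Error()
}

// lookupPool is a hypothetical stand-in for util.GetPoolName: resolving a
// pool ID fails with ErrPoolNotFound when the pool has been deleted.
func lookupPool(poolID int64) (string, error) {
	known := map[int64]string{1: "replicapool"}
	if name, ok := known[poolID]; ok {
		return name, nil
	}
	return "", ErrPoolNotFound{
		Pool: fmt.Sprintf("%d", poolID),
		Err:  fmt.Errorf("pool ID (%d) not found in Ceph cluster", poolID),
	}
}

func main() {
	_, err := lookupPool(42)
	// The same type assertion used in DeleteVolume/DeleteSnapshot above:
	// a missing pool means there is no image or omap data left to clean up,
	// so the delete is treated as already complete.
	if _, ok := err.(ErrPoolNotFound); ok {
		fmt.Println("pool gone, returning success:", err)
	}
}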