From c1b33256774db1be23d4757fdcbf06f1f782440f Mon Sep 17 00:00:00 2001 From: Praveen M Date: Mon, 3 Mar 2025 13:12:42 +0530 Subject: [PATCH] cleanup: move rbd errors to internal/rbd/errors This commit includes: 1). Moves rbd errors from internal/util/errors to internal/rbd/errors. 2). Introduces ShouldRetryVolumeGroupGeneration helper function to determine whether volume group generation should continue based on specific error types. The function returns true if the error is of type ErrPoolNotFound, ErrRBDGroupNotFound, ErrPermissionDenied. (Ceph user might not have access to all the objects/pools where mapping exists) Signed-off-by: Praveen M --- .../csi-addons/rbd/encryptionkeyrotation.go | 3 +- internal/csi-addons/rbd/replication.go | 8 +-- internal/csi-addons/rbd/replication_test.go | 4 +- internal/csi-addons/rbd/volumegroup.go | 6 +- internal/rbd/clone.go | 3 +- internal/rbd/controllerserver.go | 14 ++--- internal/rbd/errors/errors.go | 14 ++++- internal/rbd/group/group_snapshot.go | 3 +- internal/rbd/group/util.go | 38 +++++++++++-- .../errors_test.go => rbd/group/util_test.go} | 20 +++---- internal/rbd/group/volume_group.go | 9 +-- internal/rbd/group_controllerserver.go | 6 +- internal/rbd/manager.go | 6 +- internal/rbd/rbd_journal.go | 6 +- internal/rbd/rbd_util.go | 45 ++++++++++++--- internal/rbd/rbd_util_test.go | 55 +++++++++++++++++++ internal/rbd/snapshot.go | 4 +- internal/util/errors.go | 35 +----------- 18 files changed, 180 insertions(+), 99 deletions(-) rename internal/{util/errors_test.go => rbd/group/util_test.go} (78%) diff --git a/internal/csi-addons/rbd/encryptionkeyrotation.go b/internal/csi-addons/rbd/encryptionkeyrotation.go index 0d7a09d78..a24f8770b 100644 --- a/internal/csi-addons/rbd/encryptionkeyrotation.go +++ b/internal/csi-addons/rbd/encryptionkeyrotation.go @@ -21,6 +21,7 @@ import ( "errors" "github.com/ceph/ceph-csi/internal/rbd" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/log" @@ -68,7 +69,7 @@ func (ekrs *EncryptionKeyRotationServer) EncryptionKeyRotate( rbdVol, err := mgr.GetVolumeByID(ctx, volID) if err != nil { switch { - case errors.Is(err, util.ErrImageNotFound): + case errors.Is(err, rbd_errors.ErrImageNotFound): err = status.Errorf(codes.NotFound, "volume ID %s not found", volID) case errors.Is(err, util.ErrPoolNotFound): log.ErrorLog(ctx, "failed to get backend volume for %s: %v", volID, err) diff --git a/internal/csi-addons/rbd/replication.go b/internal/csi-addons/rbd/replication.go index 6ec92470a..6a3f2453f 100644 --- a/internal/csi-addons/rbd/replication.go +++ b/internal/csi-addons/rbd/replication.go @@ -652,7 +652,7 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context, sts, err := mirror.GetGlobalMirroringStatus(ctx) if err != nil { // the image gets recreated after issuing resync - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { // caller retries till RBD syncs an initial version of the image to // report its status in the resync call. Ideally, this line will not // be executed as the error would get returned due to getMirroringInfo @@ -786,7 +786,7 @@ func getGRPCError(err error) error { } errorStatusMap := map[error]codes.Code{ - util.ErrImageNotFound: codes.NotFound, + rbd_errors.ErrImageNotFound: codes.NotFound, util.ErrPoolNotFound: codes.NotFound, rbd_errors.ErrInvalidArgument: codes.InvalidArgument, rbd_errors.ErrFlattenInProgress: codes.Aborted, @@ -836,7 +836,7 @@ func (rs *ReplicationServer) GetVolumeReplicationInfo(ctx context.Context, log.ErrorLog(ctx, "failed to get volume with id %q: %v", volumeID, err) switch { - case errors.Is(err, util.ErrImageNotFound): + case errors.Is(err, rbd_errors.ErrImageNotFound): err = status.Error(codes.NotFound, err.Error()) case errors.Is(err, util.ErrPoolNotFound): err = status.Error(codes.NotFound, err.Error()) @@ -873,7 +873,7 @@ func (rs *ReplicationServer) GetVolumeReplicationInfo(ctx context.Context, if err != nil { log.ErrorLog(ctx, "failed to get status for mirror %q: %v", mirror, err) - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { return nil, status.Error(codes.Aborted, err.Error()) } diff --git a/internal/csi-addons/rbd/replication_test.go b/internal/csi-addons/rbd/replication_test.go index 22545866f..5b0128004 100644 --- a/internal/csi-addons/rbd/replication_test.go +++ b/internal/csi-addons/rbd/replication_test.go @@ -598,8 +598,8 @@ func TestGetGRPCError(t *testing.T) { }, { name: "ErrImageNotFound", - err: util.ErrImageNotFound, - expectedErr: status.Error(codes.NotFound, util.ErrImageNotFound.Error()), + err: rbd_errors.ErrImageNotFound, + expectedErr: status.Error(codes.NotFound, rbd_errors.ErrImageNotFound.Error()), }, { name: "ErrPoolNotFound", diff --git a/internal/csi-addons/rbd/volumegroup.go b/internal/csi-addons/rbd/volumegroup.go index 4727e548b..5fbb38141 100644 --- a/internal/csi-addons/rbd/volumegroup.go +++ b/internal/csi-addons/rbd/volumegroup.go @@ -23,7 +23,7 @@ import ( "slices" "github.com/ceph/ceph-csi/internal/rbd" - "github.com/ceph/ceph-csi/internal/rbd/group" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" "github.com/ceph/ceph-csi/internal/rbd/types" "github.com/ceph/ceph-csi/internal/util/log" @@ -194,7 +194,7 @@ func (vs *VolumeGroupServer) DeleteVolumeGroup( // resolve the volume group vg, err := mgr.GetVolumeGroupByID(ctx, req.GetVolumeGroupId()) if err != nil { - if errors.Is(err, group.ErrRBDGroupNotFound) { + if errors.Is(err, rbd_errors.ErrRBDGroupNotFound) { log.ErrorLog(ctx, "VolumeGroup %q doesn't exists", req.GetVolumeGroupId()) return &volumegroup.DeleteVolumeGroupResponse{}, nil @@ -433,7 +433,7 @@ func (vs *VolumeGroupServer) ControllerGetVolumeGroup( // resolve the volume group vg, err := mgr.GetVolumeGroupByID(ctx, req.GetVolumeGroupId()) if err != nil { - if errors.Is(err, group.ErrRBDGroupNotFound) { + if errors.Is(err, rbd_errors.ErrRBDGroupNotFound) { log.ErrorLog(ctx, "VolumeGroup %q doesn't exists", req.GetVolumeGroupId()) return nil, status.Errorf( diff --git a/internal/rbd/clone.go b/internal/rbd/clone.go index 6ca4f64e6..e70edba5d 100644 --- a/internal/rbd/clone.go +++ b/internal/rbd/clone.go @@ -22,7 +22,6 @@ import ( "fmt" rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" - "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/k8s" "github.com/ceph/ceph-csi/internal/util/log" @@ -68,7 +67,7 @@ func (rv *rbdVolume) checkCloneImage(ctx context.Context, parentVol *rbdVolume) return true, nil - case errors.Is(err, util.ErrImageNotFound): + case errors.Is(err, rbd_errors.ErrImageNotFound): // as the temp clone does not exist,check snapshot exists on parent volume // snapshot name is same as temporary clone image snap.RbdImageName = tempClone.RbdImageName diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go index ac7e9b81e..26a4a6bb1 100644 --- a/internal/rbd/controllerserver.go +++ b/internal/rbd/controllerserver.go @@ -593,7 +593,7 @@ func (cs *ControllerServer) repairExistingVolume(ctx context.Context, req *csi.C func flattenTemporaryClonedImages(ctx context.Context, rbdVol *rbdVolume, cr *util.Credentials) error { snaps, children, err := rbdVol.listSnapAndChildren() if err != nil { - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { return status.Error(codes.InvalidArgument, err.Error()) } @@ -865,7 +865,7 @@ func checkContentSource( rbdvol, err := GenVolFromVolID(ctx, volID, cr, req.GetSecrets()) if err != nil { log.ErrorLog(ctx, "failed to get backend image for %s: %v", volID, err) - if !errors.Is(err, util.ErrImageNotFound) { + if !errors.Is(err, rbd_errors.ErrImageNotFound) { return nil, nil, status.Error(codes.Internal, err.Error()) } @@ -905,7 +905,7 @@ func (cs *ControllerServer) checkErrAndUndoReserve( return &csi.DeleteVolumeResponse{}, nil } - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { notFoundErr := rbdVol.ensureImageCleanup(ctx) if notFoundErr != nil { return nil, status.Errorf(codes.Internal, "failed to cleanup image %q: %v", rbdVol, notFoundErr) @@ -980,7 +980,7 @@ func (cs *ControllerServer) DeleteVolume( return nil, status.Error(codes.InvalidArgument, pErr.Error()) } pErr = deleteMigratedVolume(ctx, pmVolID, cr) - if pErr != nil && !errors.Is(pErr, util.ErrImageNotFound) { + if pErr != nil && !errors.Is(pErr, rbd_errors.ErrImageNotFound) { return nil, status.Error(codes.Internal, pErr.Error()) } @@ -1152,7 +1152,7 @@ func (cs *ControllerServer) CreateSnapshot( }() if err != nil { switch { - case errors.Is(err, util.ErrImageNotFound): + case errors.Is(err, rbd_errors.ErrImageNotFound): err = status.Errorf(codes.NotFound, "source Volume ID %s not found", req.GetSourceVolumeId()) case errors.Is(err, util.ErrPoolNotFound): log.ErrorLog(ctx, "failed to get backend volume for %s: %v", req.GetSourceVolumeId(), err) @@ -1497,7 +1497,7 @@ func (cs *ControllerServer) DeleteSnapshot( // if the error is ErrImageNotFound, We need to cleanup the image from // trash and remove the metadata in OMAP. - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { log.UsefulLog(ctx, "cleaning up leftovers of snapshot %s: %v", snapshotID, err) err = cleanUpImageAndSnapReservation(ctx, rbdSnap, cr) @@ -1600,7 +1600,7 @@ func (cs *ControllerServer) ControllerExpandVolume( rbdVol, err := genVolFromVolIDWithMigration(ctx, volID, cr, req.GetSecrets()) if err != nil { switch { - case errors.Is(err, util.ErrImageNotFound): + case errors.Is(err, rbd_errors.ErrImageNotFound): err = status.Errorf(codes.NotFound, "volume ID %s not found", volID) case errors.Is(err, util.ErrPoolNotFound): log.ErrorLog(ctx, "failed to get backend volume for %s: %v", volID, err) diff --git a/internal/rbd/errors/errors.go b/internal/rbd/errors/errors.go index 4257674f7..5c6b0d848 100644 --- a/internal/rbd/errors/errors.go +++ b/internal/rbd/errors/errors.go @@ -16,9 +16,17 @@ limitations under the License. package rbd_errors -import "errors" +import ( + "errors" + "fmt" + + librados "github.com/ceph/go-ceph/rados" + librbd "github.com/ceph/go-ceph/rbd" +) var ( + // ErrImageNotFound is returned when image name is not found in the cluster on the given pool and/or namespace. + ErrImageNotFound = errors.New("image not found") // ErrSnapNotFound is returned when snap name passed is not found in the list of snapshots for the // given image. ErrSnapNotFound = errors.New("snapshot not found") @@ -55,4 +63,8 @@ var ( ErrInvalidArgument = errors.New("invalid arguments provided") // ErrImageInUse is returned when the image is in use. ErrImageInUse = errors.New("image is in use") + // ErrRBDGroupNotConnected is returned when the RBD group is not connected. + ErrRBDGroupNotConnected = fmt.Errorf("%w: RBD group is not connected", librados.ErrNotConnected) + // ErrRBDGroupNotFound is returned when group name is not found in the cluster on the given pool and/or namespace. + ErrRBDGroupNotFound = fmt.Errorf("%w: RBD group not found", librbd.ErrNotFound) ) diff --git a/internal/rbd/group/group_snapshot.go b/internal/rbd/group/group_snapshot.go index 81daaf010..808bd3fd3 100644 --- a/internal/rbd/group/group_snapshot.go +++ b/internal/rbd/group/group_snapshot.go @@ -24,6 +24,7 @@ import ( "github.com/container-storage-interface/spec/lib/go/csi" "google.golang.org/protobuf/types/known/timestamppb" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" "github.com/ceph/ceph-csi/internal/rbd/types" "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/log" @@ -70,7 +71,7 @@ func GetVolumeGroupSnapshot( attrs, err := vgs.getVolumeGroupAttributes(ctx) if err != nil { - if errors.Is(err, ErrRBDGroupNotFound) { + if errors.Is(err, rbd_errors.ErrRBDGroupNotFound) { log.ErrorLog(ctx, "%v, returning empty volume group snapshot %q", vgs, err) return vgs, err diff --git a/internal/rbd/group/util.go b/internal/rbd/group/util.go index bbdec576e..b53c3505c 100644 --- a/internal/rbd/group/util.go +++ b/internal/rbd/group/util.go @@ -26,6 +26,7 @@ import ( "github.com/ceph/go-ceph/rados" "github.com/ceph/ceph-csi/internal/journal" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/log" ) @@ -126,7 +127,7 @@ func (cvg *commonVolumeGroup) generateVolumeGroupFromMapping( } mcsiID.LocationID = mPID err = cvg.generateVolumeGroup(mcsiID) - if util.ShouldRetryVolumeGeneration(err) { + if ShouldRetryVolumeGroupGeneration(err) { continue } @@ -161,13 +162,13 @@ func (cvg *commonVolumeGroup) initCommonVolumeGroup( err = cvg.generateVolumeGroup(csiID) // If the error is not a retryable error, return from here. - if err != nil && !util.ShouldRetryVolumeGeneration(err) { + if err != nil && !ShouldRetryVolumeGroupGeneration(err) { return err } // If the error is a retryable error, we should try to get the cluster mapping // and generate the volume group from the mapping. - if util.ShouldRetryVolumeGeneration(err) { + if ShouldRetryVolumeGroupGeneration(err) { mapping, err := util.GetClusterMappingInfo(csiID.ClusterID) if err != nil { return err @@ -229,7 +230,7 @@ func (cvg *commonVolumeGroup) getVolumeGroupAttributes(ctx context.Context) (*jo if attrs.GroupName == "" { log.ErrorLog(ctx, "volume group with id %v not found", cvg.id) - return nil, ErrRBDGroupNotFound + return nil, rbd_errors.ErrRBDGroupNotFound } cvg.requestName = attrs.RequestName @@ -356,12 +357,12 @@ func (cvg *commonVolumeGroup) GetIOContext(ctx context.Context) (*rados.IOContex conn, err := cvg.getConnection(ctx) if err != nil { - return nil, fmt.Errorf("%w: failed to connect: %w", ErrRBDGroupNotConnected, err) + return nil, fmt.Errorf("%w: failed to connect: %w", rbd_errors.ErrRBDGroupNotConnected, err) } ioctx, err := conn.GetIoctx(cvg.pool) if err != nil { - return nil, fmt.Errorf("%w: failed to get IOContext: %w", ErrRBDGroupNotConnected, err) + return nil, fmt.Errorf("%w: failed to get IOContext: %w", rbd_errors.ErrRBDGroupNotConnected, err) } if cvg.namespace != "" { @@ -417,3 +418,28 @@ func (cvg *commonVolumeGroup) GetCreationTime(ctx context.Context) (*time.Time, return cvg.creationTime, nil } + +// ShouldRetryVolumeGroupGeneration determines whether the process of finding or generating +// volumegroups should continue based on the type of error encountered. +// +// It checks if the given error matches any of the following known errors: +// - ErrPoolNotFound: The rbd pool where the volumegroup/omap is expected doesn't exist. +// - ErrRBDGroupNotFound: The volumegroup doesn't exist in the rbd pool. +// - rados.ErrPermissionDenied: Permissions to access the pool is denied. +// +// If any of these errors are encountered, the function returns `true`, indicating +// that the volumegroup search should continue because of known error. Otherwise, it +// returns `false`, meaning the search should stop. +// +// This helper function is used in scenarios where multiple attempts may be made +// to retrieve or generate volumegroup information, and we want to gracefully handle +// specific failure cases while retrying for others. +func ShouldRetryVolumeGroupGeneration(err error) bool { + if err == nil { + return false // No error, do not retry + } + // Continue searching for specific known errors + return (errors.Is(err, util.ErrPoolNotFound) || + errors.Is(err, rbd_errors.ErrRBDGroupNotFound) || + errors.Is(err, rados.ErrPermissionDenied)) +} diff --git a/internal/util/errors_test.go b/internal/rbd/group/util_test.go similarity index 78% rename from internal/util/errors_test.go rename to internal/rbd/group/util_test.go index cdb106758..8ddd6dd64 100644 --- a/internal/util/errors_test.go +++ b/internal/rbd/group/util_test.go @@ -14,16 +14,19 @@ See the License for the specific language governing permissions and limitations under the License. */ -package util +package group import ( "errors" "testing" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" + "github.com/ceph/ceph-csi/internal/util" + "github.com/ceph/go-ceph/rados" ) -func Test_shouldRetryVolumeGeneration(t *testing.T) { +func Test_shouldRetryVolumeGroupGeneration(t *testing.T) { t.Parallel() type args struct { err error @@ -38,19 +41,14 @@ func Test_shouldRetryVolumeGeneration(t *testing.T) { args: args{err: nil}, want: false, // No error, stop searching }, - { - name: "ErrKeyNotFound (continue searching)", - args: args{err: ErrKeyNotFound}, - want: true, // Known error, continue searching - }, { name: "ErrPoolNotFound (continue searching)", - args: args{err: ErrPoolNotFound}, + args: args{err: util.ErrPoolNotFound}, want: true, // Known error, continue searching }, { - name: "ErrImageNotFound (continue searching)", - args: args{err: ErrImageNotFound}, + name: "ErrRBDGroupNotFound (continue searching)", + args: args{err: rbd_errors.ErrRBDGroupNotFound}, want: true, // Known error, continue searching }, { @@ -67,7 +65,7 @@ func Test_shouldRetryVolumeGeneration(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() - if got := ShouldRetryVolumeGeneration(tt.args.err); got != tt.want { + if got := ShouldRetryVolumeGroupGeneration(tt.args.err); got != tt.want { t.Errorf("ShouldRetryVolumeGeneration() = %v, want %v", got, tt.want) } }) diff --git a/internal/rbd/group/volume_group.go b/internal/rbd/group/volume_group.go index e8414f3dd..4141370df 100644 --- a/internal/rbd/group/volume_group.go +++ b/internal/rbd/group/volume_group.go @@ -22,21 +22,16 @@ import ( "fmt" "github.com/ceph/go-ceph/rados" - librados "github.com/ceph/go-ceph/rados" librbd "github.com/ceph/go-ceph/rbd" "github.com/container-storage-interface/spec/lib/go/csi" "github.com/csi-addons/spec/lib/go/volumegroup" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" "github.com/ceph/ceph-csi/internal/rbd/types" "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/log" ) -var ( - ErrRBDGroupNotConnected = fmt.Errorf("%w: RBD group is not connected", librados.ErrNotConnected) - ErrRBDGroupNotFound = fmt.Errorf("%w: RBD group not found", librbd.ErrNotFound) -) - // volumeGroup handles all requests for 'rbd group' operations. type volumeGroup struct { commonVolumeGroup @@ -77,7 +72,7 @@ func GetVolumeGroup( attrs, err := vg.getVolumeGroupAttributes(ctx) if err != nil { - if errors.Is(err, ErrRBDGroupNotFound) { + if errors.Is(err, rbd_errors.ErrRBDGroupNotFound) { log.ErrorLog(ctx, "%v, returning empty volume group %q", vg, err) return vg, err diff --git a/internal/rbd/group_controllerserver.go b/internal/rbd/group_controllerserver.go index a4b313ea7..5edf6a914 100644 --- a/internal/rbd/group_controllerserver.go +++ b/internal/rbd/group_controllerserver.go @@ -24,7 +24,7 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/status" - "github.com/ceph/ceph-csi/internal/rbd/group" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" "github.com/ceph/ceph-csi/internal/rbd/types" "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/log" @@ -215,7 +215,7 @@ func (cs *ControllerServer) DeleteVolumeGroupSnapshot( groupSnapshot, err := mgr.GetVolumeGroupSnapshotByID(ctx, groupSnapshotID) if err != nil { - if errors.Is(err, group.ErrRBDGroupNotFound) { + if errors.Is(err, rbd_errors.ErrRBDGroupNotFound) { log.ErrorLog(ctx, "VolumeGroupSnapshot %q doesn't exists", groupSnapshotID) return &csi.DeleteVolumeGroupSnapshotResponse{}, nil @@ -261,7 +261,7 @@ func (cs *ControllerServer) GetVolumeGroupSnapshot( groupSnapshot, err := mgr.GetVolumeGroupSnapshotByID(ctx, groupSnapshotID) if err != nil { - if errors.Is(err, group.ErrRBDGroupNotFound) { + if errors.Is(err, rbd_errors.ErrRBDGroupNotFound) { log.ErrorLog(ctx, "VolumeGroupSnapshot %q doesn't exists", groupSnapshotID) return nil, status.Errorf( diff --git a/internal/rbd/manager.go b/internal/rbd/manager.go index 44164a38d..c832e4e19 100644 --- a/internal/rbd/manager.go +++ b/internal/rbd/manager.go @@ -175,7 +175,7 @@ func (mgr *rbdManager) GetVolumeByID(ctx context.Context, id string) (types.Volu volume, err := GenVolFromVolID(ctx, id, creds, mgr.secrets) if err != nil { switch { - case errors.Is(err, util.ErrImageNotFound): + case errors.Is(err, rbd_errors.ErrImageNotFound): err = fmt.Errorf("volume %s not found: %w", id, err) return nil, err @@ -200,7 +200,7 @@ func (mgr *rbdManager) GetSnapshotByID(ctx context.Context, id string) (types.Sn snapshot, err := genSnapFromSnapID(ctx, id, creds, mgr.secrets) if err != nil { switch { - case errors.Is(err, util.ErrImageNotFound): + case errors.Is(err, rbd_errors.ErrImageNotFound): err = fmt.Errorf("volume %s not found: %w", id, err) return nil, err @@ -468,7 +468,7 @@ func (mgr *rbdManager) CreateVolumeGroupSnapshot( return vgs, nil } - } else if err != nil && !errors.Is(err, util.ErrImageNotFound) { + } else if err != nil && !errors.Is(err, rbd_errors.ErrImageNotFound) { // ErrImageNotFound can be returned if the VolumeGroupSnapshot // could not be found. It is expected that it does not exist // yet, in which case it will be created below. diff --git a/internal/rbd/rbd_journal.go b/internal/rbd/rbd_journal.go index 9b6e269a5..eb20ef960 100644 --- a/internal/rbd/rbd_journal.go +++ b/internal/rbd/rbd_journal.go @@ -173,7 +173,7 @@ func checkSnapCloneExists( // Fetch on-disk image attributes err = vol.getImageInfo() if err != nil { - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { err = parentVol.deleteSnapshot(ctx, rbdSnap) if err != nil { if !errors.Is(err, rbd_errors.ErrSnapNotFound) { @@ -299,7 +299,7 @@ func (rv *rbdVolume) Exists(ctx context.Context, parentVol *rbdVolume) (bool, er // Fetch on-disk image attributes and compare against request err = rv.getImageInfo() switch { - case errors.Is(err, util.ErrImageNotFound) && parentVol != nil: + case errors.Is(err, rbd_errors.ErrImageNotFound) && parentVol != nil: // Need to check cloned info here not on createvolume found, cErr := rv.checkCloneImage(ctx, parentVol) if cErr != nil { @@ -313,7 +313,7 @@ func (rv *rbdVolume) Exists(ctx context.Context, parentVol *rbdVolume) (bool, er return false, err } - case errors.Is(err, util.ErrImageNotFound) && parentVol == nil: + case errors.Is(err, rbd_errors.ErrImageNotFound) && parentVol == nil: // image not found, undo the reservation err = j.UndoReservation(ctx, rv.JournalPool, rv.Pool, rv.RbdImageName, rv.RequestName) diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index 87a07b192..0c7a04bf5 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -531,7 +531,7 @@ func (ri *rbdImage) open() (*librbd.Image, error) { image, err := librbd.OpenImage(ri.ioctx, ri.RbdImageName, librbd.NoSnapshot) if err != nil { if errors.Is(err, librbd.ErrNotFound) { - err = fmt.Errorf("Failed as %w (internal %w)", util.ErrImageNotFound, err) + err = fmt.Errorf("Failed as %w (internal %w)", rbd_errors.ErrImageNotFound, err) } return nil, err @@ -549,7 +549,7 @@ func (ri *rbdImage) open() (*librbd.Image, error) { func (ri *rbdImage) isInUse() (bool, error) { image, err := ri.open() if err != nil { - if errors.Is(err, util.ErrImageNotFound) || errors.Is(err, util.ErrPoolNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) || errors.Is(err, util.ErrPoolNotFound) { return false, err } // any error should assume something else is using the image @@ -692,7 +692,7 @@ func (ri *rbdImage) Delete(ctx context.Context) error { err = rbdImage.Trash(0) if err != nil { if errors.Is(err, librbd.ErrNotFound) { - return fmt.Errorf("Failed as %w (internal %w)", util.ErrImageNotFound, err) + return fmt.Errorf("Failed as %w (internal %w)", rbd_errors.ErrImageNotFound, err) } log.ErrorLog(ctx, "failed to delete rbd image: %s, error: %v", ri, err) @@ -746,7 +746,7 @@ func (rv *rbdVolume) DeleteTempImage(ctx context.Context) error { tempClone := rv.generateTempClone() err := tempClone.Delete(ctx) if err != nil { - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { return tempClone.ensureImageCleanup(ctx) } else { // return error if it is not ErrImageNotFound @@ -785,7 +785,7 @@ func (ri *rbdImage) getCloneDepth(ctx context.Context) (uint, error) { // if the parent image is moved to trash the name will be present // in rbd image info but the image will be in trash, in that case // return the found depth - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { return depth, nil } log.ErrorLog(ctx, "failed to check depth on image %s: %s", &vol, err) @@ -975,7 +975,7 @@ func (ri *rbdImage) checkImageChainHasFeature(ctx context.Context, feature uint6 // is in the trash, when we try to open the parent image to get its // information it fails because it is already in trash. We should // treat error as nil if the parent is not found. - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { return false, nil } log.ErrorLog(ctx, "failed to get image info for %s: %s", rbdImg.String(), err) @@ -1213,6 +1213,33 @@ func generateVolumeFromVolumeID( return rbdVol, err } +// ShouldRetryVolumeGeneration determines whether the process of finding or generating +// volumes should continue based on the type of error encountered. +// +// It checks if the given error matches any of the following known errors: +// - util.ErrKeyNotFound: The key required to locate the volume is missing in Rados omap. +// - util.ErrPoolNotFound: The rbd pool where the volume/omap is expected doesn't exist. +// - ErrImageNotFound: The image doesn't exist in the rbd pool. +// - rados.ErrPermissionDenied: Permissions to access the pool is denied. +// +// If any of these errors are encountered, the function returns `true`, indicating +// that the volume search should continue because of known error. Otherwise, it +// returns `false`, meaning the search should stop. +// +// This helper function is used in scenarios where multiple attempts may be made +// to retrieve or generate volume information, and we want to gracefully handle +// specific failure cases while retrying for others. +func ShouldRetryVolumeGeneration(err error) bool { + if err == nil { + return false // No error, do not retry + } + // Continue searching for specific known errors + return (errors.Is(err, util.ErrKeyNotFound) || + errors.Is(err, util.ErrPoolNotFound) || + errors.Is(err, rbd_errors.ErrImageNotFound) || + errors.Is(err, rados.ErrPermissionDenied)) +} + // GenVolFromVolID generates a rbdVolume structure from the provided identifier, updating // the structure with elements from on-disk image metadata as well. func GenVolFromVolID( @@ -1233,7 +1260,7 @@ func GenVolFromVolID( } vol, err = generateVolumeFromVolumeID(ctx, volumeID, vi, cr, secrets) - if !util.ShouldRetryVolumeGeneration(err) { + if !ShouldRetryVolumeGeneration(err) { return vol, err } @@ -1244,7 +1271,7 @@ func GenVolFromVolID( } if mapping != nil { rbdVol, vErr := generateVolumeFromMapping(ctx, mapping, volumeID, vi, cr, secrets) - if !util.ShouldRetryVolumeGeneration(vErr) { + if !ShouldRetryVolumeGeneration(vErr) { return rbdVol, vErr } } @@ -1297,7 +1324,7 @@ func generateVolumeFromMapping( // Add mapping poolID to Identifier nvi.LocationID = pID vol, err = generateVolumeFromVolumeID(ctx, volumeID, nvi, cr, secrets) - if !util.ShouldRetryVolumeGeneration(err) { + if !ShouldRetryVolumeGeneration(err) { return vol, err } } diff --git a/internal/rbd/rbd_util_test.go b/internal/rbd/rbd_util_test.go index 905e97707..cef48f723 100644 --- a/internal/rbd/rbd_util_test.go +++ b/internal/rbd/rbd_util_test.go @@ -23,6 +23,10 @@ import ( "strings" "testing" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" + "github.com/ceph/ceph-csi/internal/util" + + "github.com/ceph/go-ceph/rados" librbd "github.com/ceph/go-ceph/rbd" "github.com/stretchr/testify/require" ) @@ -387,3 +391,54 @@ func Test_checkValidImageFeatures(t *testing.T) { }) } } + +func Test_shouldRetryVolumeGeneration(t *testing.T) { + t.Parallel() + type args struct { + err error + } + tests := []struct { + name string + args args + want bool + }{ + { + name: "No error (stop searching)", + args: args{err: nil}, + want: false, // No error, stop searching + }, + { + name: "ErrKeyNotFound (continue searching)", + args: args{err: util.ErrKeyNotFound}, + want: true, // Known error, continue searching + }, + { + name: "ErrPoolNotFound (continue searching)", + args: args{err: util.ErrPoolNotFound}, + want: true, // Known error, continue searching + }, + { + name: "ErrImageNotFound (continue searching)", + args: args{err: rbd_errors.ErrImageNotFound}, + want: true, // Known error, continue searching + }, + { + name: "ErrPermissionDenied (continue searching)", + args: args{err: rados.ErrPermissionDenied}, + want: true, // Known error, continue searching + }, + { + name: "Different error (stop searching)", + args: args{err: errors.New("unknown error")}, + want: false, // Unknown error, stop searching + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + if got := ShouldRetryVolumeGeneration(tt.args.err); got != tt.want { + t.Errorf("ShouldRetryVolumeGeneration() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/internal/rbd/snapshot.go b/internal/rbd/snapshot.go index 9d578368b..532e7823e 100644 --- a/internal/rbd/snapshot.go +++ b/internal/rbd/snapshot.go @@ -83,7 +83,7 @@ func cleanUpSnapshot( ) error { err := parentVol.deleteSnapshot(ctx, rbdSnap) if err != nil { - if !errors.Is(err, util.ErrImageNotFound) && !errors.Is(err, rbd_errors.ErrSnapNotFound) { + if !errors.Is(err, rbd_errors.ErrImageNotFound) && !errors.Is(err, rbd_errors.ErrSnapNotFound) { log.ErrorLog(ctx, "failed to delete snapshot %q: %v", rbdSnap, err) return err @@ -93,7 +93,7 @@ func cleanUpSnapshot( if rbdVol != nil { err := rbdVol.Delete(ctx) if err != nil { - if !errors.Is(err, util.ErrImageNotFound) { + if !errors.Is(err, rbd_errors.ErrImageNotFound) { log.ErrorLog(ctx, "failed to delete rbd image %q with error: %v", rbdVol, err) return err diff --git a/internal/util/errors.go b/internal/util/errors.go index 11a3f2964..ed821769a 100644 --- a/internal/util/errors.go +++ b/internal/util/errors.go @@ -16,15 +16,9 @@ limitations under the License. package util -import ( - "errors" - - "github.com/ceph/go-ceph/rados" -) +import "errors" var ( - // ErrImageNotFound is returned when image name is not found in the cluster on the given pool and/or namespace. - ErrImageNotFound = errors.New("image not found") // ErrKeyNotFound is returned when requested key in omap is not found. ErrKeyNotFound = errors.New("key not found") // ErrObjectExists is returned when named omap is already present in rados. @@ -41,30 +35,3 @@ var ( // ErrMissingConfigForMonitor is returned when clusterID is not found for the mon. ErrMissingConfigForMonitor = errors.New("missing configuration of cluster ID for monitor") ) - -// ShouldRetryVolumeGeneration determines whether the process of finding or generating -// volumes should continue based on the type of error encountered. -// -// It checks if the given error matches any of the following known errors: -// - util.ErrKeyNotFound: The key required to locate the volume is missing in Rados omap. -// - util.ErrPoolNotFound: The rbd pool where the volume/omap is expected doesn't exist. -// - ErrImageNotFound: The image doesn't exist in the rbd pool. -// - rados.ErrPermissionDenied: Permissions to access the pool is denied. -// -// If any of these errors are encountered, the function returns `true`, indicating -// that the volume search should continue because of known error. Otherwise, it -// returns `false`, meaning the search should stop. -// -// This helper function is used in scenarios where multiple attempts may be made -// to retrieve or generate volume information, and we want to gracefully handle -// specific failure cases while retrying for others. -func ShouldRetryVolumeGeneration(err error) bool { - if err == nil { - return false // No error, do not retry - } - // Continue searching for specific known errors - return (errors.Is(err, ErrKeyNotFound) || - errors.Is(err, ErrPoolNotFound) || - errors.Is(err, ErrImageNotFound) || - errors.Is(err, rados.ErrPermissionDenied)) -}