diff --git a/internal/csi-addons/rbd/encryptionkeyrotation.go b/internal/csi-addons/rbd/encryptionkeyrotation.go index 0d7a09d78..a24f8770b 100644 --- a/internal/csi-addons/rbd/encryptionkeyrotation.go +++ b/internal/csi-addons/rbd/encryptionkeyrotation.go @@ -21,6 +21,7 @@ import ( "errors" "github.com/ceph/ceph-csi/internal/rbd" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/log" @@ -68,7 +69,7 @@ func (ekrs *EncryptionKeyRotationServer) EncryptionKeyRotate( rbdVol, err := mgr.GetVolumeByID(ctx, volID) if err != nil { switch { - case errors.Is(err, util.ErrImageNotFound): + case errors.Is(err, rbd_errors.ErrImageNotFound): err = status.Errorf(codes.NotFound, "volume ID %s not found", volID) case errors.Is(err, util.ErrPoolNotFound): log.ErrorLog(ctx, "failed to get backend volume for %s: %v", volID, err) diff --git a/internal/csi-addons/rbd/replication.go b/internal/csi-addons/rbd/replication.go index 6ec92470a..6a3f2453f 100644 --- a/internal/csi-addons/rbd/replication.go +++ b/internal/csi-addons/rbd/replication.go @@ -652,7 +652,7 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context, sts, err := mirror.GetGlobalMirroringStatus(ctx) if err != nil { // the image gets recreated after issuing resync - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { // caller retries till RBD syncs an initial version of the image to // report its status in the resync call. Ideally, this line will not // be executed as the error would get returned due to getMirroringInfo @@ -786,7 +786,7 @@ func getGRPCError(err error) error { } errorStatusMap := map[error]codes.Code{ - util.ErrImageNotFound: codes.NotFound, + rbd_errors.ErrImageNotFound: codes.NotFound, util.ErrPoolNotFound: codes.NotFound, rbd_errors.ErrInvalidArgument: codes.InvalidArgument, rbd_errors.ErrFlattenInProgress: codes.Aborted, @@ -836,7 +836,7 @@ func (rs *ReplicationServer) GetVolumeReplicationInfo(ctx context.Context, log.ErrorLog(ctx, "failed to get volume with id %q: %v", volumeID, err) switch { - case errors.Is(err, util.ErrImageNotFound): + case errors.Is(err, rbd_errors.ErrImageNotFound): err = status.Error(codes.NotFound, err.Error()) case errors.Is(err, util.ErrPoolNotFound): err = status.Error(codes.NotFound, err.Error()) @@ -873,7 +873,7 @@ func (rs *ReplicationServer) GetVolumeReplicationInfo(ctx context.Context, if err != nil { log.ErrorLog(ctx, "failed to get status for mirror %q: %v", mirror, err) - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { return nil, status.Error(codes.Aborted, err.Error()) } diff --git a/internal/csi-addons/rbd/replication_test.go b/internal/csi-addons/rbd/replication_test.go index 22545866f..5b0128004 100644 --- a/internal/csi-addons/rbd/replication_test.go +++ b/internal/csi-addons/rbd/replication_test.go @@ -598,8 +598,8 @@ func TestGetGRPCError(t *testing.T) { }, { name: "ErrImageNotFound", - err: util.ErrImageNotFound, - expectedErr: status.Error(codes.NotFound, util.ErrImageNotFound.Error()), + err: rbd_errors.ErrImageNotFound, + expectedErr: status.Error(codes.NotFound, rbd_errors.ErrImageNotFound.Error()), }, { name: "ErrPoolNotFound", diff --git a/internal/csi-addons/rbd/volumegroup.go b/internal/csi-addons/rbd/volumegroup.go index 4727e548b..5fbb38141 100644 --- a/internal/csi-addons/rbd/volumegroup.go +++ b/internal/csi-addons/rbd/volumegroup.go @@ -23,7 +23,7 @@ import ( "slices" "github.com/ceph/ceph-csi/internal/rbd" - "github.com/ceph/ceph-csi/internal/rbd/group" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" "github.com/ceph/ceph-csi/internal/rbd/types" "github.com/ceph/ceph-csi/internal/util/log" @@ -194,7 +194,7 @@ func (vs *VolumeGroupServer) DeleteVolumeGroup( // resolve the volume group vg, err := mgr.GetVolumeGroupByID(ctx, req.GetVolumeGroupId()) if err != nil { - if errors.Is(err, group.ErrRBDGroupNotFound) { + if errors.Is(err, rbd_errors.ErrRBDGroupNotFound) { log.ErrorLog(ctx, "VolumeGroup %q doesn't exists", req.GetVolumeGroupId()) return &volumegroup.DeleteVolumeGroupResponse{}, nil @@ -433,7 +433,7 @@ func (vs *VolumeGroupServer) ControllerGetVolumeGroup( // resolve the volume group vg, err := mgr.GetVolumeGroupByID(ctx, req.GetVolumeGroupId()) if err != nil { - if errors.Is(err, group.ErrRBDGroupNotFound) { + if errors.Is(err, rbd_errors.ErrRBDGroupNotFound) { log.ErrorLog(ctx, "VolumeGroup %q doesn't exists", req.GetVolumeGroupId()) return nil, status.Errorf( diff --git a/internal/rbd/clone.go b/internal/rbd/clone.go index 6ca4f64e6..e70edba5d 100644 --- a/internal/rbd/clone.go +++ b/internal/rbd/clone.go @@ -22,7 +22,6 @@ import ( "fmt" rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" - "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/k8s" "github.com/ceph/ceph-csi/internal/util/log" @@ -68,7 +67,7 @@ func (rv *rbdVolume) checkCloneImage(ctx context.Context, parentVol *rbdVolume) return true, nil - case errors.Is(err, util.ErrImageNotFound): + case errors.Is(err, rbd_errors.ErrImageNotFound): // as the temp clone does not exist,check snapshot exists on parent volume // snapshot name is same as temporary clone image snap.RbdImageName = tempClone.RbdImageName diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go index ac7e9b81e..26a4a6bb1 100644 --- a/internal/rbd/controllerserver.go +++ b/internal/rbd/controllerserver.go @@ -593,7 +593,7 @@ func (cs *ControllerServer) repairExistingVolume(ctx context.Context, req *csi.C func flattenTemporaryClonedImages(ctx context.Context, rbdVol *rbdVolume, cr *util.Credentials) error { snaps, children, err := rbdVol.listSnapAndChildren() if err != nil { - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { return status.Error(codes.InvalidArgument, err.Error()) } @@ -865,7 +865,7 @@ func checkContentSource( rbdvol, err := GenVolFromVolID(ctx, volID, cr, req.GetSecrets()) if err != nil { log.ErrorLog(ctx, "failed to get backend image for %s: %v", volID, err) - if !errors.Is(err, util.ErrImageNotFound) { + if !errors.Is(err, rbd_errors.ErrImageNotFound) { return nil, nil, status.Error(codes.Internal, err.Error()) } @@ -905,7 +905,7 @@ func (cs *ControllerServer) checkErrAndUndoReserve( return &csi.DeleteVolumeResponse{}, nil } - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { notFoundErr := rbdVol.ensureImageCleanup(ctx) if notFoundErr != nil { return nil, status.Errorf(codes.Internal, "failed to cleanup image %q: %v", rbdVol, notFoundErr) @@ -980,7 +980,7 @@ func (cs *ControllerServer) DeleteVolume( return nil, status.Error(codes.InvalidArgument, pErr.Error()) } pErr = deleteMigratedVolume(ctx, pmVolID, cr) - if pErr != nil && !errors.Is(pErr, util.ErrImageNotFound) { + if pErr != nil && !errors.Is(pErr, rbd_errors.ErrImageNotFound) { return nil, status.Error(codes.Internal, pErr.Error()) } @@ -1152,7 +1152,7 @@ func (cs *ControllerServer) CreateSnapshot( }() if err != nil { switch { - case errors.Is(err, util.ErrImageNotFound): + case errors.Is(err, rbd_errors.ErrImageNotFound): err = status.Errorf(codes.NotFound, "source Volume ID %s not found", req.GetSourceVolumeId()) case errors.Is(err, util.ErrPoolNotFound): log.ErrorLog(ctx, "failed to get backend volume for %s: %v", req.GetSourceVolumeId(), err) @@ -1497,7 +1497,7 @@ func (cs *ControllerServer) DeleteSnapshot( // if the error is ErrImageNotFound, We need to cleanup the image from // trash and remove the metadata in OMAP. - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { log.UsefulLog(ctx, "cleaning up leftovers of snapshot %s: %v", snapshotID, err) err = cleanUpImageAndSnapReservation(ctx, rbdSnap, cr) @@ -1600,7 +1600,7 @@ func (cs *ControllerServer) ControllerExpandVolume( rbdVol, err := genVolFromVolIDWithMigration(ctx, volID, cr, req.GetSecrets()) if err != nil { switch { - case errors.Is(err, util.ErrImageNotFound): + case errors.Is(err, rbd_errors.ErrImageNotFound): err = status.Errorf(codes.NotFound, "volume ID %s not found", volID) case errors.Is(err, util.ErrPoolNotFound): log.ErrorLog(ctx, "failed to get backend volume for %s: %v", volID, err) diff --git a/internal/rbd/errors/errors.go b/internal/rbd/errors/errors.go index 4257674f7..5c6b0d848 100644 --- a/internal/rbd/errors/errors.go +++ b/internal/rbd/errors/errors.go @@ -16,9 +16,17 @@ limitations under the License. package rbd_errors -import "errors" +import ( + "errors" + "fmt" + + librados "github.com/ceph/go-ceph/rados" + librbd "github.com/ceph/go-ceph/rbd" +) var ( + // ErrImageNotFound is returned when image name is not found in the cluster on the given pool and/or namespace. + ErrImageNotFound = errors.New("image not found") // ErrSnapNotFound is returned when snap name passed is not found in the list of snapshots for the // given image. ErrSnapNotFound = errors.New("snapshot not found") @@ -55,4 +63,8 @@ var ( ErrInvalidArgument = errors.New("invalid arguments provided") // ErrImageInUse is returned when the image is in use. ErrImageInUse = errors.New("image is in use") + // ErrRBDGroupNotConnected is returned when the RBD group is not connected. + ErrRBDGroupNotConnected = fmt.Errorf("%w: RBD group is not connected", librados.ErrNotConnected) + // ErrRBDGroupNotFound is returned when group name is not found in the cluster on the given pool and/or namespace. + ErrRBDGroupNotFound = fmt.Errorf("%w: RBD group not found", librbd.ErrNotFound) ) diff --git a/internal/rbd/group/group_snapshot.go b/internal/rbd/group/group_snapshot.go index 81daaf010..808bd3fd3 100644 --- a/internal/rbd/group/group_snapshot.go +++ b/internal/rbd/group/group_snapshot.go @@ -24,6 +24,7 @@ import ( "github.com/container-storage-interface/spec/lib/go/csi" "google.golang.org/protobuf/types/known/timestamppb" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" "github.com/ceph/ceph-csi/internal/rbd/types" "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/log" @@ -70,7 +71,7 @@ func GetVolumeGroupSnapshot( attrs, err := vgs.getVolumeGroupAttributes(ctx) if err != nil { - if errors.Is(err, ErrRBDGroupNotFound) { + if errors.Is(err, rbd_errors.ErrRBDGroupNotFound) { log.ErrorLog(ctx, "%v, returning empty volume group snapshot %q", vgs, err) return vgs, err diff --git a/internal/rbd/group/util.go b/internal/rbd/group/util.go index bbdec576e..b53c3505c 100644 --- a/internal/rbd/group/util.go +++ b/internal/rbd/group/util.go @@ -26,6 +26,7 @@ import ( "github.com/ceph/go-ceph/rados" "github.com/ceph/ceph-csi/internal/journal" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/log" ) @@ -126,7 +127,7 @@ func (cvg *commonVolumeGroup) generateVolumeGroupFromMapping( } mcsiID.LocationID = mPID err = cvg.generateVolumeGroup(mcsiID) - if util.ShouldRetryVolumeGeneration(err) { + if ShouldRetryVolumeGroupGeneration(err) { continue } @@ -161,13 +162,13 @@ func (cvg *commonVolumeGroup) initCommonVolumeGroup( err = cvg.generateVolumeGroup(csiID) // If the error is not a retryable error, return from here. - if err != nil && !util.ShouldRetryVolumeGeneration(err) { + if err != nil && !ShouldRetryVolumeGroupGeneration(err) { return err } // If the error is a retryable error, we should try to get the cluster mapping // and generate the volume group from the mapping. - if util.ShouldRetryVolumeGeneration(err) { + if ShouldRetryVolumeGroupGeneration(err) { mapping, err := util.GetClusterMappingInfo(csiID.ClusterID) if err != nil { return err @@ -229,7 +230,7 @@ func (cvg *commonVolumeGroup) getVolumeGroupAttributes(ctx context.Context) (*jo if attrs.GroupName == "" { log.ErrorLog(ctx, "volume group with id %v not found", cvg.id) - return nil, ErrRBDGroupNotFound + return nil, rbd_errors.ErrRBDGroupNotFound } cvg.requestName = attrs.RequestName @@ -356,12 +357,12 @@ func (cvg *commonVolumeGroup) GetIOContext(ctx context.Context) (*rados.IOContex conn, err := cvg.getConnection(ctx) if err != nil { - return nil, fmt.Errorf("%w: failed to connect: %w", ErrRBDGroupNotConnected, err) + return nil, fmt.Errorf("%w: failed to connect: %w", rbd_errors.ErrRBDGroupNotConnected, err) } ioctx, err := conn.GetIoctx(cvg.pool) if err != nil { - return nil, fmt.Errorf("%w: failed to get IOContext: %w", ErrRBDGroupNotConnected, err) + return nil, fmt.Errorf("%w: failed to get IOContext: %w", rbd_errors.ErrRBDGroupNotConnected, err) } if cvg.namespace != "" { @@ -417,3 +418,28 @@ func (cvg *commonVolumeGroup) GetCreationTime(ctx context.Context) (*time.Time, return cvg.creationTime, nil } + +// ShouldRetryVolumeGroupGeneration determines whether the process of finding or generating +// volumegroups should continue based on the type of error encountered. +// +// It checks if the given error matches any of the following known errors: +// - ErrPoolNotFound: The rbd pool where the volumegroup/omap is expected doesn't exist. +// - ErrRBDGroupNotFound: The volumegroup doesn't exist in the rbd pool. +// - rados.ErrPermissionDenied: Permissions to access the pool is denied. +// +// If any of these errors are encountered, the function returns `true`, indicating +// that the volumegroup search should continue because of known error. Otherwise, it +// returns `false`, meaning the search should stop. +// +// This helper function is used in scenarios where multiple attempts may be made +// to retrieve or generate volumegroup information, and we want to gracefully handle +// specific failure cases while retrying for others. +func ShouldRetryVolumeGroupGeneration(err error) bool { + if err == nil { + return false // No error, do not retry + } + // Continue searching for specific known errors + return (errors.Is(err, util.ErrPoolNotFound) || + errors.Is(err, rbd_errors.ErrRBDGroupNotFound) || + errors.Is(err, rados.ErrPermissionDenied)) +} diff --git a/internal/util/errors_test.go b/internal/rbd/group/util_test.go similarity index 78% rename from internal/util/errors_test.go rename to internal/rbd/group/util_test.go index cdb106758..8ddd6dd64 100644 --- a/internal/util/errors_test.go +++ b/internal/rbd/group/util_test.go @@ -14,16 +14,19 @@ See the License for the specific language governing permissions and limitations under the License. */ -package util +package group import ( "errors" "testing" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" + "github.com/ceph/ceph-csi/internal/util" + "github.com/ceph/go-ceph/rados" ) -func Test_shouldRetryVolumeGeneration(t *testing.T) { +func Test_shouldRetryVolumeGroupGeneration(t *testing.T) { t.Parallel() type args struct { err error @@ -38,19 +41,14 @@ func Test_shouldRetryVolumeGeneration(t *testing.T) { args: args{err: nil}, want: false, // No error, stop searching }, - { - name: "ErrKeyNotFound (continue searching)", - args: args{err: ErrKeyNotFound}, - want: true, // Known error, continue searching - }, { name: "ErrPoolNotFound (continue searching)", - args: args{err: ErrPoolNotFound}, + args: args{err: util.ErrPoolNotFound}, want: true, // Known error, continue searching }, { - name: "ErrImageNotFound (continue searching)", - args: args{err: ErrImageNotFound}, + name: "ErrRBDGroupNotFound (continue searching)", + args: args{err: rbd_errors.ErrRBDGroupNotFound}, want: true, // Known error, continue searching }, { @@ -67,7 +65,7 @@ func Test_shouldRetryVolumeGeneration(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() - if got := ShouldRetryVolumeGeneration(tt.args.err); got != tt.want { + if got := ShouldRetryVolumeGroupGeneration(tt.args.err); got != tt.want { t.Errorf("ShouldRetryVolumeGeneration() = %v, want %v", got, tt.want) } }) diff --git a/internal/rbd/group/volume_group.go b/internal/rbd/group/volume_group.go index e8414f3dd..4141370df 100644 --- a/internal/rbd/group/volume_group.go +++ b/internal/rbd/group/volume_group.go @@ -22,21 +22,16 @@ import ( "fmt" "github.com/ceph/go-ceph/rados" - librados "github.com/ceph/go-ceph/rados" librbd "github.com/ceph/go-ceph/rbd" "github.com/container-storage-interface/spec/lib/go/csi" "github.com/csi-addons/spec/lib/go/volumegroup" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" "github.com/ceph/ceph-csi/internal/rbd/types" "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/log" ) -var ( - ErrRBDGroupNotConnected = fmt.Errorf("%w: RBD group is not connected", librados.ErrNotConnected) - ErrRBDGroupNotFound = fmt.Errorf("%w: RBD group not found", librbd.ErrNotFound) -) - // volumeGroup handles all requests for 'rbd group' operations. type volumeGroup struct { commonVolumeGroup @@ -77,7 +72,7 @@ func GetVolumeGroup( attrs, err := vg.getVolumeGroupAttributes(ctx) if err != nil { - if errors.Is(err, ErrRBDGroupNotFound) { + if errors.Is(err, rbd_errors.ErrRBDGroupNotFound) { log.ErrorLog(ctx, "%v, returning empty volume group %q", vg, err) return vg, err diff --git a/internal/rbd/group_controllerserver.go b/internal/rbd/group_controllerserver.go index a4b313ea7..5edf6a914 100644 --- a/internal/rbd/group_controllerserver.go +++ b/internal/rbd/group_controllerserver.go @@ -24,7 +24,7 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/status" - "github.com/ceph/ceph-csi/internal/rbd/group" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" "github.com/ceph/ceph-csi/internal/rbd/types" "github.com/ceph/ceph-csi/internal/util" "github.com/ceph/ceph-csi/internal/util/log" @@ -215,7 +215,7 @@ func (cs *ControllerServer) DeleteVolumeGroupSnapshot( groupSnapshot, err := mgr.GetVolumeGroupSnapshotByID(ctx, groupSnapshotID) if err != nil { - if errors.Is(err, group.ErrRBDGroupNotFound) { + if errors.Is(err, rbd_errors.ErrRBDGroupNotFound) { log.ErrorLog(ctx, "VolumeGroupSnapshot %q doesn't exists", groupSnapshotID) return &csi.DeleteVolumeGroupSnapshotResponse{}, nil @@ -261,7 +261,7 @@ func (cs *ControllerServer) GetVolumeGroupSnapshot( groupSnapshot, err := mgr.GetVolumeGroupSnapshotByID(ctx, groupSnapshotID) if err != nil { - if errors.Is(err, group.ErrRBDGroupNotFound) { + if errors.Is(err, rbd_errors.ErrRBDGroupNotFound) { log.ErrorLog(ctx, "VolumeGroupSnapshot %q doesn't exists", groupSnapshotID) return nil, status.Errorf( diff --git a/internal/rbd/manager.go b/internal/rbd/manager.go index 44164a38d..c832e4e19 100644 --- a/internal/rbd/manager.go +++ b/internal/rbd/manager.go @@ -175,7 +175,7 @@ func (mgr *rbdManager) GetVolumeByID(ctx context.Context, id string) (types.Volu volume, err := GenVolFromVolID(ctx, id, creds, mgr.secrets) if err != nil { switch { - case errors.Is(err, util.ErrImageNotFound): + case errors.Is(err, rbd_errors.ErrImageNotFound): err = fmt.Errorf("volume %s not found: %w", id, err) return nil, err @@ -200,7 +200,7 @@ func (mgr *rbdManager) GetSnapshotByID(ctx context.Context, id string) (types.Sn snapshot, err := genSnapFromSnapID(ctx, id, creds, mgr.secrets) if err != nil { switch { - case errors.Is(err, util.ErrImageNotFound): + case errors.Is(err, rbd_errors.ErrImageNotFound): err = fmt.Errorf("volume %s not found: %w", id, err) return nil, err @@ -468,7 +468,7 @@ func (mgr *rbdManager) CreateVolumeGroupSnapshot( return vgs, nil } - } else if err != nil && !errors.Is(err, util.ErrImageNotFound) { + } else if err != nil && !errors.Is(err, rbd_errors.ErrImageNotFound) { // ErrImageNotFound can be returned if the VolumeGroupSnapshot // could not be found. It is expected that it does not exist // yet, in which case it will be created below. diff --git a/internal/rbd/rbd_journal.go b/internal/rbd/rbd_journal.go index 9b6e269a5..eb20ef960 100644 --- a/internal/rbd/rbd_journal.go +++ b/internal/rbd/rbd_journal.go @@ -173,7 +173,7 @@ func checkSnapCloneExists( // Fetch on-disk image attributes err = vol.getImageInfo() if err != nil { - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { err = parentVol.deleteSnapshot(ctx, rbdSnap) if err != nil { if !errors.Is(err, rbd_errors.ErrSnapNotFound) { @@ -299,7 +299,7 @@ func (rv *rbdVolume) Exists(ctx context.Context, parentVol *rbdVolume) (bool, er // Fetch on-disk image attributes and compare against request err = rv.getImageInfo() switch { - case errors.Is(err, util.ErrImageNotFound) && parentVol != nil: + case errors.Is(err, rbd_errors.ErrImageNotFound) && parentVol != nil: // Need to check cloned info here not on createvolume found, cErr := rv.checkCloneImage(ctx, parentVol) if cErr != nil { @@ -313,7 +313,7 @@ func (rv *rbdVolume) Exists(ctx context.Context, parentVol *rbdVolume) (bool, er return false, err } - case errors.Is(err, util.ErrImageNotFound) && parentVol == nil: + case errors.Is(err, rbd_errors.ErrImageNotFound) && parentVol == nil: // image not found, undo the reservation err = j.UndoReservation(ctx, rv.JournalPool, rv.Pool, rv.RbdImageName, rv.RequestName) diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index 87a07b192..0c7a04bf5 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -531,7 +531,7 @@ func (ri *rbdImage) open() (*librbd.Image, error) { image, err := librbd.OpenImage(ri.ioctx, ri.RbdImageName, librbd.NoSnapshot) if err != nil { if errors.Is(err, librbd.ErrNotFound) { - err = fmt.Errorf("Failed as %w (internal %w)", util.ErrImageNotFound, err) + err = fmt.Errorf("Failed as %w (internal %w)", rbd_errors.ErrImageNotFound, err) } return nil, err @@ -549,7 +549,7 @@ func (ri *rbdImage) open() (*librbd.Image, error) { func (ri *rbdImage) isInUse() (bool, error) { image, err := ri.open() if err != nil { - if errors.Is(err, util.ErrImageNotFound) || errors.Is(err, util.ErrPoolNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) || errors.Is(err, util.ErrPoolNotFound) { return false, err } // any error should assume something else is using the image @@ -692,7 +692,7 @@ func (ri *rbdImage) Delete(ctx context.Context) error { err = rbdImage.Trash(0) if err != nil { if errors.Is(err, librbd.ErrNotFound) { - return fmt.Errorf("Failed as %w (internal %w)", util.ErrImageNotFound, err) + return fmt.Errorf("Failed as %w (internal %w)", rbd_errors.ErrImageNotFound, err) } log.ErrorLog(ctx, "failed to delete rbd image: %s, error: %v", ri, err) @@ -746,7 +746,7 @@ func (rv *rbdVolume) DeleteTempImage(ctx context.Context) error { tempClone := rv.generateTempClone() err := tempClone.Delete(ctx) if err != nil { - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { return tempClone.ensureImageCleanup(ctx) } else { // return error if it is not ErrImageNotFound @@ -785,7 +785,7 @@ func (ri *rbdImage) getCloneDepth(ctx context.Context) (uint, error) { // if the parent image is moved to trash the name will be present // in rbd image info but the image will be in trash, in that case // return the found depth - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { return depth, nil } log.ErrorLog(ctx, "failed to check depth on image %s: %s", &vol, err) @@ -975,7 +975,7 @@ func (ri *rbdImage) checkImageChainHasFeature(ctx context.Context, feature uint6 // is in the trash, when we try to open the parent image to get its // information it fails because it is already in trash. We should // treat error as nil if the parent is not found. - if errors.Is(err, util.ErrImageNotFound) { + if errors.Is(err, rbd_errors.ErrImageNotFound) { return false, nil } log.ErrorLog(ctx, "failed to get image info for %s: %s", rbdImg.String(), err) @@ -1213,6 +1213,33 @@ func generateVolumeFromVolumeID( return rbdVol, err } +// ShouldRetryVolumeGeneration determines whether the process of finding or generating +// volumes should continue based on the type of error encountered. +// +// It checks if the given error matches any of the following known errors: +// - util.ErrKeyNotFound: The key required to locate the volume is missing in Rados omap. +// - util.ErrPoolNotFound: The rbd pool where the volume/omap is expected doesn't exist. +// - ErrImageNotFound: The image doesn't exist in the rbd pool. +// - rados.ErrPermissionDenied: Permissions to access the pool is denied. +// +// If any of these errors are encountered, the function returns `true`, indicating +// that the volume search should continue because of known error. Otherwise, it +// returns `false`, meaning the search should stop. +// +// This helper function is used in scenarios where multiple attempts may be made +// to retrieve or generate volume information, and we want to gracefully handle +// specific failure cases while retrying for others. +func ShouldRetryVolumeGeneration(err error) bool { + if err == nil { + return false // No error, do not retry + } + // Continue searching for specific known errors + return (errors.Is(err, util.ErrKeyNotFound) || + errors.Is(err, util.ErrPoolNotFound) || + errors.Is(err, rbd_errors.ErrImageNotFound) || + errors.Is(err, rados.ErrPermissionDenied)) +} + // GenVolFromVolID generates a rbdVolume structure from the provided identifier, updating // the structure with elements from on-disk image metadata as well. func GenVolFromVolID( @@ -1233,7 +1260,7 @@ func GenVolFromVolID( } vol, err = generateVolumeFromVolumeID(ctx, volumeID, vi, cr, secrets) - if !util.ShouldRetryVolumeGeneration(err) { + if !ShouldRetryVolumeGeneration(err) { return vol, err } @@ -1244,7 +1271,7 @@ func GenVolFromVolID( } if mapping != nil { rbdVol, vErr := generateVolumeFromMapping(ctx, mapping, volumeID, vi, cr, secrets) - if !util.ShouldRetryVolumeGeneration(vErr) { + if !ShouldRetryVolumeGeneration(vErr) { return rbdVol, vErr } } @@ -1297,7 +1324,7 @@ func generateVolumeFromMapping( // Add mapping poolID to Identifier nvi.LocationID = pID vol, err = generateVolumeFromVolumeID(ctx, volumeID, nvi, cr, secrets) - if !util.ShouldRetryVolumeGeneration(err) { + if !ShouldRetryVolumeGeneration(err) { return vol, err } } diff --git a/internal/rbd/rbd_util_test.go b/internal/rbd/rbd_util_test.go index 905e97707..cef48f723 100644 --- a/internal/rbd/rbd_util_test.go +++ b/internal/rbd/rbd_util_test.go @@ -23,6 +23,10 @@ import ( "strings" "testing" + rbd_errors "github.com/ceph/ceph-csi/internal/rbd/errors" + "github.com/ceph/ceph-csi/internal/util" + + "github.com/ceph/go-ceph/rados" librbd "github.com/ceph/go-ceph/rbd" "github.com/stretchr/testify/require" ) @@ -387,3 +391,54 @@ func Test_checkValidImageFeatures(t *testing.T) { }) } } + +func Test_shouldRetryVolumeGeneration(t *testing.T) { + t.Parallel() + type args struct { + err error + } + tests := []struct { + name string + args args + want bool + }{ + { + name: "No error (stop searching)", + args: args{err: nil}, + want: false, // No error, stop searching + }, + { + name: "ErrKeyNotFound (continue searching)", + args: args{err: util.ErrKeyNotFound}, + want: true, // Known error, continue searching + }, + { + name: "ErrPoolNotFound (continue searching)", + args: args{err: util.ErrPoolNotFound}, + want: true, // Known error, continue searching + }, + { + name: "ErrImageNotFound (continue searching)", + args: args{err: rbd_errors.ErrImageNotFound}, + want: true, // Known error, continue searching + }, + { + name: "ErrPermissionDenied (continue searching)", + args: args{err: rados.ErrPermissionDenied}, + want: true, // Known error, continue searching + }, + { + name: "Different error (stop searching)", + args: args{err: errors.New("unknown error")}, + want: false, // Unknown error, stop searching + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + if got := ShouldRetryVolumeGeneration(tt.args.err); got != tt.want { + t.Errorf("ShouldRetryVolumeGeneration() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/internal/rbd/snapshot.go b/internal/rbd/snapshot.go index 9d578368b..532e7823e 100644 --- a/internal/rbd/snapshot.go +++ b/internal/rbd/snapshot.go @@ -83,7 +83,7 @@ func cleanUpSnapshot( ) error { err := parentVol.deleteSnapshot(ctx, rbdSnap) if err != nil { - if !errors.Is(err, util.ErrImageNotFound) && !errors.Is(err, rbd_errors.ErrSnapNotFound) { + if !errors.Is(err, rbd_errors.ErrImageNotFound) && !errors.Is(err, rbd_errors.ErrSnapNotFound) { log.ErrorLog(ctx, "failed to delete snapshot %q: %v", rbdSnap, err) return err @@ -93,7 +93,7 @@ func cleanUpSnapshot( if rbdVol != nil { err := rbdVol.Delete(ctx) if err != nil { - if !errors.Is(err, util.ErrImageNotFound) { + if !errors.Is(err, rbd_errors.ErrImageNotFound) { log.ErrorLog(ctx, "failed to delete rbd image %q with error: %v", rbdVol, err) return err diff --git a/internal/util/errors.go b/internal/util/errors.go index 11a3f2964..ed821769a 100644 --- a/internal/util/errors.go +++ b/internal/util/errors.go @@ -16,15 +16,9 @@ limitations under the License. package util -import ( - "errors" - - "github.com/ceph/go-ceph/rados" -) +import "errors" var ( - // ErrImageNotFound is returned when image name is not found in the cluster on the given pool and/or namespace. - ErrImageNotFound = errors.New("image not found") // ErrKeyNotFound is returned when requested key in omap is not found. ErrKeyNotFound = errors.New("key not found") // ErrObjectExists is returned when named omap is already present in rados. @@ -41,30 +35,3 @@ var ( // ErrMissingConfigForMonitor is returned when clusterID is not found for the mon. ErrMissingConfigForMonitor = errors.New("missing configuration of cluster ID for monitor") ) - -// ShouldRetryVolumeGeneration determines whether the process of finding or generating -// volumes should continue based on the type of error encountered. -// -// It checks if the given error matches any of the following known errors: -// - util.ErrKeyNotFound: The key required to locate the volume is missing in Rados omap. -// - util.ErrPoolNotFound: The rbd pool where the volume/omap is expected doesn't exist. -// - ErrImageNotFound: The image doesn't exist in the rbd pool. -// - rados.ErrPermissionDenied: Permissions to access the pool is denied. -// -// If any of these errors are encountered, the function returns `true`, indicating -// that the volume search should continue because of known error. Otherwise, it -// returns `false`, meaning the search should stop. -// -// This helper function is used in scenarios where multiple attempts may be made -// to retrieve or generate volume information, and we want to gracefully handle -// specific failure cases while retrying for others. -func ShouldRetryVolumeGeneration(err error) bool { - if err == nil { - return false // No error, do not retry - } - // Continue searching for specific known errors - return (errors.Is(err, ErrKeyNotFound) || - errors.Is(err, ErrPoolNotFound) || - errors.Is(err, ErrImageNotFound) || - errors.Is(err, rados.ErrPermissionDenied)) -}