diff --git a/README.md b/README.md index 0f1748cca..21a267989 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,7 @@ in the Kubernetes documentation. | Ceph CSI Release/Branch | Container image name | Image Tag | | ----------------------- | ---------------------------- | --------- | | devel (Branch) | quay.io/cephcsi/cephcsi | canary | +| v3.4.0 (Release) | quay.io/cephcsi/cephcsi | v3.4.0 | | v3.3.1 (Release) | quay.io/cephcsi/cephcsi | v3.3.1 | | v3.3.0 (Release) | quay.io/cephcsi/cephcsi | v3.3.0 | | v3.2.2 (Release) | quay.io/cephcsi/cephcsi | v3.2.2 | @@ -158,7 +159,7 @@ welcome and encouraged to join. Please use the following to reach members of the community: - Slack: Join our [slack channel](https://cephcsi.slack.com) to discuss - about anything related to this project. You can join the slack by + anything related to this project. You can join the slack by this [invite link](https://bit.ly/2MeS4KY ) - Forums: [ceph-csi](https://groups.google.com/forum/#!forum/ceph-csi) - Twitter: [@CephCsi](https://twitter.com/CephCsi) diff --git a/build.env b/build.env index 4f5abc645..98522367b 100644 --- a/build.env +++ b/build.env @@ -48,6 +48,13 @@ ROOK_VERSION=v1.6.2 # Provide ceph image path ROOK_CEPH_CLUSTER_IMAGE=docker.io/ceph/ceph:v16 +# CSI sidecar version +CSI_ATTACHER_VERSION=v3.3.0 +CSI_SNAPSHOTTER_VERSION=v4.2.0 +CSI_PROVISIONER_VERSION=v3.0.0 +CSI_RESIZER_VERSION=v1.2.0 +CSI_NODE_DRIVER_REGISTRAR_VERSION=v2.3.0 + # e2e settings # - enable CEPH_CSI_RUN_ALL_TESTS when running tests with if it has root # permissions on the host diff --git a/charts/ceph-csi-cephfs/values.yaml b/charts/ceph-csi-cephfs/values.yaml index cd5806336..962bc6190 100644 --- a/charts/ceph-csi-cephfs/values.yaml +++ b/charts/ceph-csi-cephfs/values.yaml @@ -80,7 +80,7 @@ nodeplugin: registrar: image: repository: k8s.gcr.io/sig-storage/csi-node-driver-registrar - tag: v2.2.0 + tag: v2.3.0 pullPolicy: IfNotPresent resources: {} @@ -161,7 +161,7 @@ provisioner: provisioner: image: repository: k8s.gcr.io/sig-storage/csi-provisioner - tag: v2.2.2 + tag: v3.0.0 pullPolicy: IfNotPresent resources: {} @@ -170,7 +170,7 @@ provisioner: enabled: true image: repository: k8s.gcr.io/sig-storage/csi-attacher - tag: v3.2.1 + tag: v3.3.0 pullPolicy: IfNotPresent resources: {} @@ -186,7 +186,7 @@ provisioner: snapshotter: image: repository: k8s.gcr.io/sig-storage/csi-snapshotter - tag: v4.1.1 + tag: v4.2.0 pullPolicy: IfNotPresent resources: {} diff --git a/charts/ceph-csi-rbd/templates/csidriver-crd.yaml b/charts/ceph-csi-rbd/templates/csidriver-crd.yaml index 4e6966fdc..92c3d916a 100644 --- a/charts/ceph-csi-rbd/templates/csidriver-crd.yaml +++ b/charts/ceph-csi-rbd/templates/csidriver-crd.yaml @@ -1,7 +1,7 @@ {{ if semverCompare ">=1.18.0-beta.1" .Capabilities.KubeVersion.Version }} apiVersion: storage.k8s.io/v1 {{ else }} -apiVersion: storage.k8s.io/betav1 +apiVersion: storage.k8s.io/v1beta1 {{ end }} kind: CSIDriver metadata: diff --git a/charts/ceph-csi-rbd/templates/provisioner-clusterrole.yaml b/charts/ceph-csi-rbd/templates/provisioner-clusterrole.yaml index c620183a7..032705753 100644 --- a/charts/ceph-csi-rbd/templates/provisioner-clusterrole.yaml +++ b/charts/ceph-csi-rbd/templates/provisioner-clusterrole.yaml @@ -62,7 +62,7 @@ rules: {{- if .Values.topology.enabled }} - apiGroups: [""] resources: ["nodes"] - verbs: ["get", "list", watch"] + verbs: ["get", "list","watch"] - apiGroups: ["storage.k8s.io"] resources: ["csinodes"] verbs: ["get", "list", "watch"] diff --git a/charts/ceph-csi-rbd/values.yaml 
b/charts/ceph-csi-rbd/values.yaml index 19a6e134f..28fc9fd8a 100644 --- a/charts/ceph-csi-rbd/values.yaml +++ b/charts/ceph-csi-rbd/values.yaml @@ -102,7 +102,7 @@ nodeplugin: registrar: image: repository: k8s.gcr.io/sig-storage/csi-node-driver-registrar - tag: v2.2.0 + tag: v2.3.0 pullPolicy: IfNotPresent resources: {} @@ -198,7 +198,7 @@ provisioner: provisioner: image: repository: k8s.gcr.io/sig-storage/csi-provisioner - tag: v2.2.2 + tag: v3.0.0 pullPolicy: IfNotPresent resources: {} @@ -207,7 +207,7 @@ provisioner: enabled: true image: repository: k8s.gcr.io/sig-storage/csi-attacher - tag: v3.2.1 + tag: v3.3.0 pullPolicy: IfNotPresent resources: {} @@ -223,7 +223,7 @@ provisioner: snapshotter: image: repository: k8s.gcr.io/sig-storage/csi-snapshotter - tag: v4.1.1 + tag: v4.2.0 pullPolicy: IfNotPresent resources: {} diff --git a/deploy/cephfs/kubernetes/csi-cephfsplugin-provisioner.yaml b/deploy/cephfs/kubernetes/csi-cephfsplugin-provisioner.yaml index 5b235401e..68cfdf88b 100644 --- a/deploy/cephfs/kubernetes/csi-cephfsplugin-provisioner.yaml +++ b/deploy/cephfs/kubernetes/csi-cephfsplugin-provisioner.yaml @@ -43,7 +43,7 @@ spec: priorityClassName: system-cluster-critical containers: - name: csi-provisioner - image: k8s.gcr.io/sig-storage/csi-provisioner:v2.2.2 + image: k8s.gcr.io/sig-storage/csi-provisioner:v3.0.0 args: - "--csi-address=$(ADDRESS)" - "--v=5" @@ -76,7 +76,7 @@ spec: - name: socket-dir mountPath: /csi - name: csi-snapshotter - image: k8s.gcr.io/sig-storage/csi-snapshotter:v4.1.1 + image: k8s.gcr.io/sig-storage/csi-snapshotter:v4.2.0 args: - "--csi-address=$(ADDRESS)" - "--v=5" @@ -92,7 +92,7 @@ spec: - name: socket-dir mountPath: /csi - name: csi-cephfsplugin-attacher - image: k8s.gcr.io/sig-storage/csi-attacher:v3.2.1 + image: k8s.gcr.io/sig-storage/csi-attacher:v3.3.0 args: - "--v=5" - "--csi-address=$(ADDRESS)" diff --git a/deploy/cephfs/kubernetes/csi-cephfsplugin.yaml b/deploy/cephfs/kubernetes/csi-cephfsplugin.yaml index 19961a1a3..e182beb38 100644 --- a/deploy/cephfs/kubernetes/csi-cephfsplugin.yaml +++ b/deploy/cephfs/kubernetes/csi-cephfsplugin.yaml @@ -25,7 +25,7 @@ spec: # created by privileged CSI driver container. 
securityContext: privileged: true - image: k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.2.0 + image: k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.3.0 args: - "--v=5" - "--csi-address=/csi/csi.sock" diff --git a/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml b/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml index be93dc6ea..25980a214 100644 --- a/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml +++ b/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml @@ -47,7 +47,7 @@ spec: priorityClassName: system-cluster-critical containers: - name: csi-provisioner - image: k8s.gcr.io/sig-storage/csi-provisioner:v2.2.2 + image: k8s.gcr.io/sig-storage/csi-provisioner:v3.0.0 args: - "--csi-address=$(ADDRESS)" - "--v=5" @@ -67,7 +67,7 @@ spec: - name: socket-dir mountPath: /csi - name: csi-snapshotter - image: k8s.gcr.io/sig-storage/csi-snapshotter:v4.1.1 + image: k8s.gcr.io/sig-storage/csi-snapshotter:v4.2.0 args: - "--csi-address=$(ADDRESS)" - "--v=5" @@ -83,7 +83,7 @@ spec: - name: socket-dir mountPath: /csi - name: csi-attacher - image: k8s.gcr.io/sig-storage/csi-attacher:v3.2.1 + image: k8s.gcr.io/sig-storage/csi-attacher:v3.3.0 args: - "--v=5" - "--csi-address=$(ADDRESS)" diff --git a/deploy/rbd/kubernetes/csi-rbdplugin.yaml b/deploy/rbd/kubernetes/csi-rbdplugin.yaml index d99e2d4a3..9a424a7d6 100644 --- a/deploy/rbd/kubernetes/csi-rbdplugin.yaml +++ b/deploy/rbd/kubernetes/csi-rbdplugin.yaml @@ -28,7 +28,7 @@ spec: # created by privileged CSI driver container. securityContext: privileged: true - image: k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.2.0 + image: k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.3.0 args: - "--v=5" - "--csi-address=/csi/csi.sock" diff --git a/go.mod b/go.mod index 9506bb6d8..18f8dd288 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 github.com/hashicorp/vault/api v1.0.5-0.20200902155336-f9d5ce5a171a github.com/kubernetes-csi/csi-lib-utils v0.9.1 - github.com/kubernetes-csi/external-snapshotter/client/v4 v4.1.0 + github.com/kubernetes-csi/external-snapshotter/client/v4 v4.2.0 github.com/libopenstorage/secrets v0.0.0-20210709082113-dde442ea20ec github.com/onsi/ginkgo v1.16.4 github.com/onsi/gomega v1.13.0 diff --git a/go.sum b/go.sum index a90c1891b..250fbaadb 100644 --- a/go.sum +++ b/go.sum @@ -641,8 +641,8 @@ github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kubernetes-csi/csi-lib-utils v0.9.1 h1:sGq6ifVujfMSkfTsMZip44Ttv8SDXvsBlFk9GdYl/b8= github.com/kubernetes-csi/csi-lib-utils v0.9.1/go.mod h1:8E2jVUX9j3QgspwHXa6LwyN7IHQDjW9jX3kwoWnSC+M= github.com/kubernetes-csi/external-snapshotter/client/v4 v4.0.0/go.mod h1:YBCo4DoEeDndqvAn6eeu0vWM7QdXmHEeI9cFWplmBys= -github.com/kubernetes-csi/external-snapshotter/client/v4 v4.1.0 h1:DecASDOSUnp0ftwi4aU87joEpZfLv9iMPwNYzrGb9Lc= -github.com/kubernetes-csi/external-snapshotter/client/v4 v4.1.0/go.mod h1:YBCo4DoEeDndqvAn6eeu0vWM7QdXmHEeI9cFWplmBys= +github.com/kubernetes-csi/external-snapshotter/client/v4 v4.2.0 h1:nHHjmvjitIiyPlUHk/ofpgvBcNcawJLtf4PYHORLjAA= +github.com/kubernetes-csi/external-snapshotter/client/v4 v4.2.0/go.mod h1:YBCo4DoEeDndqvAn6eeu0vWM7QdXmHEeI9cFWplmBys= github.com/lib/pq v1.2.0 h1:LXpIM/LZ5xGFhOpXAQUIMM1HdyqzVYM13zNdjCEEcA0= github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/libopenstorage/autopilot-api v0.6.1-0.20210128210103-5fbb67948648/go.mod h1:6JLrPbR3ZJQFbUY/+QJMl/aF00YdIrLf8/GWAplgvJs= diff --git 
a/internal/cephfs/controllerserver.go b/internal/cephfs/controllerserver.go index 4a4277c3e..25c298bcf 100644 --- a/internal/cephfs/controllerserver.go +++ b/internal/cephfs/controllerserver.go @@ -595,7 +595,7 @@ func (cs *ControllerServer) CreateSnapshot( info, err := parentVolOptions.getSubVolumeInfo(ctx, volumeID(vid.FsSubvolName)) if err != nil { // Check error code value against ErrInvalidCommand to understand the cluster - // support it or not, its safe to evaluat as the filtering + // supports it or not. It's safe to evaluate as the filtering // is already done from getSubVolumeInfo() and send out the error here. if errors.Is(err, ErrInvalidCommand) { return nil, status.Error( diff --git a/internal/cephfs/fsjournal.go b/internal/cephfs/fsjournal.go index 9ee2e0c44..31dbad94e 100644 --- a/internal/cephfs/fsjournal.go +++ b/internal/cephfs/fsjournal.go @@ -345,7 +345,7 @@ volume names as requested by the CSI drivers. Hence, these need to be invoked on respective CSI driver generated volume name based locks are held, as otherwise racy access to these omaps may end up leaving them in an inconsistent state. -These functions also cleanup omap reservations that are stale. I.e when omap entries exist and +These functions also clean up omap reservations that are stale. I.e. when omap entries exist and backing subvolumes are missing, or one of the omaps exist and the next is missing. This is because, the order of omap creation and deletion are inverse of each other, and protected by the request name lock, and hence any stale omaps are leftovers from incomplete transactions and are diff --git a/internal/cephfs/nodeserver.go b/internal/cephfs/nodeserver.go index 067b9adb8..d321ed681 100644 --- a/internal/cephfs/nodeserver.go +++ b/internal/cephfs/nodeserver.go @@ -308,7 +308,7 @@ func (ns *NodeServer) NodeUnpublishVolume( return nil, status.Error(codes.Internal, err.Error()) } - util.DebugLog(ctx, "cephfs: successfully unbinded volume %s from %s", req.GetVolumeId(), targetPath) + util.DebugLog(ctx, "cephfs: successfully unbound volume %s from %s", req.GetVolumeId(), targetPath) return &csi.NodeUnpublishVolumeResponse{}, nil } diff --git a/internal/cephfs/volume.go b/internal/cephfs/volume.go index 7d11f59ba..9f3c84a67 100644 --- a/internal/cephfs/volume.go +++ b/internal/cephfs/volume.go @@ -109,9 +109,7 @@ func (vo *volumeOptions) getSubVolumeInfo(ctx context.Context, volID volumeID) ( // If info.BytesQuota == Infinite (in case it is not set) // or nil (in case the subvolume is in snapshot-retained state), // just continue without returning quota information. - // TODO: make use of subvolume "state" attribute once - // https://github.com/ceph/go-ceph/issues/453 is fixed. - if !(info.BytesQuota == fsAdmin.Infinite || info.BytesQuota == nil) { + if !(info.BytesQuota == fsAdmin.Infinite || info.State == fsAdmin.StateSnapRetained) { return nil, fmt.Errorf("subvolume %s has unsupported quota: %v", string(volID), info.BytesQuota) } } else { diff --git a/internal/rbd/clone.go b/internal/rbd/clone.go index 8e3a442df..d93b06e43 100644 --- a/internal/rbd/clone.go +++ b/internal/rbd/clone.go @@ -97,7 +97,7 @@ func (rv *rbdVolume) checkCloneImage(ctx context.Context, parentVol *rbdVolume) return false, err } } - // snap will be create after we flatten the temporary cloned image,no + // snap will be created after we flatten the temporary cloned image, no // need to check for flatten here.
// as the snap exists,create clone image and delete temporary snapshot // and add task to flatten temporary cloned image @@ -266,8 +266,8 @@ func (rv *rbdVolume) flattenCloneImage(ctx context.Context) error { // If the intermediate clone reaches the depth we may need to return ABORT // error message as it need to be flatten before continuing, this may leak // omap entries and stale temporary snapshots in corner cases, if we reduce - // the limit and check for the depth of the parent image clain it self we - // can flatten the parent images before use to avoid the stale omap entries. + // the limit and check for the depth of the parent image chain itself we + // can flatten the parent images before use to avoid stale omap entries. hardLimit := rbdHardMaxCloneDepth softLimit := rbdSoftMaxCloneDepth // choosing 2 so that we don't need to flatten the image in the request. diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go index ce7b51770..9a35e3e3c 100644 --- a/internal/rbd/controllerserver.go +++ b/internal/rbd/controllerserver.go @@ -418,7 +418,7 @@ func (cs *ControllerServer) repairExistingVolume(ctx context.Context, req *csi.C "restoring thick-provisioned volume %q has been interrupted, please retry", rbdVol) } } - // restore from snapshot imploes rbdSnap != nil + // restore from snapshot implies rbdSnap != nil // check if image depth is reached limit and requires flatten err := checkFlatten(ctx, rbdVol, cr) if err != nil { @@ -533,7 +533,7 @@ func flattenTemporaryClonedImages(ctx context.Context, rbdVol *rbdVolume, cr *ut return nil } -// checkFlatten ensures that that the image chain depth is not reached +// checkFlatten ensures that the image chain depth has not reached the // hardlimit or softlimit. if the softlimit is reached it adds a task and // return success,the hardlimit is reached it starts a task to flatten the // image and return Aborted. @@ -823,7 +823,7 @@ func (cs *ControllerServer) DeleteVolume( } // lock out parallel create requests against the same volume name as we - // cleanup the image and associated omaps for the same + // clean up the image and associated omaps for the same if acquired := cs.VolumeLocks.TryAcquire(rbdVol.RequestName); !acquired { util.ErrorLog(ctx, util.VolumeOperationAlreadyExistsFmt, rbdVol.RequestName) @@ -831,6 +831,51 @@ func (cs *ControllerServer) DeleteVolume( } defer cs.VolumeLocks.Release(rbdVol.RequestName) + return cleanupRBDImage(ctx, rbdVol, cr) +} + +// cleanupRBDImage removes the rbd image and OMAP metadata associated with it. +func cleanupRBDImage(ctx context.Context, + rbdVol *rbdVolume, cr *util.Credentials) (*csi.DeleteVolumeResponse, error) { + mirroringInfo, err := rbdVol.getImageMirroringInfo() + if err != nil { + util.ErrorLog(ctx, err.Error()) + + return nil, status.Error(codes.Internal, err.Error()) + } + // Cleanup only omap data if the following conditions are met + // Mirroring is enabled on the image + // Local image is secondary + // Local image is in up+replaying state + if mirroringInfo.State == librbd.MirrorImageEnabled && !mirroringInfo.Primary { + // If the image is in a secondary state and it is up+replaying, it is + // a healthy secondary and the image is primary somewhere in the + // remote cluster and the local image is getting replayed. Delete the + // OMAP data generated as we cannot delete the secondary image. When + // the image on the primary cluster gets deleted/mirroring disabled, + // the image on all the remote (secondary) clusters will get + // auto-deleted.
This helps in garbage collecting the OMAP, PVC and PV + // objects after a failback operation. + localStatus, rErr := rbdVol.getLocalState() + if rErr != nil { + return nil, status.Error(codes.Internal, rErr.Error()) + } + if localStatus.Up && localStatus.State == librbd.MirrorImageStatusStateReplaying { + if err = undoVolReservation(ctx, rbdVol, cr); err != nil { + util.ErrorLog(ctx, "failed to remove reservation for volume (%s) with backing image (%s) (%s)", + rbdVol.RequestName, rbdVol.RbdImageName, err) + + return nil, status.Error(codes.Internal, err.Error()) + } + + return &csi.DeleteVolumeResponse{}, nil + } + util.ErrorLog(ctx, + "secondary image status is up=%t and state=%s", + localStatus.Up, + localStatus.State) + } + inUse, err := rbdVol.isInUse() if err != nil { util.ErrorLog(ctx, "failed getting information for image (%s): (%s)", rbdVol, err) @@ -1257,7 +1302,7 @@ func (cs *ControllerServer) DeleteSnapshot( rbdSnap := &rbdSnapshot{} if err = genSnapFromSnapID(ctx, rbdSnap, snapshotID, cr, req.GetSecrets()); err != nil { - // if error is ErrPoolNotFound, the pool is already deleted we dont + // if error is ErrPoolNotFound, the pool is already deleted, we don't // need to worry about deleting snapshot or omap data, return success if errors.Is(err, util.ErrPoolNotFound) { util.WarningLog(ctx, "failed to get backend snapshot for %s: %v", snapshotID, err) diff --git a/internal/rbd/driver.go b/internal/rbd/driver.go index fc073859d..f48daabe5 100644 --- a/internal/rbd/driver.go +++ b/internal/rbd/driver.go @@ -50,11 +50,11 @@ var ( // VolumeName to backing RBD images. volJournal *journal.Config snapJournal *journal.Config - // rbdHardMaxCloneDepth is the hard limit for maximum number of nested volume clones that are taken before a flatten + // rbdHardMaxCloneDepth is the hard limit for maximum number of nested volume clones that are taken before flatten // occurs. rbdHardMaxCloneDepth uint - // rbdSoftMaxCloneDepth is the soft limit for maximum number of nested volume clones that are taken before a flatten + // rbdSoftMaxCloneDepth is the soft limit for maximum number of nested volume clones that are taken before flatten // occurs. rbdSoftMaxCloneDepth uint maxSnapshotsOnImage uint @@ -141,7 +141,7 @@ func (r *Driver) Run(conf *util.Config) { // general // In addition, we want to add the remaining modes like MULTI_NODE_READER_ONLY, // MULTI_NODE_SINGLE_WRITER etc, but need to do some verification of RO modes first - // will work those as follow up features + // will work on those as follow-up features r.cd.AddVolumeCapabilityAccessModes( []csi.VolumeCapability_AccessMode_Mode{ csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER, diff --git a/internal/rbd/mirror.go b/internal/rbd/mirror.go index 2ce2389fb..0d010be01 100644 --- a/internal/rbd/mirror.go +++ b/internal/rbd/mirror.go @@ -128,3 +128,24 @@ func (ri *rbdImage) getImageMirroringStatus() (*librbd.GlobalMirrorImageStatus, return &statusInfo, nil } + +// getLocalState returns the local state of the image.
+func (ri *rbdImage) getLocalState() (librbd.SiteMirrorImageStatus, error) { + localStatus := librbd.SiteMirrorImageStatus{} + image, err := ri.open() + if err != nil { + return localStatus, fmt.Errorf("failed to open image %q with error: %w", ri, err) + } + defer image.Close() + + statusInfo, err := image.GetGlobalMirrorStatus() + if err != nil { + return localStatus, fmt.Errorf("failed to get image mirroring status %q with error: %w", ri, err) + } + localStatus, err = statusInfo.LocalStatus() + if err != nil { + return localStatus, fmt.Errorf("failed to get local status: %w", err) + } + + return localStatus, nil +} diff --git a/internal/rbd/replicationcontrollerserver.go b/internal/rbd/replicationcontrollerserver.go index 5a177ad4f..feb656cb4 100644 --- a/internal/rbd/replicationcontrollerserver.go +++ b/internal/rbd/replicationcontrollerserver.go @@ -321,28 +321,7 @@ func (rs *ReplicationServer) DisableVolumeReplication(ctx context.Context, case librbd.MirrorImageDisabling: return nil, status.Errorf(codes.Aborted, "%s is in disabling state", volumeID) case librbd.MirrorImageEnabled: - if !force && !mirroringInfo.Primary { - return nil, status.Error(codes.InvalidArgument, "image is in non-primary state") - } - err = rbdVol.disableImageMirroring(force) - if err != nil { - util.ErrorLog(ctx, err.Error()) - - return nil, status.Error(codes.Internal, err.Error()) - } - // the image state can be still disabling once we disable the mirroring - // check the mirroring is disabled or not - mirroringInfo, err = rbdVol.getImageMirroringInfo() - if err != nil { - util.ErrorLog(ctx, err.Error()) - - return nil, status.Error(codes.Internal, err.Error()) - } - if mirroringInfo.State == librbd.MirrorImageDisabling { - return nil, status.Errorf(codes.Aborted, "%s is in disabling state", volumeID) - } - - return &replication.DisableVolumeReplicationResponse{}, nil + return disableVolumeReplication(rbdVol, mirroringInfo, force) default: // TODO: use string instead of int for returning valid error message return nil, status.Errorf(codes.InvalidArgument, "image is in %d Mode", mirroringInfo.State) @@ -351,6 +330,52 @@ func (rs *ReplicationServer) DisableVolumeReplication(ctx context.Context, return &replication.DisableVolumeReplicationResponse{}, nil } +func disableVolumeReplication(rbdVol *rbdVolume, + mirroringInfo *librbd.MirrorImageInfo, + force bool) (*replication.DisableVolumeReplicationResponse, error) { + if !mirroringInfo.Primary { + // Return success if the below conditions are met + // Local image is secondary + // Local image is in up+replaying state + + // If the image is in a secondary state and it is up+replaying, it is + // a healthy secondary and the image is primary somewhere in the + // remote cluster and the local image is getting replayed. Return + // success for disabling mirroring as we cannot disable mirroring + // on the secondary image; when the image on the primary site gets + // disabled, the image on all the remote (secondary) clusters will get + // auto-deleted. This helps in garbage collecting the volume + // replication Kubernetes artifacts after a failback operation.
+ localStatus, rErr := rbdVol.getLocalState() + if rErr != nil { + return nil, status.Error(codes.Internal, rErr.Error()) + } + if localStatus.Up && localStatus.State == librbd.MirrorImageStatusStateReplaying { + return &replication.DisableVolumeReplicationResponse{}, nil + } + + return nil, status.Errorf(codes.InvalidArgument, + "secondary image status is up=%t and state=%s", + localStatus.Up, + localStatus.State) + } + err := rbdVol.disableImageMirroring(force) + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + // the image state can still be disabling once we disable the mirroring + // check whether the mirroring is disabled or not + mirroringInfo, err = rbdVol.getImageMirroringInfo() + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + if mirroringInfo.State == librbd.MirrorImageDisabling { + return nil, status.Errorf(codes.Aborted, "%s is in disabling state", rbdVol.VolID) + } + + return &replication.DisableVolumeReplicationResponse{}, nil +} + // PromoteVolume extracts the RBD volume information from the volumeID, If the // image is present, mirroring is enabled and the image is in demoted state it // will promote the volume as primary. diff --git a/scripts/minikube.sh b/scripts/minikube.sh index d026ba683..de72c6dd2 100755 --- a/scripts/minikube.sh +++ b/scripts/minikube.sh @@ -162,6 +162,12 @@ if [[ "${VM_DRIVER}" == "kvm2" ]]; then # use vda1 instead of sda1 when running with the libvirt driver DISK="vda1" fi +#configure csi sidecar versions +CSI_ATTACHER_VERSION=${CSI_ATTACHER_VERSION:-"v3.2.1"} +CSI_SNAPSHOTTER_VERSION=${CSI_SNAPSHOTTER_VERSION:-"v4.1.1"} +CSI_PROVISIONER_VERSION=${CSI_PROVISIONER_VERSION:-"v2.2.2"} +CSI_RESIZER_VERSION=${CSI_RESIZER_VERSION:-"v1.2.0"} +CSI_NODE_DRIVER_REGISTRAR_VERSION=${CSI_NODE_DRIVER_REGISTRAR_VERSION:-"v2.2.0"} #feature-gates for kube K8S_FEATURE_GATES=${K8S_FEATURE_GATES:-"ExpandCSIVolumes=true"} @@ -278,11 +284,11 @@ cephcsi) ;; k8s-sidecar) echo "copying the kubernetes sidecar images" - copy_image_to_cluster "${K8S_IMAGE_REPO}"/csi-attacher:v3.0.2 "${K8S_IMAGE_REPO}"/csi-attacher:v3.0.2 - copy_image_to_cluster "${K8S_IMAGE_REPO}"/csi-snapshotter:v3.0.2 $"${K8S_IMAGE_REPO}"/csi-snapshotter:v3.0.2 - copy_image_to_cluster "${K8S_IMAGE_REPO}"/csi-provisioner:v2.0.4 "${K8S_IMAGE_REPO}"/csi-provisioner:v2.0.4 - copy_image_to_cluster "${K8S_IMAGE_REPO}"/csi-node-driver-registrar:v2.0.1 "${K8S_IMAGE_REPO}"/csi-node-driver-registrar:v2.0.1 - copy_image_to_cluster "${K8S_IMAGE_REPO}"/csi-resizer:v1.0.1 "${K8S_IMAGE_REPO}"/csi-resizer:v1.0.1 + copy_image_to_cluster "${K8S_IMAGE_REPO}/csi-attacher:${CSI_ATTACHER_VERSION}" "${K8S_IMAGE_REPO}/csi-attacher:${CSI_ATTACHER_VERSION}" + copy_image_to_cluster "${K8S_IMAGE_REPO}/csi-snapshotter:${CSI_SNAPSHOTTER_VERSION}" "${K8S_IMAGE_REPO}/csi-snapshotter:${CSI_SNAPSHOTTER_VERSION}" + copy_image_to_cluster "${K8S_IMAGE_REPO}/csi-provisioner:${CSI_PROVISIONER_VERSION}" "${K8S_IMAGE_REPO}/csi-provisioner:${CSI_PROVISIONER_VERSION}" + copy_image_to_cluster "${K8S_IMAGE_REPO}/csi-node-driver-registrar:${CSI_NODE_DRIVER_REGISTRAR_VERSION}" "${K8S_IMAGE_REPO}/csi-node-driver-registrar:${CSI_NODE_DRIVER_REGISTRAR_VERSION}" + copy_image_to_cluster "${K8S_IMAGE_REPO}/csi-resizer:${CSI_RESIZER_VERSION}" "${K8S_IMAGE_REPO}/csi-resizer:${CSI_RESIZER_VERSION}" ;; clean) ${minikube} delete diff --git a/vendor/modules.txt b/vendor/modules.txt index 13520e3c4..56ce6d7d9 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -182,7 +182,7 @@
github.com/kubernetes-csi/csi-lib-utils/connection github.com/kubernetes-csi/csi-lib-utils/metrics github.com/kubernetes-csi/csi-lib-utils/protosanitizer github.com/kubernetes-csi/csi-lib-utils/rpc -# github.com/kubernetes-csi/external-snapshotter/client/v4 v4.1.0 +# github.com/kubernetes-csi/external-snapshotter/client/v4 v4.2.0 ## explicit github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1 github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1beta1