From 297b14ed54eb1d958a228051d5e9a0dca1853107 Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Wed, 3 Aug 2022 11:33:59 +0530 Subject: [PATCH 1/5] ci: update minikube to v1.26.1 update minikube to latest patch release. Signed-off-by: Madhu Rajanna --- build.env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.env b/build.env index 63948df68..33702ed19 100644 --- a/build.env +++ b/build.env @@ -38,7 +38,7 @@ SNAPSHOT_VERSION=v6.0.1 HELM_VERSION=v3.9.0 # minikube settings -MINIKUBE_VERSION=v1.26.0 +MINIKUBE_VERSION=v1.26.1 VM_DRIVER=none CHANGE_MINIKUBE_NONE_USER=true From 5aabd4e1d256f4a2f68c0b9930db3e96911ce687 Mon Sep 17 00:00:00 2001 From: Humble Chirammal Date: Mon, 1 Aug 2022 21:40:36 +0530 Subject: [PATCH 2/5] deploy: remove the snapshot controller installation check no need to have 1.17 kube version check anymore before we install snapshot controller. Signed-off-by: Humble Chirammal --- scripts/install-snapshot.sh | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/scripts/install-snapshot.sh b/scripts/install-snapshot.sh index 8ba636608..f6b6d35cf 100755 --- a/scripts/install-snapshot.sh +++ b/scripts/install-snapshot.sh @@ -84,30 +84,6 @@ function delete_snapshot_crd() { kubectl delete -f "${VOLUME_SNAPSHOT}" --ignore-not-found } -# parse the kubernetes version -# v1.17.2 -> kube_version 1 -> 1 (Major) -# v1.17.2 -> kube_version 2 -> 17 (Minor) -function kube_version() { - echo "${KUBE_VERSION}" | sed 's/^v//' | cut -d'.' -f"${1}" -} - -if ! 
get_kube_version=$(kubectl version --short) || - [[ -z "${get_kube_version}" ]]; then - echo "could not get Kubernetes server version" - echo "hint: check if you have specified the right host or port" - exit 1 -fi - -KUBE_VERSION=$(echo "${get_kube_version}" | grep "^Server Version" | cut -d' ' -f3) -KUBE_MAJOR=$(kube_version 1) -KUBE_MINOR=$(kube_version 2) - -# skip snapshot operation if kube version is less than 1.17.0 -if [[ "${KUBE_MAJOR}" -lt 1 ]] || [[ "${KUBE_MAJOR}" -eq 1 && "${KUBE_MINOR}" -lt 17 ]]; then - echo "skipping: Kubernetes server version is < 1.17.0" - exit 1 -fi - case "${1:-}" in install) install_snapshot_controller "$2" @@ -115,13 +91,9 @@ install) cleanup) cleanup_snapshot_controller "$2" ;; -delete-crd) - delete_snapshot_crd - ;; *) echo "usage:" >&2 echo " $0 install" >&2 echo " $0 cleanup" >&2 - echo " $0 delete-crd" >&2 ;; esac From c9773db3f3e3030d20daf97d8cad4c8498ed19f7 Mon Sep 17 00:00:00 2001 From: Humble Chirammal Date: Mon, 1 Aug 2022 21:41:15 +0530 Subject: [PATCH 3/5] ci: remove check for snapshot controller installation and cleanup At present, the check is performed to validate the version of kube is v1.17 and this commit removes the same.
Signed-off-by: Humble Chirammal --- scripts/travis-functest.sh | 17 ++++------------- scripts/travis-helmtest.sh | 19 ++++--------------- 2 files changed, 8 insertions(+), 28 deletions(-) diff --git a/scripts/travis-functest.sh b/scripts/travis-functest.sh index 44649acab..d86c80de9 100755 --- a/scripts/travis-functest.sh +++ b/scripts/travis-functest.sh @@ -27,21 +27,12 @@ sudo scripts/minikube.sh create-block-pool # pull docker images to speed up e2e sudo scripts/minikube.sh cephcsi sudo scripts/minikube.sh k8s-sidecar -KUBE_MAJOR=$(kube_version 1) -KUBE_MINOR=$(kube_version 2) -# skip snapshot operation if kube version is less than 1.17.0 -if [[ "${KUBE_MAJOR}" -ge 1 ]] && [[ "${KUBE_MINOR}" -ge 17 ]]; then - # delete snapshot CRD created by ceph-csi in rook - scripts/install-snapshot.sh delete-crd - # install snapshot controller - scripts/install-snapshot.sh install -fi +# install snapshot controller and create snapshot CRD +scripts/install-snapshot.sh install # functional tests make run-e2e E2E_ARGS="${*}" -if [[ "${KUBE_MAJOR}" -ge 1 ]] && [[ "${KUBE_MINOR}" -ge 17 ]]; then - # delete snapshot CRD - scripts/install-snapshot.sh cleanup -fi +# cleanup +scripts/install-snapshot.sh cleanup sudo scripts/minikube.sh clean diff --git a/scripts/travis-helmtest.sh b/scripts/travis-helmtest.sh index c1cb7b24c..e602152c9 100755 --- a/scripts/travis-helmtest.sh +++ b/scripts/travis-helmtest.sh @@ -35,15 +35,8 @@ sudo scripts/minikube.sh k8s-sidecar NAMESPACE=cephcsi-e2e-$RANDOM # create ns for e2e kubectl create ns ${NAMESPACE} -KUBE_MAJOR=$(kube_version 1) -KUBE_MINOR=$(kube_version 2) -# skip snapshot operation if kube version is less than 1.17.0 -if [[ "${KUBE_MAJOR}" -ge 1 ]] && [[ "${KUBE_MINOR}" -ge 17 ]]; then - # delete snapshot CRD created by ceph-csi in rook - scripts/install-snapshot.sh delete-crd - # install snapshot controller - scripts/install-snapshot.sh install -fi +# install snapshot controller and create snapshot CRD +scripts/install-snapshot.sh install 
# set up helm scripts/install-helm.sh up # install cephcsi helm charts @@ -51,12 +44,8 @@ scripts/install-helm.sh install-cephcsi --namespace ${NAMESPACE} # functional tests make run-e2e NAMESPACE="${NAMESPACE}" E2E_ARGS="--deploy-cephfs=false --deploy-rbd=false ${*}" -#cleanup -# skip snapshot operation if kube version is less than 1.17.0 -if [[ "${KUBE_MAJOR}" -ge 1 ]] && [[ "${KUBE_MINOR}" -ge 17 ]]; then - # delete snapshot CRD - scripts/install-snapshot.sh cleanup -fi +# cleanup +scripts/install-snapshot.sh cleanup scripts/install-helm.sh cleanup-cephcsi --namespace ${NAMESPACE} scripts/install-helm.sh clean kubectl delete ns ${NAMESPACE} From 1ea4a1b790b3c1a57b8ff91c9588e39cdf77d4e7 Mon Sep 17 00:00:00 2001 From: Rakshith R Date: Mon, 8 Aug 2022 11:43:30 +0530 Subject: [PATCH 4/5] ci: fix invalid mergifyio configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Comment out `comment: ` settings, since it does not have any options set, otherwise throws the following error. ``` The current Mergify configuration is invalid required key not provided @ defaults → actions → comment → message ``` Signed-off-by: Rakshith R --- .mergify.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.mergify.yml b/.mergify.yml index 4ccc2eb79..4f060387b 100644 --- a/.mergify.yml +++ b/.mergify.yml @@ -2,7 +2,7 @@ defaults: actions: # mergify.io has removed bot_account from its free open source plan. - comment: + # comment: # bot_account: ceph-csi-bot # mergify[bot] will be commenting. queue: # merge_bot_account: ceph-csi-bot #mergify[bot] will be merging prs. From 8d7b6ee59f8b2ac6bfb3e096fc4d10732aeeb3ac Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Mon, 8 Aug 2022 17:23:35 +0530 Subject: [PATCH 5/5] rbd: consider mirror daemon state for ResyncVolume During ResyncVolume we check if the image is in an error state, and we resync. After resync, the image will move to either the `Error` or the `Resyncing` state.
And if the image is in the above two conditions, we will return a successful response and Ready=false so that the consumer can wait until the volume is ready to use. If the image is in any other state we return an error message to indicate the syncing is not going on. The whole resync and image state change depends on the rbd mirror daemon. If the mirror daemon is not running, the image can be in Resyncing or Unknown state. The Ramen marks the volume replication as secondary, and once the resync starts, it will delete the volume replication CR as a cleanup process. As we don't have a check for the rbd mirror daemon, we are returning a resync success response and Ready=false. Due to this false response Ramen is assuming the resync started and deleted the volume replication CR, and because of this, the cluster goes into a bad state and needs manual intervention. fixes #3289 Signed-off-by: Madhu Rajanna --- internal/rbd/replicationcontrollerserver.go | 11 ++++++----- internal/rbd/replicationcontrollerserver_test.go | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/internal/rbd/replicationcontrollerserver.go b/internal/rbd/replicationcontrollerserver.go index e93887686..7393c2fdd 100644 --- a/internal/rbd/replicationcontrollerserver.go +++ b/internal/rbd/replicationcontrollerserver.go @@ -856,12 +856,13 @@ func checkVolumeResyncStatus(localStatus librbd.SiteMirrorImageStatus) error { // If the state is Replaying means the resync is going on. // Once the volume on remote cluster is demoted and resync - // is completed the image state will be moved to UNKNOWN . - if localStatus.State != librbd.MirrorImageStatusStateReplaying && - localStatus.State != librbd.MirrorImageStatusStateUnknown { + // is completed the image state will be moved to UNKNOWN. + // RBD mirror daemon should be always running on the primary cluster.
+ if !localStatus.Up || (localStatus.State != librbd.MirrorImageStatusStateReplaying && + localStatus.State != librbd.MirrorImageStatusStateUnknown) { return fmt.Errorf( - "not resyncing. image is in %q state", - localStatus.State) + "not resyncing. Local status: daemon up=%t image is in %q state", + localStatus.Up, localStatus.State) } return nil diff --git a/internal/rbd/replicationcontrollerserver_test.go b/internal/rbd/replicationcontrollerserver_test.go index 401292587..e10d35249 100644 --- a/internal/rbd/replicationcontrollerserver_test.go +++ b/internal/rbd/replicationcontrollerserver_test.go @@ -212,10 +212,19 @@ func TestCheckVolumeResyncStatus(t *testing.T) { args librbd.SiteMirrorImageStatus wantErr bool }{ + { + name: "test when rbd mirror daemon is not running", + args: librbd.SiteMirrorImageStatus{ + State: librbd.MirrorImageStatusStateUnknown, + Up: false, + }, + wantErr: true, + }, { name: "test for unknown state", args: librbd.SiteMirrorImageStatus{ State: librbd.MirrorImageStatusStateUnknown, + Up: true, }, wantErr: false, }, @@ -223,6 +232,7 @@ func TestCheckVolumeResyncStatus(t *testing.T) { name: "test for error state", args: librbd.SiteMirrorImageStatus{ State: librbd.MirrorImageStatusStateError, + Up: true, }, wantErr: true, }, @@ -230,6 +240,7 @@ func TestCheckVolumeResyncStatus(t *testing.T) { name: "test for syncing state", args: librbd.SiteMirrorImageStatus{ State: librbd.MirrorImageStatusStateSyncing, + Up: true, }, wantErr: true, }, @@ -237,6 +248,7 @@ func TestCheckVolumeResyncStatus(t *testing.T) { name: "test for starting_replay state", args: librbd.SiteMirrorImageStatus{ State: librbd.MirrorImageStatusStateStartingReplay, + Up: true, }, wantErr: true, }, @@ -244,6 +256,7 @@ func TestCheckVolumeResyncStatus(t *testing.T) { name: "test for replaying state", args: librbd.SiteMirrorImageStatus{ State: librbd.MirrorImageStatusStateReplaying, + Up: true, }, wantErr: false, }, @@ -251,6 +264,7 @@ func TestCheckVolumeResyncStatus(t 
*testing.T) { name: "test for stopping_replay state", args: librbd.SiteMirrorImageStatus{ State: librbd.MirrorImageStatusStateStoppingReplay, + Up: true, }, wantErr: true, }, @@ -258,6 +272,7 @@ func TestCheckVolumeResyncStatus(t *testing.T) { name: "test for stopped state", args: librbd.SiteMirrorImageStatus{ State: librbd.MirrorImageStatusStateStopped, + Up: true, }, wantErr: true, }, @@ -265,6 +280,7 @@ func TestCheckVolumeResyncStatus(t *testing.T) { name: "test for invalid state", args: librbd.SiteMirrorImageStatus{ State: librbd.MirrorImageStatusState(100), + Up: true, }, wantErr: true, },