From 23e0874ebfb111c7763421d406186e0be2403a66 Mon Sep 17 00:00:00 2001
From: Yug
Date: Fri, 7 Aug 2020 13:55:58 +0530
Subject: [PATCH] deploy: add wrapper function for kubectl

Add retries to kubectl invocations so that a transient failure does
not fail CI instantly. Each command execution is now retried up to 5
times, with a 10-second delay between attempts, to avoid failures in
some runs.

Signed-off-by: Yug
---
 scripts/rook.sh | 57 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/scripts/rook.sh b/scripts/rook.sh
index 71adefcae..34cef1a50 100755
--- a/scripts/rook.sh
+++ b/scripts/rook.sh
@@ -5,14 +5,31 @@ ROOK_DEPLOY_TIMEOUT=${ROOK_DEPLOY_TIMEOUT:-300}
 ROOK_URL="https://raw.githubusercontent.com/rook/rook/${ROOK_VERSION}/cluster/examples/kubernetes/ceph"
 ROOK_BLOCK_POOL_NAME=${ROOK_BLOCK_POOL_NAME:-"newrbdpool"}
 ROOK_CEPH_CLUSTER_VERSION="v14.2.10"
+KUBECTL_RETRY=5
+KUBECTL_RETRY_DELAY=10
 
 rook_version() {
     echo "${ROOK_VERSION#v}" | cut -d'.' -f"${1}"
 }
 
+kubectl_retry() {
+    local retries=0
+
+    while ! kubectl "${@}"
+    do
+        retries=$((retries+1))
+        if [ ${retries} -eq ${KUBECTL_RETRY} ]
+        then
+            return 1
+        fi
+        sleep ${KUBECTL_RETRY_DELAY}
+    done
+    return 0
+}
+
 function deploy_rook() {
-    kubectl create -f "${ROOK_URL}/common.yaml"
-    kubectl create -f "${ROOK_URL}/operator.yaml"
+    kubectl_retry create -f "${ROOK_URL}/common.yaml"
+    kubectl_retry create -f "${ROOK_URL}/operator.yaml"
     # find out the rook version to decide on the ceph cluster image to be used
     ROOK_MAJOR=$(rook_version 1)
     ROOK_MINOR=$(rook_version 2)
@@ -23,7 +40,7 @@ function deploy_rook() {
         curl -o "${TEMP_DIR}"/cluster-test.yaml "${ROOK_URL}/cluster-test.yaml"
         sed -i "s|image.*|${ROOK_CEPH_CLUSTER_VERSION_IMAGE_PATH}|g" "${TEMP_DIR}"/cluster-test.yaml
         cat "${TEMP_DIR}"/cluster-test.yaml
-        kubectl create -f "${TEMP_DIR}/cluster-test.yaml"
+        kubectl_retry create -f "${TEMP_DIR}/cluster-test.yaml"
         rm -rf "${TEMP_DIR}"
     else
         # add "mon_warn_on_pool_no_redundancy = false" to ceph.conf if missing
@@ -33,26 +50,26 @@ function deploy_rook() {
         if ! grep -q mon_warn_on_pool_no_redundancy "${TEMP_DIR}"/cluster-test.yaml; then
             sed -i '/osd_pool_default_size =/a \ mon_warn_on_pool_no_redundancy = false' "${TEMP_DIR}"/cluster-test.yaml
         fi
-        kubectl create -f "${TEMP_DIR}/cluster-test.yaml"
+        kubectl_retry create -f "${TEMP_DIR}/cluster-test.yaml"
         rm -rf "${TEMP_DIR}"
     fi
 
-    kubectl create -f "${ROOK_URL}/toolbox.yaml"
-    kubectl create -f "${ROOK_URL}/filesystem-test.yaml"
-    kubectl create -f "${ROOK_URL}/pool-test.yaml"
+    kubectl_retry create -f "${ROOK_URL}/toolbox.yaml"
+    kubectl_retry create -f "${ROOK_URL}/filesystem-test.yaml"
+    kubectl_retry create -f "${ROOK_URL}/pool-test.yaml"
 
     # Check if CephCluster is empty
-    if ! kubectl -n rook-ceph get cephclusters -oyaml | grep 'items: \[\]' &>/dev/null; then
+    if ! kubectl_retry -n rook-ceph get cephclusters -oyaml | grep 'items: \[\]' &>/dev/null; then
         check_ceph_cluster_health
     fi
 
     # Check if CephFileSystem is empty
-    if ! kubectl -n rook-ceph get cephfilesystems -oyaml | grep 'items: \[\]' &>/dev/null; then
+    if ! kubectl_retry -n rook-ceph get cephfilesystems -oyaml | grep 'items: \[\]' &>/dev/null; then
         check_mds_stat
     fi
 
     # Check if CephBlockPool is empty
-    if ! kubectl -n rook-ceph get cephblockpools -oyaml | grep 'items: \[\]' &>/dev/null; then
+    if ! kubectl_retry -n rook-ceph get cephblockpools -oyaml | grep 'items: \[\]' &>/dev/null; then
         check_rbd_stat ""
     fi
 }
@@ -69,7 +86,7 @@ function teardown_rook() {
 function create_block_pool() {
     curl -o newpool.yaml "${ROOK_URL}/pool-test.yaml"
     sed -i "s/replicapool/$ROOK_BLOCK_POOL_NAME/g" newpool.yaml
-    kubectl create -f "./newpool.yaml"
+    kubectl_retry create -f "./newpool.yaml"
     rm -f "./newpool.yaml"
 
     check_rbd_stat "$ROOK_BLOCK_POOL_NAME"
@@ -86,8 +103,8 @@ function check_ceph_cluster_health() {
    for ((retry = 0; retry <= ROOK_DEPLOY_TIMEOUT; retry = retry + 5)); do
        echo "Wait for rook deploy... ${retry}s" && sleep 5
 
-        CEPH_STATE=$(kubectl -n rook-ceph get cephclusters -o jsonpath='{.items[0].status.state}')
-        CEPH_HEALTH=$(kubectl -n rook-ceph get cephclusters -o jsonpath='{.items[0].status.ceph.health}')
+        CEPH_STATE=$(kubectl_retry -n rook-ceph get cephclusters -o jsonpath='{.items[0].status.state}')
+        CEPH_HEALTH=$(kubectl_retry -n rook-ceph get cephclusters -o jsonpath='{.items[0].status.ceph.health}')
         echo "Checking CEPH cluster state: [$CEPH_STATE]"
         if [ "$CEPH_STATE" = "Created" ]; then
             if [ "$CEPH_HEALTH" = "HEALTH_OK" ]; then
@@ -106,17 +123,17 @@ function check_mds_stat() {
     for ((retry = 0; retry <= ROOK_DEPLOY_TIMEOUT; retry = retry + 5)); do
-        FS_NAME=$(kubectl -n rook-ceph get cephfilesystems.ceph.rook.io -ojsonpath='{.items[0].metadata.name}')
+        FS_NAME=$(kubectl_retry -n rook-ceph get cephfilesystems.ceph.rook.io -ojsonpath='{.items[0].metadata.name}')
         echo "Checking MDS ($FS_NAME) stats... ${retry}s" && sleep 5
 
-        ACTIVE_COUNT=$(kubectl -n rook-ceph get cephfilesystems myfs -ojsonpath='{.spec.metadataServer.activeCount}')
+        ACTIVE_COUNT=$(kubectl_retry -n rook-ceph get cephfilesystems myfs -ojsonpath='{.spec.metadataServer.activeCount}')
         ACTIVE_COUNT_NUM=$((ACTIVE_COUNT + 0))
         echo "MDS ($FS_NAME) active_count: [$ACTIVE_COUNT_NUM]"
 
         if ((ACTIVE_COUNT_NUM < 1)); then
             continue
         else
-            if kubectl -n rook-ceph get pod -l rook_file_system=myfs | grep Running &>/dev/null; then
+            if kubectl_retry -n rook-ceph get pod -l rook_file_system=myfs | grep Running &>/dev/null; then
                 echo "Filesystem ($FS_NAME) is successfully created..."
                 break
             fi
@@ -133,18 +150,18 @@ function check_mds_stat() {
 function check_rbd_stat() {
     for ((retry = 0; retry <= ROOK_DEPLOY_TIMEOUT; retry = retry + 5)); do
         if [ -z "$1" ]; then
-            RBD_POOL_NAME=$(kubectl -n rook-ceph get cephblockpools -ojsonpath='{.items[0].metadata.name}')
+            RBD_POOL_NAME=$(kubectl_retry -n rook-ceph get cephblockpools -ojsonpath='{.items[0].metadata.name}')
         else
             RBD_POOL_NAME=$1
         fi
         echo "Checking RBD ($RBD_POOL_NAME) stats... ${retry}s" && sleep 5
 
-        TOOLBOX_POD=$(kubectl -n rook-ceph get pods -l app=rook-ceph-tools -o jsonpath='{.items[0].metadata.name}')
-        TOOLBOX_POD_STATUS=$(kubectl -n rook-ceph get pod "$TOOLBOX_POD" -ojsonpath='{.status.phase}')
+        TOOLBOX_POD=$(kubectl_retry -n rook-ceph get pods -l app=rook-ceph-tools -o jsonpath='{.items[0].metadata.name}')
+        TOOLBOX_POD_STATUS=$(kubectl_retry -n rook-ceph get pod "$TOOLBOX_POD" -ojsonpath='{.status.phase}')
         [[ "$TOOLBOX_POD_STATUS" != "Running" ]] && \
             { echo "Toolbox POD ($TOOLBOX_POD) status: [$TOOLBOX_POD_STATUS]"; continue; }
 
-        if kubectl exec -n rook-ceph "$TOOLBOX_POD" -it -- rbd pool stats "$RBD_POOL_NAME" &>/dev/null; then
+        if kubectl_retry exec -n rook-ceph "$TOOLBOX_POD" -it -- rbd pool stats "$RBD_POOL_NAME" &>/dev/null; then
            echo "RBD ($RBD_POOL_NAME) is successfully created..."
            break
        fi
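
To exercise the retry behaviour in isolation, here is a minimal sketch (illustration only, not part of the patch): the same loop as kubectl_retry, wrapped around a stand-in command that fails twice before succeeding, so it can be run locally without kubectl or a cluster. The names retry_demo, flaky_command, and attempts are hypothetical; KUBECTL_RETRY and KUBECTL_RETRY_DELAY mirror the variables the patch introduces (the delay is shortened so the demo finishes quickly).

    #!/bin/bash
    # Illustration-only sketch of the retry pattern used in scripts/rook.sh.
    KUBECTL_RETRY=5
    KUBECTL_RETRY_DELAY=1

    attempts=0

    # Stand-in for a transiently failing kubectl call: fails on the
    # first two invocations and succeeds on the third.
    flaky_command() {
        attempts=$((attempts+1))
        [ ${attempts} -ge 3 ]
    }

    # Same control flow as kubectl_retry, minus the hard-coded kubectl:
    # rerun the given command until it succeeds, sleeping between
    # attempts, and give up after KUBECTL_RETRY tries.
    retry_demo() {
        local retries=0

        while ! "${@}"
        do
            retries=$((retries+1))
            if [ ${retries} -eq ${KUBECTL_RETRY} ]
            then
                return 1
            fi
            sleep ${KUBECTL_RETRY_DELAY}
        done
        return 0
    }

    if retry_demo flaky_command; then
        echo "succeeded on attempt ${attempts}"   # prints: succeeded on attempt 3
    else
        echo "gave up after ${attempts} attempts"
    fi

Two properties of the wrapper are worth keeping in mind when reading the patch: with KUBECTL_RETRY=5 the command runs at most 5 times in total (the counter is checked after each failed attempt), and in the $(kubectl_retry ...) command substitutions the stdout of every attempt is captured, so anything a failed attempt prints to stdout would end up in the variable alongside the successful output. Since kubectl normally reports errors on stderr, this is rarely visible in practice.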