From 23e0874ebfb111c7763421d406186e0be2403a66 Mon Sep 17 00:00:00 2001
From: Yug
Date: Fri, 7 Aug 2020 13:55:58 +0530
Subject: [PATCH] deploy: add wrapper function for kubectl

Add retries to kubectl invocations so that a transient failure does
not fail CI instantly. Each command execution is now retried up to 5
times, with a 10-second delay between attempts, to avoid failures in
some runs.

Signed-off-by: Yug
---
 scripts/rook.sh | 57 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/scripts/rook.sh b/scripts/rook.sh
index 71adefcae..34cef1a50 100755
--- a/scripts/rook.sh
+++ b/scripts/rook.sh
@@ -5,14 +5,31 @@ ROOK_DEPLOY_TIMEOUT=${ROOK_DEPLOY_TIMEOUT:-300}
 ROOK_URL="https://raw.githubusercontent.com/rook/rook/${ROOK_VERSION}/cluster/examples/kubernetes/ceph"
 ROOK_BLOCK_POOL_NAME=${ROOK_BLOCK_POOL_NAME:-"newrbdpool"}
 ROOK_CEPH_CLUSTER_VERSION="v14.2.10"
+KUBECTL_RETRY=5
+KUBECTL_RETRY_DELAY=10
 
 rook_version() {
     echo "${ROOK_VERSION#v}" | cut -d'.' -f"${1}"
 }
 
+kubectl_retry() {
+    local retries=0
+
+    while ! kubectl "${@}"
+    do
+        retries=$((retries+1))
+        if [ ${retries} -eq ${KUBECTL_RETRY} ]
+        then
+            return 1
+        fi
+        sleep ${KUBECTL_RETRY_DELAY}
+    done
+    return 0
+}
+
 function deploy_rook() {
-    kubectl create -f "${ROOK_URL}/common.yaml"
-    kubectl create -f "${ROOK_URL}/operator.yaml"
+    kubectl_retry create -f "${ROOK_URL}/common.yaml"
+    kubectl_retry create -f "${ROOK_URL}/operator.yaml"
     # find out the rook version to decide on the ceph cluster image to be used
     ROOK_MAJOR=$(rook_version 1)
     ROOK_MINOR=$(rook_version 2)
@@ -23,7 +40,7 @@ function deploy_rook() {
         curl -o "${TEMP_DIR}"/cluster-test.yaml "${ROOK_URL}/cluster-test.yaml"
         sed -i "s|image.*|${ROOK_CEPH_CLUSTER_VERSION_IMAGE_PATH}|g" "${TEMP_DIR}"/cluster-test.yaml
         cat "${TEMP_DIR}"/cluster-test.yaml
-        kubectl create -f "${TEMP_DIR}/cluster-test.yaml"
+        kubectl_retry create -f "${TEMP_DIR}/cluster-test.yaml"
         rm -rf "${TEMP_DIR}"
     else
         # add "mon_warn_on_pool_no_redundancy = false" to ceph.conf if missing
@@ -33,26 +50,26 @@ function deploy_rook() {
         if ! grep -q mon_warn_on_pool_no_redundancy "${TEMP_DIR}"/cluster-test.yaml; then
             sed -i '/osd_pool_default_size =/a \ mon_warn_on_pool_no_redundancy = false' "${TEMP_DIR}"/cluster-test.yaml
         fi
-        kubectl create -f "${TEMP_DIR}/cluster-test.yaml"
+        kubectl_retry create -f "${TEMP_DIR}/cluster-test.yaml"
         rm -rf "${TEMP_DIR}"
     fi
 
-    kubectl create -f "${ROOK_URL}/toolbox.yaml"
-    kubectl create -f "${ROOK_URL}/filesystem-test.yaml"
-    kubectl create -f "${ROOK_URL}/pool-test.yaml"
+    kubectl_retry create -f "${ROOK_URL}/toolbox.yaml"
+    kubectl_retry create -f "${ROOK_URL}/filesystem-test.yaml"
+    kubectl_retry create -f "${ROOK_URL}/pool-test.yaml"
 
     # Check if CephCluster is empty
-    if ! kubectl -n rook-ceph get cephclusters -oyaml | grep 'items: \[\]' &>/dev/null; then
+    if ! kubectl_retry -n rook-ceph get cephclusters -oyaml | grep 'items: \[\]' &>/dev/null; then
         check_ceph_cluster_health
     fi
 
     # Check if CephFileSystem is empty
-    if ! kubectl -n rook-ceph get cephfilesystems -oyaml | grep 'items: \[\]' &>/dev/null; then
+    if ! kubectl_retry -n rook-ceph get cephfilesystems -oyaml | grep 'items: \[\]' &>/dev/null; then
         check_mds_stat
     fi
 
     # Check if CephBlockPool is empty
-    if ! kubectl -n rook-ceph get cephblockpools -oyaml | grep 'items: \[\]' &>/dev/null; then
+    if ! kubectl_retry -n rook-ceph get cephblockpools -oyaml | grep 'items: \[\]' &>/dev/null; then
         check_rbd_stat ""
     fi
 }
@@ -69,7 +86,7 @@ function teardown_rook() {
 function create_block_pool() {
     curl -o newpool.yaml "${ROOK_URL}/pool-test.yaml"
     sed -i "s/replicapool/$ROOK_BLOCK_POOL_NAME/g" newpool.yaml
-    kubectl create -f "./newpool.yaml"
+    kubectl_retry create -f "./newpool.yaml"
     rm -f "./newpool.yaml"
 
     check_rbd_stat "$ROOK_BLOCK_POOL_NAME"
@@ -86,8 +103,8 @@ function check_ceph_cluster_health() {
    for ((retry = 0; retry <= ROOK_DEPLOY_TIMEOUT; retry = retry + 5)); do
        echo "Wait for rook deploy... ${retry}s" && sleep 5
 
-        CEPH_STATE=$(kubectl -n rook-ceph get cephclusters -o jsonpath='{.items[0].status.state}')
-        CEPH_HEALTH=$(kubectl -n rook-ceph get cephclusters -o jsonpath='{.items[0].status.ceph.health}')
+        CEPH_STATE=$(kubectl_retry -n rook-ceph get cephclusters -o jsonpath='{.items[0].status.state}')
+        CEPH_HEALTH=$(kubectl_retry -n rook-ceph get cephclusters -o jsonpath='{.items[0].status.ceph.health}')
         echo "Checking CEPH cluster state: [$CEPH_STATE]"
         if [ "$CEPH_STATE" = "Created" ]; then
             if [ "$CEPH_HEALTH" = "HEALTH_OK" ]; then
@@ -106,17 +123,17 @@ function check_mds_stat() {
     for ((retry = 0; retry <= ROOK_DEPLOY_TIMEOUT; retry = retry + 5)); do
-        FS_NAME=$(kubectl -n rook-ceph get cephfilesystems.ceph.rook.io -ojsonpath='{.items[0].metadata.name}')
+        FS_NAME=$(kubectl_retry -n rook-ceph get cephfilesystems.ceph.rook.io -ojsonpath='{.items[0].metadata.name}')
         echo "Checking MDS ($FS_NAME) stats... ${retry}s" && sleep 5
 
-        ACTIVE_COUNT=$(kubectl -n rook-ceph get cephfilesystems myfs -ojsonpath='{.spec.metadataServer.activeCount}')
+        ACTIVE_COUNT=$(kubectl_retry -n rook-ceph get cephfilesystems myfs -ojsonpath='{.spec.metadataServer.activeCount}')
         ACTIVE_COUNT_NUM=$((ACTIVE_COUNT + 0))
         echo "MDS ($FS_NAME) active_count: [$ACTIVE_COUNT_NUM]"
 
         if ((ACTIVE_COUNT_NUM < 1)); then
             continue
         else
-            if kubectl -n rook-ceph get pod -l rook_file_system=myfs | grep Running &>/dev/null; then
+            if kubectl_retry -n rook-ceph get pod -l rook_file_system=myfs | grep Running &>/dev/null; then
                 echo "Filesystem ($FS_NAME) is successfully created..."
                 break
             fi
@@ -133,18 +150,18 @@ function check_mds_stat() {
 function check_rbd_stat() {
     for ((retry = 0; retry <= ROOK_DEPLOY_TIMEOUT; retry = retry + 5)); do
         if [ -z "$1" ]; then
-            RBD_POOL_NAME=$(kubectl -n rook-ceph get cephblockpools -ojsonpath='{.items[0].metadata.name}')
+            RBD_POOL_NAME=$(kubectl_retry -n rook-ceph get cephblockpools -ojsonpath='{.items[0].metadata.name}')
         else
             RBD_POOL_NAME=$1
         fi
         echo "Checking RBD ($RBD_POOL_NAME) stats... ${retry}s" && sleep 5
 
-        TOOLBOX_POD=$(kubectl -n rook-ceph get pods -l app=rook-ceph-tools -o jsonpath='{.items[0].metadata.name}')
-        TOOLBOX_POD_STATUS=$(kubectl -n rook-ceph get pod "$TOOLBOX_POD" -ojsonpath='{.status.phase}')
+        TOOLBOX_POD=$(kubectl_retry -n rook-ceph get pods -l app=rook-ceph-tools -o jsonpath='{.items[0].metadata.name}')
+        TOOLBOX_POD_STATUS=$(kubectl_retry -n rook-ceph get pod "$TOOLBOX_POD" -ojsonpath='{.status.phase}')
         [[ "$TOOLBOX_POD_STATUS" != "Running" ]] && \
             { echo "Toolbox POD ($TOOLBOX_POD) status: [$TOOLBOX_POD_STATUS]"; continue; }
 
-        if kubectl exec -n rook-ceph "$TOOLBOX_POD" -it -- rbd pool stats "$RBD_POOL_NAME" &>/dev/null; then
+        if kubectl_retry exec -n rook-ceph "$TOOLBOX_POD" -it -- rbd pool stats "$RBD_POOL_NAME" &>/dev/null; then
            echo "RBD ($RBD_POOL_NAME) is successfully created..."
            break
        fi
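
To exercise the retry behaviour in isolation, here is a minimal sketch (illustration only, not part of the patch): the same loop as kubectl_retry, wrapped around a stand-in command that fails twice before succeeding, so it can be run locally without kubectl or a cluster. The names retry_demo, flaky_command, and attempts are hypothetical; KUBECTL_RETRY and KUBECTL_RETRY_DELAY mirror the variables the patch introduces (the delay is shortened so the demo finishes quickly).

    #!/bin/bash
    # Illustration-only sketch of the retry pattern used in scripts/rook.sh.
    KUBECTL_RETRY=5
    KUBECTL_RETRY_DELAY=1

    attempts=0

    # Stand-in for a transiently failing kubectl call: fails on the
    # first two invocations and succeeds on the third.
    flaky_command() {
        attempts=$((attempts+1))
        [ ${attempts} -ge 3 ]
    }

    # Same control flow as kubectl_retry, minus the hard-coded kubectl:
    # rerun the given command until it succeeds, sleeping between
    # attempts, and give up after KUBECTL_RETRY tries.
    retry_demo() {
        local retries=0

        while ! "${@}"
        do
            retries=$((retries+1))
            if [ ${retries} -eq ${KUBECTL_RETRY} ]
            then
                return 1
            fi
            sleep ${KUBECTL_RETRY_DELAY}
        done
        return 0
    }

    if retry_demo flaky_command; then
        echo "succeeded on attempt ${attempts}"   # prints: succeeded on attempt 3
    else
        echo "gave up after ${attempts} attempts"
    fi

Two properties of the wrapper are worth keeping in mind when reading the patch: with KUBECTL_RETRY=5 the command runs at most 5 times in total (the counter is checked after each failed attempt), and in the $(kubectl_retry ...) command substitutions the stdout of every attempt is captured, so anything a failed attempt prints to stdout would end up in the variable alongside the successful output. Since kubectl normally reports errors on stderr, this is rarely visible in practice.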