From c0fbaf4276c291e4340408906ed8bcd41f26500a Mon Sep 17 00:00:00 2001
From: Niels de Vos
Date: Tue, 11 Aug 2020 11:13:29 +0200
Subject: [PATCH] ci: only retry "kubectl create" if objects are missing

There can be spurious failures in the CI when running kubectl create. On
occasion, the command returns with an error, but the api-server did
receive and process the request. This causes a 2nd create action to fail
with messages like:

    cephcluster.ceph.rook.io/my-cluster created
    Error from server: error when creating "/tmp/tmp.Ur1ZPG85o9/cluster-test.yaml": etcdserver: request timed out
    Error from server (AlreadyExists): error when creating "/tmp/tmp.Ur1ZPG85o9/cluster-test.yaml": configmaps "rook-config-override" already exists
    Error from server (AlreadyExists): error when creating "/tmp/tmp.Ur1ZPG85o9/cluster-test.yaml": cephclusters.ceph.rook.io "my-cluster" already exists
    Error from server (AlreadyExists): error when creating "/tmp/tmp.Ur1ZPG85o9/cluster-test.yaml": configmaps "rook-config-override" already exists
    Error from server (AlreadyExists): error when creating "/tmp/tmp.Ur1ZPG85o9/cluster-test.yaml": cephclusters.ceph.rook.io "my-cluster" already exists
    Error from server (AlreadyExists): error when creating "/tmp/tmp.Ur1ZPG85o9/cluster-test.yaml": configmaps "rook-config-override" already exists
    Error from server (AlreadyExists): error when creating "/tmp/tmp.Ur1ZPG85o9/cluster-test.yaml": cephclusters.ceph.rook.io "my-cluster" already exists
    Error from server (AlreadyExists): error when creating "/tmp/tmp.Ur1ZPG85o9/cluster-test.yaml": configmaps "rook-config-override" already exists
    Error from server (AlreadyExists): error when creating "/tmp/tmp.Ur1ZPG85o9/cluster-test.yaml": cephclusters.ceph.rook.io "my-cluster" already exists

By handling the create action differently, and checking for the
AlreadyExists word in the stderr output, it is possible to detect
repeated creates that are not needed.

Signed-off-by: Niels de Vos
---
 scripts/rook.sh | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/scripts/rook.sh b/scripts/rook.sh
index 34cef1a50..9791cd191 100755
--- a/scripts/rook.sh
+++ b/scripts/rook.sh
@@ -13,18 +13,52 @@ rook_version() {
 }
 
+# Run "kubectl <action> <args...>", retrying while it fails.
+#
+# KUBECTL_RETRY is the maximum number of attempts, KUBECTL_RETRY_DELAY is
+# passed to sleep between attempts. A failed "create" whose stderr consists
+# only of "AlreadyExists" lines counts as success. The output of the last
+# attempt is replayed on stdout/stderr. Returns 0 on success, 1 otherwise.
 kubectl_retry() {
-    local retries=0
+    local retries=0 action="${1}" ret=0 stdout stderr
+    shift
 
-    while ! kubectl "${@}"
+    # temporary files for kubectl output, kept out of the working directory
+    stdout=$(mktemp "${TMPDIR:-/tmp}/rook-kubectl-stdout.XXXXXXXX")
+    stderr=$(mktemp "${TMPDIR:-/tmp}/rook-kubectl-stderr.XXXXXXXX")
+
+    while ! kubectl "${action}" "${@}" 2>"${stderr}" 1>"${stdout}"
     do
+        # in case of a failure when running "create", ignore errors with "AlreadyExists"
+        if [ "${action}" == 'create' ]
+        then
+            # count lines in stderr that do not have "AlreadyExists"
+            ret=$(grep -cvw 'AlreadyExists' "${stderr}")
+            if [ "${ret}" -eq 0 ]
+            then
+                # Success! stderr is empty after removing all "AlreadyExists" lines.
+                break
+            fi
+            # the count is not an exit status; reset it so that a later
+            # successful attempt makes the function return 0
+            ret=0
+        fi
+
         retries=$((retries+1))
         if [ ${retries} -eq ${KUBECTL_RETRY} ]
         then
-            return 1
+            ret=1
+            break
         fi
         sleep ${KUBECTL_RETRY_DELAY}
     done
-    return 0
+
+    # write output to the inherited descriptors so that callers can consume it
+    cat "${stdout}"
+    cat "${stderr}" >&2
+
+    rm -f "${stdout}" "${stderr}"
+
+    return ${ret}
 }
 
 function deploy_rook() {