ceph-csi/scripts/utils.sh

90 lines
3.3 KiB
Bash
Raw Permalink Normal View History

#!/bin/bash
KUBECTL_RETRY=5
KUBECTL_RETRY_DELAY=10
# kubectl_retry calls `kubectl` with the passed arguments. In case of a
# failure, the `kubectl` command will be retried for `KUBECTL_RETRY` times,
# with a delay of `KUBECTL_RETRY_DELAY` between them.
#
# Upon creation failures, `AlreadyExists` and `Warning` are ignored, making
# sure the create succeeds in case some objects were created successfully in a
# previous try.
#
# Upon deletion failures, the same applies as for creation, except that
# NotFound is ignored.
#
# Logs from `kubectl` are passed on to stdout, so that a calling function can
# capture it. During the function, logs are written to stderr as to not
# interfere with the log parsing of the calling function.
kubectl_retry() {
local retries=0 action="${1}" ret=0 stdout stderr
shift
# temporary files for kubectl output
stdout=$(mktemp rook-kubectl-stdout.XXXXXXXX)
stderr=$(mktemp rook-kubectl-stderr.XXXXXXXX)
while ! ( kubectl "${action}" "${@}" 2>"${stderr}" 1>>"${stdout}" )
do
# write logs to stderr and empty stderr (only)
cat "${stdout}" > /dev/stderr
cat "${stderr}" > /dev/stderr
echo "$(date): 'kubectl_retry ${action} ${*}' try #${retries} failed, checking errors" > /dev/stderr
# in case of a failure when running "create", ignore errors with "AlreadyExists"
if [ "${action}" == 'create' ]
then
# count lines in stderr that do not have "AlreadyExists" or "Warning"
ret=$(grep -cvw -e 'AlreadyExists' -e '^Warning:' "${stderr}" || true)
if [ "${ret}" -eq 0 ]
then
# Success! stderr is empty after removing all "AlreadyExists" lines.
echo "$(date): 'kubectl_retry ${action} ${*}' succeeded without unknown errors" > /dev/stderr
break
fi
fi
# in case of a failure when running "delete", ignore errors with "NotFound"
if [ "${action}" == 'delete' ]
then
# count lines in stderr that do not have "NotFound" or "Warning"
ret=$(grep -cvw -e 'NotFound' -e '^Warning:' "${stderr}" || true)
if [ "${ret}" -eq 0 ]
then
# Success! stderr is empty after removing all "NotFound" lines.
echo "$(date): 'kubectl_retry ${action} ${*}' succeeded without unknown errors" > /dev/stderr
break
fi
fi
retries=$((retries+1))
if [ ${retries} -eq ${KUBECTL_RETRY} ]
then
echo "$(date): 'kubectl_retry ${action} ${*}' failed, no more retries left (${retries}/${KUBECTL_RETRY})" > /dev/stderr
ret=1
break
fi
# empty stderr for the next loop
true > "${stderr}"
echo "$(date): 'kubectl_retry ${action} ${*}' failed (${retries}/${KUBECTL_RETRY}), will retry in ${KUBECTL_RETRY_DELAY} seconds" > /dev/stderr
sleep ${KUBECTL_RETRY_DELAY}
# reset ret so that a next working kubectl does not cause a non-zero
# return of the function
ret=0
done
echo "$(date): 'kubectl_retry ${action} ${*}' done (ret=${ret})" > /dev/stderr
# write output so that calling functions can consume it
cat "${stdout}" > /dev/stdout
cat "${stderr}" > /dev/stderr
rm -f "${stdout}" "${stderr}"
return ${ret}
}