#!/bin/bash

KUBECTL_RETRY=5
KUBECTL_RETRY_DELAY=10

# kubectl_retry calls `kubectl` with the passed arguments. In case of a
# failure, the `kubectl` command will be retried for `KUBECTL_RETRY` times,
# with a delay of `KUBECTL_RETRY_DELAY` between them.
#
# Upon creation failures, `AlreadyExists` and `Warning` are ignored, making
# sure the create succeeds in case some objects were created successfully in a
# previous try.
#
# Upon deletion failures, the same applies as for creation, except that
# NotFound is ignored.
#
# Logs from `kubectl` are passed on to stdout, so that a calling function can
# capture it. During the function, logs are written to stderr as to not
# interfere with the log parsing of the calling function.
kubectl_retry() {
    local retries=0 action="${1}" ret=0 stdout stderr
    shift

    # temporary files for kubectl output
    stdout=$(mktemp rook-kubectl-stdout.XXXXXXXX)
    stderr=$(mktemp rook-kubectl-stderr.XXXXXXXX)

    while ! ( kubectl "${action}" "${@}" 2>"${stderr}" 1>>"${stdout}" )
    do
        # write logs to stderr and empty stderr (only)
        cat "${stdout}" > /dev/stderr
        cat "${stderr}" > /dev/stderr
        echo "$(date): 'kubectl_retry ${action} ${*}' try #${retries} failed, checking errors" > /dev/stderr

        # in case of a failure when running "create", ignore errors with "AlreadyExists"
        if [ "${action}" == 'create' ]
        then
            # count lines in stderr that do not have "AlreadyExists" or "Warning"
            ret=$(grep -cvw -e 'AlreadyExists' -e '^Warning:' "${stderr}" || true)
            if [ "${ret}" -eq 0 ]
            then
                # Success! stderr is empty after removing all "AlreadyExists" lines.
                echo "$(date): 'kubectl_retry ${action} ${*}' succeeded without unknown errors" > /dev/stderr
                break
            fi
        fi

        # in case of a failure when running "delete", ignore errors with "NotFound"
        if [ "${action}" == 'delete' ]
        then
            # count lines in stderr that do not have "NotFound" or "Warning"
            ret=$(grep -cvw -e 'NotFound' -e '^Warning:' "${stderr}" || true)
            if [ "${ret}" -eq 0 ]
            then
                # Success! stderr is empty after removing all "NotFound" lines.
                echo "$(date): 'kubectl_retry ${action} ${*}' succeeded without unknown errors" > /dev/stderr
                break
            fi
        fi

        retries=$((retries+1))
        if [ ${retries} -eq ${KUBECTL_RETRY} ]
        then
            echo "$(date): 'kubectl_retry ${action} ${*}' failed, no more retries left (${retries}/${KUBECTL_RETRY})" > /dev/stderr
            ret=1
            break
        fi

        # empty stderr for the next loop
        true > "${stderr}"
        echo "$(date): 'kubectl_retry ${action} ${*}' failed (${retries}/${KUBECTL_RETRY}), will retry in ${KUBECTL_RETRY_DELAY} seconds" > /dev/stderr

        sleep ${KUBECTL_RETRY_DELAY}

        # reset ret so that a next working kubectl does not cause a non-zero
        # return of the function
        ret=0
    done

    echo "$(date): 'kubectl_retry ${action} ${*}' done (ret=${ret})" > /dev/stderr

    # write output so that calling functions can consume it
    cat "${stdout}" > /dev/stdout
    cat "${stderr}" > /dev/stderr

    rm -f "${stdout}" "${stderr}"

    return ${ret}
}