e2e: retry deploying on API-server timeouts

The upgrade-tests-cephfs fails relatively regularly with the following
error during initial deployment:

    timeout waiting for deployment csi-cephfsplugin-provisioner with error error waiting for deployment "csi-cephfsplugin-provisioner" status to match expectation: etcdserver: request timed out

By detecting whether the API-server returned a non-fatal error, the test
does not need to abort, but can wait for completion. PollImmediate() will
still return ErrWaitTimeout once the timeout has elapsed.

Signed-off-by: Niels de Vos <ndevos@redhat.com>
This commit is contained in:
Niels de Vos 2020-10-23 11:44:35 +02:00 committed by mergify[bot]
parent b26d33b7c1
commit 48108bc549

View File

@ -59,6 +59,12 @@ func waitForDeploymentComplete(name, ns string, c kubernetes.Interface, t int) e
err = wait.PollImmediate(poll, timeout, func() (bool, error) {
deployment, err = c.AppsV1().Deployments(ns).Get(context.TODO(), name, metav1.GetOptions{})
if err != nil {
// a StatusError is not marked as 'retryable', but we want to retry anyway
if testutils.IsRetryableAPIError(err) || strings.Contains(err.Error(), "etcdserver: request timed out") {
// hide API-server timeouts, so that PollImmediate() retries
e2elog.Logf("deployment error: %v", err)
return false, nil
}
return false, err
}