ci: retry command in Pod on "unable to upgrade connection" error

Sometimes executing a command in a Pod fails with "unable to upgrade
connection". This is most likely a temporary situation, and retrying
hopefully reduces the number of spurious failures because of it.

Signed-off-by: Niels de Vos <ndevos@redhat.com>
This commit is contained in:
Niels de Vos 2022-07-26 15:28:59 +02:00 committed by mergify[bot]
parent 856d7c264c
commit a04a0ecc9f
2 changed files with 37 additions and 6 deletions

View File

@ -41,6 +41,11 @@ func isRetryableAPIError(err error) bool {
return true return true
} }
// "unable to upgrade connection" happens occasionally when executing commands in Pods
if strings.Contains(err.Error(), "unable to upgrade connection") {
return true
}
// "transport is closing" is an internal gRPC err, we can not use ErrConnClosing // "transport is closing" is an internal gRPC err, we can not use ErrConnClosing
if strings.Contains(err.Error(), "transport is closing") { if strings.Contains(err.Error(), "transport is closing") {
return true return true

View File

@ -197,7 +197,7 @@ func execCommandInDaemonsetPod(
CaptureStderr: true, CaptureStderr: true,
} }
_ /* stdout */, stderr, err := f.ExecWithOptions(podOpt) _ /* stdout */, stderr, err := execWithRetry(f, &podOpt)
return stderr, err return stderr, err
} }
@ -212,12 +212,35 @@ func listPods(f *framework.Framework, ns string, opt *metav1.ListOptions) ([]v1.
return podList.Items, err return podList.Items, err
} }
// execWithRetry runs the command described by opts through
// f.ExecWithOptions and repeats it for as long as the failure is classified
// as retryable by isRetryableAPIError. Attempts are made every `poll`
// interval, for at most deployTimeout minutes.
//
// It returns the stdout and stderr of the last attempt. A non-retryable
// failure aborts the polling immediately and is returned wrapped. If the
// deadline expires while the command is still failing with a retryable
// error, the returned error wraps the polling error and also mentions the
// last retryable error, so the cause of the timeout is not lost.
func execWithRetry(f *framework.Framework, opts *framework.ExecOptions) (string, string, error) {
	timeout := time.Duration(deployTimeout) * time.Minute
	var stdOut, stdErr string
	// retryErr holds the most recent retryable failure; it is only reported
	// when polling gives up without a successful or fatal attempt.
	var retryErr error

	err := wait.PollImmediate(poll, timeout, func() (bool, error) {
		var execErr error
		stdOut, stdErr, execErr = f.ExecWithOptions(*opts)
		if execErr == nil {
			return true, nil
		}
		if isRetryableAPIError(execErr) {
			retryErr = execErr

			return false, nil
		}
		// A fatal error is returned on its own; drop any earlier
		// retryable error so it is not appended below.
		retryErr = nil
		e2elog.Logf("failed to execute command: %v", execErr)

		return false, fmt.Errorf("failed to execute command: %w", execErr)
	})
	if err != nil && retryErr != nil {
		// Polling ended without success while the command kept failing
		// with a retryable error: surface that error next to the polling
		// error instead of silently discarding it.
		err = fmt.Errorf("%w (last retryable error: %v)", err, retryErr)
	}

	return stdOut, stdErr, err
}
func execCommandInPod(f *framework.Framework, c, ns string, opt *metav1.ListOptions) (string, string, error) { func execCommandInPod(f *framework.Framework, c, ns string, opt *metav1.ListOptions) (string, string, error) {
podOpt, err := getCommandInPodOpts(f, c, ns, "", opt) podOpt, err := getCommandInPodOpts(f, c, ns, "", opt)
if err != nil { if err != nil {
return "", "", err return "", "", err
} }
stdOut, stdErr, err := f.ExecWithOptions(podOpt)
stdOut, stdErr, err := execWithRetry(f, &podOpt)
if stdErr != "" { if stdErr != "" {
e2elog.Logf("stdErr occurred: %v", stdErr) e2elog.Logf("stdErr occurred: %v", stdErr)
} }
@ -232,7 +255,8 @@ func execCommandInContainer(
if err != nil { if err != nil {
return "", "", err return "", "", err
} }
stdOut, stdErr, err := f.ExecWithOptions(podOpt)
stdOut, stdErr, err := execWithRetry(f, &podOpt)
if stdErr != "" { if stdErr != "" {
e2elog.Logf("stdErr occurred: %v", stdErr) e2elog.Logf("stdErr occurred: %v", stdErr)
} }
@ -255,7 +279,7 @@ func execCommandInContainerByPodName(
PreserveWhitespace: true, PreserveWhitespace: true,
} }
stdOut, stdErr, err := f.ExecWithOptions(execOpts) stdOut, stdErr, err := execWithRetry(f, &execOpts)
if stdErr != "" { if stdErr != "" {
e2elog.Logf("stdErr occurred: %v", stdErr) e2elog.Logf("stdErr occurred: %v", stdErr)
} }
@ -271,7 +295,8 @@ func execCommandInToolBoxPod(f *framework.Framework, c, ns string) (string, stri
if err != nil { if err != nil {
return "", "", err return "", "", err
} }
stdOut, stdErr, err := f.ExecWithOptions(podOpt)
stdOut, stdErr, err := execWithRetry(f, &podOpt)
if stdErr != "" { if stdErr != "" {
e2elog.Logf("stdErr occurred: %v", stdErr) e2elog.Logf("stdErr occurred: %v", stdErr)
} }
@ -284,7 +309,8 @@ func execCommandInPodAndAllowFail(f *framework.Framework, c, ns string, opt *met
if err != nil { if err != nil {
return "", err.Error() return "", err.Error()
} }
stdOut, stdErr, err := f.ExecWithOptions(podOpt)
stdOut, stdErr, err := execWithRetry(f, &podOpt)
if err != nil { if err != nil {
e2elog.Logf("command %s failed: %v", c, err) e2elog.Logf("command %s failed: %v", c, err)
} }