/* Copyright 2021 The Ceph-CSI Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package e2e import ( "context" "fmt" "os" "slices" "time" appsv1 "k8s.io/api/apps/v1" autoscalingv1 "k8s.io/api/autoscaling/v1" v1 "k8s.io/api/core/v1" apierrs "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" deploymentutil "k8s.io/kubernetes/pkg/controller/deployment/util" "k8s.io/kubernetes/test/e2e/framework" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" ) // execCommandInPodWithName run command in pod using podName. func execCommandInPodWithName( f *framework.Framework, cmdString, podName, containerName, nameSpace string, ) (string, string, error) { cmd := []string{"/bin/sh", "-c", cmdString} podOpt := e2epod.ExecOptions{ Command: cmd, PodName: podName, Namespace: nameSpace, ContainerName: containerName, Stdin: nil, CaptureStdout: true, CaptureStderr: true, PreserveWhitespace: true, } return e2epod.ExecWithOptions(f, podOpt) } // loadAppDeployment loads the deployment app config and return deployment // object. func loadAppDeployment(path string) (*appsv1.Deployment, error) { deploy := appsv1.Deployment{} if err := unmarshal(path, &deploy); err != nil { return nil, err } for i := range deploy.Spec.Template.Spec.Containers { deploy.Spec.Template.Spec.Containers[i].ImagePullPolicy = v1.PullIfNotPresent } return &deploy, nil } // createDeploymentApp creates the deployment object and waits for it to be in // Available state. func createDeploymentApp(clientSet kubernetes.Interface, app *appsv1.Deployment, deployTimeout int) error { _, err := clientSet.AppsV1().Deployments(app.Namespace).Create(context.TODO(), app, metav1.CreateOptions{}) if err != nil { return fmt.Errorf("failed to create deploy: %w", err) } return waitForDeploymentInAvailableState(clientSet, app.Name, app.Namespace, deployTimeout) } // deleteDeploymentApp deletes the deployment object. func deleteDeploymentApp(clientSet kubernetes.Interface, name, ns string, deployTimeout int) error { timeout := time.Duration(deployTimeout) * time.Minute ctx := context.TODO() err := clientSet.AppsV1().Deployments(ns).Delete(ctx, name, metav1.DeleteOptions{}) if err != nil { return fmt.Errorf("failed to delete deployment: %w", err) } start := time.Now() framework.Logf("Waiting for deployment %q to be deleted", name) return wait.PollUntilContextTimeout(ctx, poll, timeout, true, func(ctx context.Context) (bool, error) { _, err := clientSet.AppsV1().Deployments(ns).Get(ctx, name, metav1.GetOptions{}) if err != nil { if isRetryableAPIError(err) { return false, nil } if apierrs.IsNotFound(err) { return true, nil } framework.Logf("%q deployment to be deleted (%d seconds elapsed)", name, int(time.Since(start).Seconds())) return false, fmt.Errorf("failed to get deployment: %w", err) } return false, nil }) } // waitForDeploymentInAvailableState wait for deployment to be in Available state. func waitForDeploymentInAvailableState(clientSet kubernetes.Interface, name, ns string, deployTimeout int) error { timeout := time.Duration(deployTimeout) * time.Minute start := time.Now() framework.Logf("Waiting up to %q to be in Available state", name) return wait.PollUntilContextTimeout(context.TODO(), poll, timeout, true, func(ctx context.Context) (bool, error) { d, err := clientSet.AppsV1().Deployments(ns).Get(ctx, name, metav1.GetOptions{}) if err != nil { if isRetryableAPIError(err) { return false, nil } if apierrs.IsNotFound(err) { return false, nil } framework.Logf("%q deployment to be Available (%d seconds elapsed)", name, int(time.Since(start).Seconds())) return false, err } cond := deploymentutil.GetDeploymentCondition(d.Status, appsv1.DeploymentAvailable) return cond != nil, nil }) } // Waits for the deployment to complete. func waitForDeploymentComplete(clientSet kubernetes.Interface, name, ns string, deployTimeout int) error { var ( deployment *appsv1.Deployment reason string err error ) timeout := time.Duration(deployTimeout) * time.Minute err = wait.PollUntilContextTimeout(context.TODO(), poll, timeout, true, func(ctx context.Context) (bool, error) { deployment, err = clientSet.AppsV1().Deployments(ns).Get(ctx, name, metav1.GetOptions{}) if err != nil { if isRetryableAPIError(err) { return false, nil } if apierrs.IsNotFound(err) { return false, nil } framework.Logf("deployment error: %v", err) return false, err } // TODO need to check rolling update // When the deployment status and its underlying resources reach the // desired state, we're done if deployment.Status.Replicas == deployment.Status.ReadyReplicas { return true, nil } framework.Logf( "deployment status: expected replica count %d running replica count %d", deployment.Status.Replicas, deployment.Status.ReadyReplicas) reason = fmt.Sprintf("deployment status: %#v", deployment.Status.String()) return false, nil }) if wait.Interrupted(err) { err = fmt.Errorf("%s", reason) } if err != nil { return fmt.Errorf("error waiting for deployment %q status to match desired state: %w", name, err) } return nil } // ResourceDeployer provides a generic interface for deploying different // resources. type ResourceDeployer interface { // Do is used to create/delete a resource with kubectl. Do(action kubectlAction) error } // yamlResource reads a YAML file and creates/deletes the resource with // kubectl. type yamlResource struct { filename string // namespace defaults to cephCSINamespace if not set namespace string // allowMissing prevents a failure in case the file is missing. allowMissing bool } func (yr *yamlResource) Do(action kubectlAction) error { data, err := os.ReadFile(yr.filename) if err != nil { if os.IsNotExist(err) && yr.allowMissing { return nil } return fmt.Errorf("failed to read content from %q: %w", yr.filename, err) } ns := cephCSINamespace if yr.namespace != "" { ns = yr.namespace } err = retryKubectlInput(ns, action, string(data), deployTimeout) if err != nil { return fmt.Errorf("failed to %s resource %q: %w", action, yr.filename, err) } return nil } // yamlResourceNamespaced takes a filename and calls // replaceNamespaceInTemplate() on it. There are several options for adjusting // templates, each has their own comment. type yamlResourceNamespaced struct { filename string namespace string domainLabel string crushLocationLabels string // set the number of replicas in a Deployment to 1. oneReplica bool // enable read affinity support (for RBD) enableReadAffinity bool } func (yrn *yamlResourceNamespaced) Do(action kubectlAction) error { data, err := replaceNamespaceInTemplate(yrn.filename) if err != nil { return fmt.Errorf("failed to read content from %q: %w", yrn.filename, err) } data = replaceLogLevelInTemplate(data) // disable VGS alpha feature, TODO: remove this in next release (3.14.0) data = disableVGSAlphaCLIArg(data) if yrn.oneReplica { data = oneReplicaDeployYaml(data) } if yrn.domainLabel != "" { data = addTopologyDomainsToDSYaml(data, yrn.domainLabel) } if yrn.enableReadAffinity { data = enableReadAffinityInTemplate(data) } if yrn.crushLocationLabels != "" { data = addCrsuhLocationLabels(data, yrn.crushLocationLabels) } err = retryKubectlInput(yrn.namespace, action, data, deployTimeout) if err != nil { return fmt.Errorf("failed to %s resource %q in namespace %q: %w", action, yrn.filename, yrn.namespace, err) } return nil } func waitForDeploymentUpdateScale( c kubernetes.Interface, ns, deploymentName string, scale *autoscalingv1.Scale, timeout int, ) error { t := time.Duration(timeout) * time.Minute start := time.Now() err := wait.PollUntilContextTimeout(context.TODO(), poll, t, true, func(ctx context.Context) (bool, error) { scaleResult, upsErr := c.AppsV1().Deployments(ns).UpdateScale(ctx, deploymentName, scale, metav1.UpdateOptions{}) if upsErr != nil { if isRetryableAPIError(upsErr) { return false, nil } framework.Logf( "Deployment UpdateScale %s/%s has not completed yet (%d seconds elapsed)", ns, deploymentName, int(time.Since(start).Seconds())) return false, fmt.Errorf("error update scale deployment %s/%s: %w", ns, deploymentName, upsErr) } if scaleResult.Spec.Replicas != scale.Spec.Replicas { framework.Logf("scale result not matching for deployment %s/%s, desired scale %d, got %d", ns, deploymentName, scale.Spec.Replicas, scaleResult.Spec.Replicas) return false, fmt.Errorf("error scale not matching in deployment %s/%s: %w", ns, deploymentName, upsErr) } return true, nil }) if err != nil { return fmt.Errorf("failed update scale deployment %s/%s: %w", ns, deploymentName, err) } return nil } func waitForDeploymentUpdate( c kubernetes.Interface, deployment *appsv1.Deployment, timeout int, ) error { t := time.Duration(timeout) * time.Minute start := time.Now() err := wait.PollUntilContextTimeout(context.TODO(), poll, t, true, func(ctx context.Context) (bool, error) { _, upErr := c.AppsV1().Deployments(deployment.Namespace).Update( ctx, deployment, metav1.UpdateOptions{}) if upErr != nil { if isRetryableAPIError(upErr) { return false, nil } framework.Logf( "Deployment Update %s/%s has not completed yet (%d seconds elapsed)", deployment.Namespace, deployment.Name, int(time.Since(start).Seconds())) return false, fmt.Errorf("error updating deployment %s/%s: %w", deployment.Namespace, deployment.Name, upErr) } return true, nil }) if err != nil { return fmt.Errorf("failed update deployment %s/%s: %w", deployment.Namespace, deployment.Name, err) } return nil } func waitForContainersArgsUpdate( c kubernetes.Interface, ns, deploymentName, key, value string, containers []string, timeout int, ) error { framework.Logf("waiting for deployment updates %s/%s", ns, deploymentName) ctx := context.TODO() // wait for the deployment to be available err := waitForDeploymentInAvailableState(c, deploymentName, ns, deployTimeout) if err != nil { return fmt.Errorf("deployment %s/%s did not become available yet: %w", ns, deploymentName, err) } // Scale down to 0. scale, err := c.AppsV1().Deployments(ns).GetScale(ctx, deploymentName, metav1.GetOptions{}) if err != nil { return fmt.Errorf("error get scale deployment %s/%s: %w", ns, deploymentName, err) } count := scale.Spec.Replicas scale.ResourceVersion = "" // indicate the scale update should be unconditional scale.Spec.Replicas = 0 err = waitForDeploymentUpdateScale(c, ns, deploymentName, scale, timeout) if err != nil { return err } // Update deployment. deployment, err := c.AppsV1().Deployments(ns).Get(ctx, deploymentName, metav1.GetOptions{}) if err != nil { return fmt.Errorf("error get deployment %s/%s: %w", ns, deploymentName, err) } cid := deployment.Spec.Template.Spec.Containers // cid: read as containers in deployment for i := range cid { if slices.Contains(containers, cid[i].Name) { match := false for j, ak := range cid[i].Args { if ak == key { // do replacement of value match = true cid[i].Args[j] = fmt.Sprintf("--%s=%s", key, value) break } } if !match { // append a new key value cid[i].Args = append(cid[i].Args, fmt.Sprintf("--%s=%s", key, value)) } deployment.Spec.Template.Spec.Containers[i].Args = cid[i].Args } } // clear creationTimestamp, generation, resourceVersion, and uid deployment.CreationTimestamp = metav1.Time{} deployment.Generation = 0 deployment.ResourceVersion = "0" deployment.UID = "" err = waitForDeploymentUpdate(c, deployment, timeout) if err != nil { return err } // Scale up to count. scale.Spec.Replicas = count err = waitForDeploymentUpdateScale(c, ns, deploymentName, scale, timeout) if err != nil { return err } // wait for scale to become count t := time.Duration(timeout) * time.Minute start := time.Now() err = wait.PollUntilContextTimeout(ctx, poll, t, true, func(ctx context.Context) (bool, error) { deploy, getErr := c.AppsV1().Deployments(ns).Get(ctx, deploymentName, metav1.GetOptions{}) if getErr != nil { if isRetryableAPIError(getErr) { return false, nil } framework.Logf( "Deployment Get %s/%s has not completed yet (%d seconds elapsed)", ns, deploymentName, int(time.Since(start).Seconds())) return false, fmt.Errorf("error getting deployment %s/%s: %w", ns, deploymentName, getErr) } if deploy.Status.Replicas != count { framework.Logf("Expected deployment %s/%s replicas %d, got %d", ns, deploymentName, count, deploy.Status.Replicas) return false, fmt.Errorf("error expected deployment %s/%s replicas %d, got %d", ns, deploymentName, count, deploy.Status.Replicas) } return true, nil }) if err != nil { return fmt.Errorf("failed getting deployment %s/%s: %w", ns, deploymentName, err) } return nil }