mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-01-23 21:29:30 +00:00
113 lines
4.0 KiB
Go
113 lines
4.0 KiB
Go
|
/*
|
||
|
Copyright 2018 The Kubernetes Authors.
|
||
|
|
||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
you may not use this file except in compliance with the License.
|
||
|
You may obtain a copy of the License at
|
||
|
|
||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
||
|
Unless required by applicable law or agreed to in writing, software
|
||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
See the License for the specific language governing permissions and
|
||
|
limitations under the License.
|
||
|
*/
|
||
|
|
||
|
package upgrades
|
||
|
|
||
|
import (
|
||
|
"regexp"
|
||
|
"time"
|
||
|
|
||
|
"k8s.io/api/core/v1"
|
||
|
"k8s.io/apimachinery/pkg/api/resource"
|
||
|
"k8s.io/kubernetes/test/e2e/framework"
|
||
|
"k8s.io/kubernetes/test/e2e/scheduling"
|
||
|
imageutils "k8s.io/kubernetes/test/utils/image"
|
||
|
|
||
|
. "github.com/onsi/ginkgo"
|
||
|
. "github.com/onsi/gomega"
|
||
|
)
|
||
|
|
||
|
// NvidiaGPUUpgradeTest checks that the gpu resource remains available both
// before and after a cluster upgrade. It carries no state of its own.
type NvidiaGPUUpgradeTest struct{}
|
||
|
|
||
|
// Name returns the fixed identifier string of this upgrade test, including
// its [sig-node] and [sig-scheduling] tags.
func (NvidiaGPUUpgradeTest) Name() string {
	return "nvidia-gpu-upgrade [sig-node] [sig-scheduling]"
}
|
||
|
|
||
|
// Setup creates a job requesting gpu.
|
||
|
func (t *NvidiaGPUUpgradeTest) Setup(f *framework.Framework) {
|
||
|
scheduling.SetupNVIDIAGPUNode(f, false)
|
||
|
By("Creating a job requesting gpu")
|
||
|
t.startJob(f)
|
||
|
}
|
||
|
|
||
|
// Test waits for the upgrade to complete, and then verifies that the
|
||
|
// cuda pod started by the gpu job can successfully finish.
|
||
|
func (t *NvidiaGPUUpgradeTest) Test(f *framework.Framework, done <-chan struct{}, upgrade UpgradeType) {
|
||
|
<-done
|
||
|
By("Verifying gpu job success")
|
||
|
t.verifyJobPodSuccess(f)
|
||
|
if upgrade == MasterUpgrade {
|
||
|
// MasterUpgrade should be totally hitless.
|
||
|
job, err := framework.GetJob(f.ClientSet, f.Namespace.Name, "cuda-add")
|
||
|
Expect(err).NotTo(HaveOccurred())
|
||
|
Expect(job.Status.Failed).To(BeZero(), "Job pods failed during master upgrade: %v", job.Status.Failed)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Teardown cleans up any remaining resources.
|
||
|
func (t *NvidiaGPUUpgradeTest) Teardown(f *framework.Framework) {
|
||
|
// rely on the namespace deletion to clean up everything
|
||
|
}
|
||
|
|
||
|
// startJob creates a job that requests gpu and runs a simple cuda container.
|
||
|
func (t *NvidiaGPUUpgradeTest) startJob(f *framework.Framework) {
|
||
|
var activeSeconds int64 = 3600
|
||
|
// Specifies 100 completions to make sure the job life spans across the upgrade.
|
||
|
testJob := framework.NewTestJob("succeed", "cuda-add", v1.RestartPolicyAlways, 1, 100, &activeSeconds, 6)
|
||
|
testJob.Spec.Template.Spec = v1.PodSpec{
|
||
|
RestartPolicy: v1.RestartPolicyOnFailure,
|
||
|
Containers: []v1.Container{
|
||
|
{
|
||
|
Name: "vector-addition",
|
||
|
Image: imageutils.GetE2EImage(imageutils.CudaVectorAdd),
|
||
|
Command: []string{"/bin/sh", "-c", "./vectorAdd && sleep 60"},
|
||
|
Resources: v1.ResourceRequirements{
|
||
|
Limits: v1.ResourceList{
|
||
|
framework.NVIDIAGPUResourceName: *resource.NewQuantity(1, resource.DecimalSI),
|
||
|
},
|
||
|
},
|
||
|
},
|
||
|
},
|
||
|
}
|
||
|
ns := f.Namespace.Name
|
||
|
_, err := framework.CreateJob(f.ClientSet, ns, testJob)
|
||
|
Expect(err).NotTo(HaveOccurred())
|
||
|
framework.Logf("Created job %v", testJob)
|
||
|
By("Waiting for gpu job pod start")
|
||
|
err = framework.WaitForAllJobPodsRunning(f.ClientSet, ns, testJob.Name, 1)
|
||
|
Expect(err).NotTo(HaveOccurred())
|
||
|
By("Done with gpu job pod start")
|
||
|
}
|
||
|
|
||
|
// verifyJobPodSuccess verifies that the started cuda pod successfully passes.
|
||
|
func (t *NvidiaGPUUpgradeTest) verifyJobPodSuccess(f *framework.Framework) {
|
||
|
// Wait for client pod to complete.
|
||
|
ns := f.Namespace.Name
|
||
|
err := framework.WaitForAllJobPodsRunning(f.ClientSet, f.Namespace.Name, "cuda-add", 1)
|
||
|
Expect(err).NotTo(HaveOccurred())
|
||
|
pods, err := framework.GetJobPods(f.ClientSet, f.Namespace.Name, "cuda-add")
|
||
|
Expect(err).NotTo(HaveOccurred())
|
||
|
createdPod := pods.Items[0].Name
|
||
|
framework.Logf("Created pod %v", createdPod)
|
||
|
f.PodClient().WaitForSuccess(createdPod, 5*time.Minute)
|
||
|
logs, err := framework.GetPodLogs(f.ClientSet, ns, createdPod, "vector-addition")
|
||
|
framework.ExpectNoError(err, "Should be able to get pod logs")
|
||
|
framework.Logf("Got pod logs: %v", logs)
|
||
|
regex := regexp.MustCompile("PASSED")
|
||
|
Expect(regex.MatchString(logs)).To(BeTrue())
|
||
|
}
|