vendor files

This commit is contained in:
Serguei Bezverkhi
2018-01-09 13:57:14 -05:00
parent 558bc6c02a
commit 7b24313bd6
16547 changed files with 4527373 additions and 0 deletions

View File

@ -0,0 +1,109 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_test(
name = "go_default_test",
size = "large",
srcs = [
"extender_test.go",
"main_test.go",
"predicates_test.go",
"preemption_test.go",
"priorities_test.go",
"scheduler_test.go",
"taint_test.go",
"volume_binding_test.go",
],
importpath = "k8s.io/kubernetes/test/integration/scheduler",
library = ":go_default_library",
tags = ["integration"],
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/api/testapi:go_default_library",
"//pkg/apis/componentconfig:go_default_library",
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/client/clientset_generated/internalclientset:go_default_library",
"//pkg/client/informers/informers_generated/internalversion:go_default_library",
"//pkg/controller/node:go_default_library",
"//pkg/controller/node/ipam:go_default_library",
"//pkg/controller/volume/persistentvolume:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubeapiserver/admission:go_default_library",
"//plugin/cmd/kube-scheduler/app:go_default_library",
"//plugin/pkg/admission/podtolerationrestriction:go_default_library",
"//plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction:go_default_library",
"//plugin/pkg/scheduler:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithmprovider:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/core:go_default_library",
"//plugin/pkg/scheduler/factory:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//test/e2e/framework:go_default_library",
"//test/integration/framework:go_default_library",
"//test/utils:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/api/storage/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/diff:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/informers:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)
go_library(
name = "go_default_library",
srcs = ["util.go"],
importpath = "k8s.io/kubernetes/test/integration/scheduler",
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/api/v1/pod:go_default_library",
"//plugin/pkg/scheduler:go_default_library",
"//plugin/pkg/scheduler/algorithmprovider:go_default_library",
"//plugin/pkg/scheduler/factory:go_default_library",
"//test/integration/framework:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/client-go/informers:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
],
)

View File

@ -0,0 +1,4 @@
approvers:
- sig-scheduling-maintainers
reviewers:
- sig-scheduling

View File

@ -0,0 +1,452 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
// This file tests scheduler extender.
import (
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/legacyscheme"
"k8s.io/kubernetes/pkg/api/testapi"
"k8s.io/kubernetes/plugin/pkg/scheduler"
_ "k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
e2e "k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/integration/framework"
)
const (
filter = "filter"
prioritize = "prioritize"
bind = "bind"
)
type fitPredicate func(pod *v1.Pod, node *v1.Node) (bool, error)
type priorityFunc func(pod *v1.Pod, nodes *v1.NodeList) (*schedulerapi.HostPriorityList, error)
type priorityConfig struct {
function priorityFunc
weight int
}
type Extender struct {
name string
predicates []fitPredicate
prioritizers []priorityConfig
nodeCacheCapable bool
Client clientset.Interface
}
func (e *Extender) serveHTTP(t *testing.T, w http.ResponseWriter, req *http.Request) {
decoder := json.NewDecoder(req.Body)
defer req.Body.Close()
encoder := json.NewEncoder(w)
if strings.Contains(req.URL.Path, filter) || strings.Contains(req.URL.Path, prioritize) {
var args schedulerapi.ExtenderArgs
if err := decoder.Decode(&args); err != nil {
http.Error(w, "Decode error", http.StatusBadRequest)
return
}
if strings.Contains(req.URL.Path, filter) {
resp := &schedulerapi.ExtenderFilterResult{}
resp, err := e.Filter(&args)
if err != nil {
resp.Error = err.Error()
}
if err := encoder.Encode(resp); err != nil {
t.Fatalf("Failed to encode %v", resp)
}
} else if strings.Contains(req.URL.Path, prioritize) {
// Prioritize errors are ignored. Default k8s priorities or another extender's
// priorities may be applied.
priorities, _ := e.Prioritize(&args)
if err := encoder.Encode(priorities); err != nil {
t.Fatalf("Failed to encode %+v", priorities)
}
}
} else if strings.Contains(req.URL.Path, bind) {
var args schedulerapi.ExtenderBindingArgs
if err := decoder.Decode(&args); err != nil {
http.Error(w, "Decode error", http.StatusBadRequest)
return
}
resp := &schedulerapi.ExtenderBindingResult{}
if err := e.Bind(&args); err != nil {
resp.Error = err.Error()
}
if err := encoder.Encode(resp); err != nil {
t.Fatalf("Failed to encode %+v", resp)
}
} else {
http.Error(w, "Unknown method", http.StatusNotFound)
}
}
func (e *Extender) filterUsingNodeCache(args *schedulerapi.ExtenderArgs) (*schedulerapi.ExtenderFilterResult, error) {
nodeSlice := make([]string, 0)
failedNodesMap := schedulerapi.FailedNodesMap{}
for _, nodeName := range *args.NodeNames {
fits := true
for _, predicate := range e.predicates {
fit, err := predicate(&args.Pod,
&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}})
if err != nil {
return &schedulerapi.ExtenderFilterResult{
Nodes: nil,
NodeNames: nil,
FailedNodes: schedulerapi.FailedNodesMap{},
Error: err.Error(),
}, err
}
if !fit {
fits = false
break
}
}
if fits {
nodeSlice = append(nodeSlice, nodeName)
} else {
failedNodesMap[nodeName] = fmt.Sprintf("extender failed: %s", e.name)
}
}
return &schedulerapi.ExtenderFilterResult{
Nodes: nil,
NodeNames: &nodeSlice,
FailedNodes: failedNodesMap,
}, nil
}
func (e *Extender) Filter(args *schedulerapi.ExtenderArgs) (*schedulerapi.ExtenderFilterResult, error) {
filtered := []v1.Node{}
failedNodesMap := schedulerapi.FailedNodesMap{}
if e.nodeCacheCapable {
return e.filterUsingNodeCache(args)
} else {
for _, node := range args.Nodes.Items {
fits := true
for _, predicate := range e.predicates {
fit, err := predicate(&args.Pod, &node)
if err != nil {
return &schedulerapi.ExtenderFilterResult{
Nodes: &v1.NodeList{},
NodeNames: nil,
FailedNodes: schedulerapi.FailedNodesMap{},
Error: err.Error(),
}, err
}
if !fit {
fits = false
break
}
}
if fits {
filtered = append(filtered, node)
} else {
failedNodesMap[node.Name] = fmt.Sprintf("extender failed: %s", e.name)
}
}
return &schedulerapi.ExtenderFilterResult{
Nodes: &v1.NodeList{Items: filtered},
NodeNames: nil,
FailedNodes: failedNodesMap,
}, nil
}
}
func (e *Extender) Prioritize(args *schedulerapi.ExtenderArgs) (*schedulerapi.HostPriorityList, error) {
result := schedulerapi.HostPriorityList{}
combinedScores := map[string]int{}
var nodes = &v1.NodeList{Items: []v1.Node{}}
if e.nodeCacheCapable {
for _, nodeName := range *args.NodeNames {
nodes.Items = append(nodes.Items, v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}})
}
} else {
nodes = args.Nodes
}
for _, prioritizer := range e.prioritizers {
weight := prioritizer.weight
if weight == 0 {
continue
}
priorityFunc := prioritizer.function
prioritizedList, err := priorityFunc(&args.Pod, nodes)
if err != nil {
return &schedulerapi.HostPriorityList{}, err
}
for _, hostEntry := range *prioritizedList {
combinedScores[hostEntry.Host] += hostEntry.Score * weight
}
}
for host, score := range combinedScores {
result = append(result, schedulerapi.HostPriority{Host: host, Score: score})
}
return &result, nil
}
func (e *Extender) Bind(binding *schedulerapi.ExtenderBindingArgs) error {
b := &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Namespace: binding.PodNamespace, Name: binding.PodName, UID: binding.PodUID},
Target: v1.ObjectReference{
Kind: "Node",
Name: binding.Node,
},
}
return e.Client.CoreV1().Pods(b.Namespace).Bind(b)
}
func machine_1_2_3_Predicate(pod *v1.Pod, node *v1.Node) (bool, error) {
if node.Name == "machine1" || node.Name == "machine2" || node.Name == "machine3" {
return true, nil
}
return false, nil
}
func machine_2_3_5_Predicate(pod *v1.Pod, node *v1.Node) (bool, error) {
if node.Name == "machine2" || node.Name == "machine3" || node.Name == "machine5" {
return true, nil
}
return false, nil
}
func machine_2_Prioritizer(pod *v1.Pod, nodes *v1.NodeList) (*schedulerapi.HostPriorityList, error) {
result := schedulerapi.HostPriorityList{}
for _, node := range nodes.Items {
score := 1
if node.Name == "machine2" {
score = 10
}
result = append(result, schedulerapi.HostPriority{
Host: node.Name,
Score: score,
})
}
return &result, nil
}
func machine_3_Prioritizer(pod *v1.Pod, nodes *v1.NodeList) (*schedulerapi.HostPriorityList, error) {
result := schedulerapi.HostPriorityList{}
for _, node := range nodes.Items {
score := 1
if node.Name == "machine3" {
score = 10
}
result = append(result, schedulerapi.HostPriority{
Host: node.Name,
Score: score,
})
}
return &result, nil
}
func TestSchedulerExtender(t *testing.T) {
_, s, closeFn := framework.RunAMaster(nil)
defer closeFn()
ns := framework.CreateTestingNamespace("scheduler-extender", s, t)
defer framework.DeleteTestingNamespace(ns, s, t)
clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: s.URL, ContentConfig: restclient.ContentConfig{GroupVersion: testapi.Groups[v1.GroupName].GroupVersion()}})
extender1 := &Extender{
name: "extender1",
predicates: []fitPredicate{machine_1_2_3_Predicate},
prioritizers: []priorityConfig{{machine_2_Prioritizer, 1}},
}
es1 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
extender1.serveHTTP(t, w, req)
}))
defer es1.Close()
extender2 := &Extender{
name: "extender2",
predicates: []fitPredicate{machine_2_3_5_Predicate},
prioritizers: []priorityConfig{{machine_3_Prioritizer, 1}},
Client: clientSet,
}
es2 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
extender2.serveHTTP(t, w, req)
}))
defer es2.Close()
extender3 := &Extender{
name: "extender3",
predicates: []fitPredicate{machine_1_2_3_Predicate},
prioritizers: []priorityConfig{{machine_2_Prioritizer, 5}},
nodeCacheCapable: true,
}
es3 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
extender3.serveHTTP(t, w, req)
}))
defer es3.Close()
policy := schedulerapi.Policy{
ExtenderConfigs: []schedulerapi.ExtenderConfig{
{
URLPrefix: es1.URL,
FilterVerb: filter,
PrioritizeVerb: prioritize,
Weight: 3,
EnableHttps: false,
},
{
URLPrefix: es2.URL,
FilterVerb: filter,
PrioritizeVerb: prioritize,
BindVerb: bind,
Weight: 4,
EnableHttps: false,
},
{
URLPrefix: es3.URL,
FilterVerb: filter,
PrioritizeVerb: prioritize,
Weight: 10,
EnableHttps: false,
NodeCacheCapable: true,
},
},
}
policy.APIVersion = testapi.Groups[v1.GroupName].GroupVersion().String()
informerFactory := informers.NewSharedInformerFactory(clientSet, 0)
schedulerConfigFactory := factory.NewConfigFactory(
v1.DefaultSchedulerName,
clientSet,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
informerFactory.Extensions().V1beta1().ReplicaSets(),
informerFactory.Apps().V1beta1().StatefulSets(),
informerFactory.Core().V1().Services(),
informerFactory.Policy().V1beta1().PodDisruptionBudgets(),
informerFactory.Storage().V1().StorageClasses(),
v1.DefaultHardPodAffinitySymmetricWeight,
enableEquivalenceCache,
)
schedulerConfig, err := schedulerConfigFactory.CreateFromConfig(policy)
if err != nil {
t.Fatalf("Couldn't create scheduler config: %v", err)
}
eventBroadcaster := record.NewBroadcaster()
schedulerConfig.Recorder = eventBroadcaster.NewRecorder(legacyscheme.Scheme, v1.EventSource{Component: v1.DefaultSchedulerName})
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(clientSet.CoreV1().RESTClient()).Events("")})
scheduler, _ := scheduler.NewFromConfigurator(&scheduler.FakeConfigurator{Config: schedulerConfig}, nil...)
informerFactory.Start(schedulerConfig.StopEverything)
scheduler.Run()
defer close(schedulerConfig.StopEverything)
DoTestPodScheduling(ns, t, clientSet)
}
func DoTestPodScheduling(ns *v1.Namespace, t *testing.T, cs clientset.Interface) {
// NOTE: This test cannot run in parallel, because it is creating and deleting
// non-namespaced objects (Nodes).
defer cs.CoreV1().Nodes().DeleteCollection(nil, metav1.ListOptions{})
goodCondition := v1.NodeCondition{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
Reason: fmt.Sprintf("schedulable condition"),
LastHeartbeatTime: metav1.Time{Time: time.Now()},
}
node := &v1.Node{
Spec: v1.NodeSpec{Unschedulable: false},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
},
Conditions: []v1.NodeCondition{goodCondition},
},
}
for ii := 0; ii < 5; ii++ {
node.Name = fmt.Sprintf("machine%d", ii+1)
if _, err := cs.CoreV1().Nodes().Create(node); err != nil {
t.Fatalf("Failed to create nodes: %v", err)
}
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "extender-test-pod"},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: e2e.GetPauseImageName(cs)}},
},
}
myPod, err := cs.CoreV1().Pods(ns.Name).Create(pod)
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
err = wait.Poll(time.Second, wait.ForeverTestTimeout, podScheduled(cs, myPod.Namespace, myPod.Name))
if err != nil {
t.Fatalf("Failed to schedule pod: %v", err)
}
myPod, err = cs.CoreV1().Pods(ns.Name).Get(myPod.Name, metav1.GetOptions{})
if err != nil {
t.Fatalf("Failed to get pod: %v", err)
} else if myPod.Spec.NodeName != "machine2" {
t.Fatalf("Failed to schedule using extender, expected machine2, got %v", myPod.Spec.NodeName)
}
var gracePeriod int64
if err := cs.CoreV1().Pods(ns.Name).Delete(myPod.Name, &metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod}); err != nil {
t.Fatalf("Failed to delete pod: %v", err)
}
_, err = cs.CoreV1().Pods(ns.Name).Get(myPod.Name, metav1.GetOptions{})
if err == nil {
t.Fatalf("Failed to delete pod: %v", err)
}
t.Logf("Scheduled pod using extenders")
}

View File

@ -0,0 +1,27 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"testing"
"k8s.io/kubernetes/test/integration/framework"
)
func TestMain(m *testing.M) {
framework.EtcdMain(m.Run)
}

View File

@ -0,0 +1,872 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"testing"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/test/integration/framework"
testutils "k8s.io/kubernetes/test/utils"
)
// This file tests the scheduler predicates functionality.
const pollInterval = 100 * time.Millisecond
// TestInterPodAffinity verifies that scheduler's inter pod affinity and
// anti-affinity predicate functions works correctly.
func TestInterPodAffinity(t *testing.T) {
context := initTest(t, "inter-pod-affinity")
defer cleanupTest(t, context)
// Add a few nodes.
nodes, err := createNodes(context.clientSet, "testnode", nil, 2)
if err != nil {
t.Fatalf("Cannot create nodes: %v", err)
}
// Add labels to the nodes.
labels1 := map[string]string{
"region": "r1",
"zone": "z11",
}
for _, node := range nodes {
if err = testutils.AddLabelsToNode(context.clientSet, node.Name, labels1); err != nil {
t.Fatalf("Cannot add labels to node: %v", err)
}
if err = waitForNodeLabels(context.clientSet, node.Name, labels1); err != nil {
t.Fatalf("Adding labels to node didn't succeed: %v", err)
}
}
cs := context.clientSet
podLabel := map[string]string{"service": "securityscan"}
// podLabel2 := map[string]string{"security": "S1"}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
node *v1.Node
fits bool
errorType string
test string
}{
/*{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpDoesNotExist,
Values: []string{"securityscan"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
node: nodes[0],
fits: false,
errorType: "invalidPod",
test: "validates that a pod with an invalid podAffinity is rejected because of the LabelSelectorRequirement is invalid",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
node: nodes[0],
fits: false,
test: "validates that Inter-pod-Affinity is respected if not matching",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
pods: []*v1.Pod{{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
NodeName: nodes[0].Name,
},
},
},
node: nodes[0],
fits: true,
test: "validates that InterPodAffinity is respected if matching. requiredDuringSchedulingIgnoredDuringExecution in PodAffinity using In operator that matches the existing pod",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"securityscan3", "value3"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
NodeName: nodes[0].Name},
ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel}}},
node: nodes[0],
fits: true,
test: "validates that InterPodAffinity is respected if matching. requiredDuringSchedulingIgnoredDuringExecution in PodAffinity using not in operator in labelSelector that matches the existing pod",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
Namespaces: []string{"diff-namespace"},
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
NodeName: nodes[0].Name},
ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel, Namespace: "ns"}}},
node: nodes[0],
fits: false,
test: "validates that inter-pod-affinity is respected when pods have different Namespaces",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"antivirusscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
NodeName: nodes[0].Name}, ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel}}},
node: nodes[0],
fits: false,
test: "Doesn't satisfy the PodAffinity because of unmatching labelSelector with the existing pod",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpExists,
}, {
Key: "wrongkey",
Operator: metav1.LabelSelectorOpDoesNotExist,
},
},
},
TopologyKey: "region",
}, {
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan"},
}, {
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"WrongValue"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
NodeName: nodes[0].Name}, ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel}}},
node: nodes[0],
fits: true,
test: "validates that InterPodAffinity is respected if matching with multiple affinities in multiple RequiredDuringSchedulingIgnoredDuringExecution ",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: podLabel2,
Name: "fakename",
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpExists,
}, {
Key: "wrongkey",
Operator: metav1.LabelSelectorOpDoesNotExist,
},
},
},
TopologyKey: "region",
}, {
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan2"},
}, {
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"WrongValue"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
NodeName: nodes[0].Name}, ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel}}},
node: nodes[0],
fits: false,
test: "The labelSelector requirements(items of matchExpressions) are ANDed, the pod cannot schedule onto the node because one of the matchExpression items doesn't match.",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"antivirusscan", "value2"},
},
},
},
TopologyKey: "node",
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
NodeName: nodes[0].Name}, ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel}}},
node: nodes[0],
fits: true,
test: "validates that InterPod Affinity and AntiAffinity is respected if matching",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"antivirusscan", "value2"},
},
},
},
TopologyKey: "node",
},
},
},
},
},
},
pods: []*v1.Pod{
{
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
NodeName: nodes[0].Name,
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"antivirusscan", "value2"},
},
},
},
TopologyKey: "node",
},
},
},
},
},
ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel},
},
},
node: nodes[0],
fits: true,
test: "satisfies the PodAffinity and PodAntiAffinity and PodAntiAffinity symmetry with the existing pod",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel2,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "zone",
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
NodeName: nodes[0].Name}, ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel}}},
node: nodes[0],
fits: false,
test: "satisfies the PodAffinity but doesn't satisfies the PodAntiAffinity with the existing pod",
},*/
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"antivirusscan", "value2"},
},
},
},
TopologyKey: "node",
},
},
},
},
},
},
pods: []*v1.Pod{
{
Spec: v1.PodSpec{
NodeName: nodes[0].Name,
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value3"},
},
},
},
TopologyKey: "zone",
},
},
},
},
},
ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel},
},
},
node: nodes[0],
fits: false,
test: "satisfies the PodAffinity and PodAntiAffinity but doesn't satisfies PodAntiAffinity symmetry with the existing pod",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel,
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
pods: []*v1.Pod{{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel}}},
node: nodes[0],
fits: false,
test: "pod matches its own Label in PodAffinity and that matches the existing pod Labels",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fakename",
Labels: podLabel,
},
Spec: v1.PodSpec{Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}}},
},
pods: []*v1.Pod{
{
Spec: v1.PodSpec{NodeName: nodes[0].Name,
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "zone",
},
},
},
},
},
ObjectMeta: metav1.ObjectMeta{
Name: "fakename2",
Labels: podLabel},
},
},
node: nodes[0],
fits: false,
test: "Verify that PodAntiAffinity of an existing pod is respected when PodAntiAffinity symmetry is not satisfied with the existing pod",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "fake-name",
Labels: podLabel,
},
Spec: v1.PodSpec{Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}}},
},
pods: []*v1.Pod{
{
Spec: v1.PodSpec{NodeName: nodes[0].Name,
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "zone",
},
},
},
},
},
ObjectMeta: metav1.ObjectMeta{
Name: "fake-name2",
Labels: podLabel},
},
},
node: nodes[0],
fits: true,
test: "Verify that PodAntiAffinity from existing pod is respected when pod statisfies PodAntiAffinity symmetry with the existing pod",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "fake-name2"},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
NodeSelector: map[string]string{"region": "r1"},
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"abc"},
},
},
},
TopologyKey: "region",
},
},
},
},
},
},
pods: []*v1.Pod{
{Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "container", Image: framework.GetPauseImageName(cs)}},
NodeName: nodes[0].Name}, ObjectMeta: metav1.ObjectMeta{Name: "fakename", Labels: map[string]string{"foo": "abc"}}},
},
fits: false,
test: "nodes[0] and nodes[1] have same topologyKey and label value. nodes[0] has an existing pod that matches the inter pod affinity rule. The new pod can not be scheduled onto either of the two nodes.",
},
}
for _, test := range tests {
for _, pod := range test.pods {
var nsName string
if pod.Namespace != "" {
nsName = pod.Namespace
} else {
nsName = context.ns.Name
}
createdPod, err := cs.CoreV1().Pods(nsName).Create(pod)
if err != nil {
t.Fatalf("Test Failed: error, %v, while creating pod during test: %v", err, test.test)
}
err = wait.Poll(pollInterval, wait.ForeverTestTimeout, podScheduled(cs, createdPod.Namespace, createdPod.Name))
if err != nil {
t.Errorf("Test Failed: error, %v, while waiting for pod during test, %v", err, test)
}
}
testPod, err := cs.CoreV1().Pods(context.ns.Name).Create(test.pod)
if err != nil {
if !(test.errorType == "invalidPod" && errors.IsInvalid(err)) {
t.Fatalf("Test Failed: error, %v, while creating pod during test: %v", err, test.test)
}
}
if test.fits {
err = wait.Poll(pollInterval, wait.ForeverTestTimeout, podScheduled(cs, testPod.Namespace, testPod.Name))
} else {
err = wait.Poll(pollInterval, wait.ForeverTestTimeout, podUnschedulable(cs, testPod.Namespace, testPod.Name))
}
if err != nil {
t.Errorf("Test Failed: %v, err %v, test.fits %v", test.test, err, test.fits)
}
err = cs.CoreV1().Pods(context.ns.Name).Delete(test.pod.Name, metav1.NewDeleteOptions(0))
if err != nil {
t.Errorf("Test Failed: error, %v, while deleting pod during test: %v", err, test.test)
}
err = wait.Poll(pollInterval, wait.ForeverTestTimeout, podDeleted(cs, context.ns.Name, test.pod.Name))
if err != nil {
t.Errorf("Test Failed: error, %v, while waiting for pod to get deleted, %v", err, test.test)
}
for _, pod := range test.pods {
var nsName string
if pod.Namespace != "" {
nsName = pod.Namespace
} else {
nsName = context.ns.Name
}
err = cs.CoreV1().Pods(nsName).Delete(pod.Name, metav1.NewDeleteOptions(0))
if err != nil {
t.Errorf("Test Failed: error, %v, while deleting pod during test: %v", err, test.test)
}
err = wait.Poll(pollInterval, wait.ForeverTestTimeout, podDeleted(cs, nsName, pod.Name))
if err != nil {
t.Errorf("Test Failed: error, %v, while waiting for pod to get deleted, %v", err, test.test)
}
}
}
}

View File

@ -0,0 +1,769 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// This file tests preemption functionality of the scheduler.
package scheduler
import (
"fmt"
"testing"
"time"
"k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/pkg/features"
_ "k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider"
"k8s.io/kubernetes/plugin/pkg/scheduler/core"
testutils "k8s.io/kubernetes/test/utils"
"github.com/golang/glog"
)
var lowPriority, mediumPriority, highPriority = int32(100), int32(200), int32(300)
func waitForNominatedNodeAnnotation(cs clientset.Interface, pod *v1.Pod) error {
if err := wait.Poll(100*time.Millisecond, wait.ForeverTestTimeout, func() (bool, error) {
pod, err := cs.CoreV1().Pods(pod.Namespace).Get(pod.Name, metav1.GetOptions{})
if err != nil {
return false, err
}
annot, found := pod.Annotations[core.NominatedNodeAnnotationKey]
if found && len(annot) > 0 {
return true, nil
}
return false, err
}); err != nil {
return fmt.Errorf("Pod %v annotation did not get set: %v", pod.Name, err)
}
return nil
}
// TestPreemption tests a few preemption scenarios.
func TestPreemption(t *testing.T) {
// Enable PodPriority feature gate.
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority))
// Initialize scheduler.
context := initTest(t, "preemption")
defer cleanupTest(t, context)
cs := context.clientSet
defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(100, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(100, resource.BinarySI)},
}
tests := []struct {
description string
existingPods []*v1.Pod
pod *v1.Pod
preemptedPodIndexes map[int]struct{}
}{
{
description: "basic pod preemption",
existingPods: []*v1.Pod{
initPausePod(context.clientSet, &pausePodConfig{
Name: "victim-pod",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(400, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
}),
},
pod: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(300, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
}),
preemptedPodIndexes: map[int]struct{}{0: {}},
},
{
description: "preemption is performed to satisfy anti-affinity",
existingPods: []*v1.Pod{
initPausePod(cs, &pausePodConfig{
Name: "pod-0", Namespace: context.ns.Name,
Priority: &mediumPriority,
Labels: map[string]string{"pod": "p0"},
Resources: defaultPodRes,
}),
initPausePod(cs, &pausePodConfig{
Name: "pod-1", Namespace: context.ns.Name,
Priority: &lowPriority,
Labels: map[string]string{"pod": "p1"},
Resources: defaultPodRes,
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "pod",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"preemptor"},
},
},
},
TopologyKey: "node",
},
},
},
},
}),
},
// A higher priority pod with anti-affinity.
pod: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Labels: map[string]string{"pod": "preemptor"},
Resources: defaultPodRes,
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "pod",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"p0"},
},
},
},
TopologyKey: "node",
},
},
},
},
}),
preemptedPodIndexes: map[int]struct{}{0: {}, 1: {}},
},
{
// This is similar to the previous case only pod-1 is high priority.
description: "preemption is not performed when anti-affinity is not satisfied",
existingPods: []*v1.Pod{
initPausePod(cs, &pausePodConfig{
Name: "pod-0", Namespace: context.ns.Name,
Priority: &mediumPriority,
Labels: map[string]string{"pod": "p0"},
Resources: defaultPodRes,
}),
initPausePod(cs, &pausePodConfig{
Name: "pod-1", Namespace: context.ns.Name,
Priority: &highPriority,
Labels: map[string]string{"pod": "p1"},
Resources: defaultPodRes,
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "pod",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"preemptor"},
},
},
},
TopologyKey: "node",
},
},
},
},
}),
},
// A higher priority pod with anti-affinity.
pod: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Labels: map[string]string{"pod": "preemptor"},
Resources: defaultPodRes,
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "pod",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"p0"},
},
},
},
TopologyKey: "node",
},
},
},
},
}),
preemptedPodIndexes: map[int]struct{}{},
},
}
// Create a node with some resources and a label.
nodeRes := &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(500, resource.BinarySI),
}
node, err := createNode(context.clientSet, "node1", nodeRes)
if err != nil {
t.Fatalf("Error creating nodes: %v", err)
}
nodeLabels := map[string]string{"node": node.Name}
if err = testutils.AddLabelsToNode(context.clientSet, node.Name, nodeLabels); err != nil {
t.Fatalf("Cannot add labels to node: %v", err)
}
if err = waitForNodeLabels(context.clientSet, node.Name, nodeLabels); err != nil {
t.Fatalf("Adding labels to node didn't succeed: %v", err)
}
for _, test := range tests {
pods := make([]*v1.Pod, len(test.existingPods))
// Create and run existingPods.
for i, p := range test.existingPods {
pods[i], err = runPausePod(cs, p)
if err != nil {
t.Fatalf("Test [%v]: Error running pause pod: %v", test.description, err)
}
}
// Create the "pod".
preemptor, err := createPausePod(cs, test.pod)
if err != nil {
t.Errorf("Error while creating high priority pod: %v", err)
}
// Wait for preemption of pods and make sure the other ones are not preempted.
for i, p := range pods {
if _, found := test.preemptedPodIndexes[i]; found {
if err = wait.Poll(time.Second, wait.ForeverTestTimeout, podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil {
t.Errorf("Test [%v]: Pod %v is not getting evicted.", test.description, p.Name)
}
} else {
if p.DeletionTimestamp != nil {
t.Errorf("Test [%v]: Didn't expect pod %v to get preempted.", test.description, p.Name)
}
}
}
// Also check that the preemptor pod gets the annotation for nominated node name.
if len(test.preemptedPodIndexes) > 0 {
if err := waitForNominatedNodeAnnotation(cs, preemptor); err != nil {
t.Errorf("Test [%v]: NominatedNodeName annotation was not set for pod %v: %v", test.description, preemptor.Name, err)
}
}
// Cleanup
pods = append(pods, preemptor)
cleanupPods(cs, t, pods)
}
}
func mkPriorityPodWithGrace(tc *TestContext, name string, priority int32, grace int64) *v1.Pod {
defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(100, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(100, resource.BinarySI)},
}
pod := initPausePod(tc.clientSet, &pausePodConfig{
Name: name,
Namespace: tc.ns.Name,
Priority: &priority,
Labels: map[string]string{"pod": name},
Resources: defaultPodRes,
})
// Setting grace period to zero. Otherwise, we may never see the actual deletion
// of the pods in integration tests.
pod.Spec.TerminationGracePeriodSeconds = &grace
return pod
}
// This test ensures that while the preempting pod is waiting for the victims to
// terminate, other pending lower priority pods are not scheduled in the room created
// after preemption and while the higher priority pods is not scheduled yet.
func TestPreemptionStarvation(t *testing.T) {
// Enable PodPriority feature gate.
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority))
// Initialize scheduler.
context := initTest(t, "preemption")
defer cleanupTest(t, context)
cs := context.clientSet
tests := []struct {
description string
numExistingPod int
numExpectedPending int
preemptor *v1.Pod
}{
{
// This test ensures that while the preempting pod is waiting for the victims
// terminate, other lower priority pods are not scheduled in the room created
// after preemption and while the higher priority pods is not scheduled yet.
description: "starvation test: higher priority pod is scheduled before the lower priority ones",
numExistingPod: 10,
numExpectedPending: 5,
preemptor: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(300, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
}),
},
}
// Create a node with some resources and a label.
nodeRes := &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(500, resource.BinarySI),
}
_, err := createNode(context.clientSet, "node1", nodeRes)
if err != nil {
t.Fatalf("Error creating nodes: %v", err)
}
for _, test := range tests {
pendingPods := make([]*v1.Pod, test.numExpectedPending)
numRunningPods := test.numExistingPod - test.numExpectedPending
runningPods := make([]*v1.Pod, numRunningPods)
// Create and run existingPods.
for i := 0; i < numRunningPods; i++ {
runningPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(context, fmt.Sprintf("rpod-%v", i), mediumPriority, 0))
if err != nil {
t.Fatalf("Test [%v]: Error creating pause pod: %v", test.description, err)
}
}
// make sure that runningPods are all scheduled.
for _, p := range runningPods {
if err := waitForPodToSchedule(cs, p); err != nil {
t.Fatalf("Pod %v didn't get scheduled: %v", p.Name, err)
}
}
// Create pending pods.
for i := 0; i < test.numExpectedPending; i++ {
pendingPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(context, fmt.Sprintf("ppod-%v", i), mediumPriority, 0))
if err != nil {
t.Fatalf("Test [%v]: Error creating pending pod: %v", test.description, err)
}
}
// Make sure that all pending pods are being marked unschedulable.
for _, p := range pendingPods {
if err := wait.Poll(100*time.Millisecond, wait.ForeverTestTimeout,
podUnschedulable(cs, p.Namespace, p.Name)); err != nil {
t.Errorf("Pod %v didn't get marked unschedulable: %v", p.Name, err)
}
}
// Create the preemptor.
preemptor, err := createPausePod(cs, test.preemptor)
if err != nil {
t.Errorf("Error while creating the preempting pod: %v", err)
}
// Check that the preemptor pod gets the annotation for nominated node name.
if err := waitForNominatedNodeAnnotation(cs, preemptor); err != nil {
t.Errorf("Test [%v]: NominatedNodeName annotation was not set for pod %v: %v", test.description, preemptor.Name, err)
}
// Make sure that preemptor is scheduled after preemptions.
if err := waitForPodToScheduleWithTimeout(cs, preemptor, 60*time.Second); err != nil {
t.Errorf("Preemptor pod %v didn't get scheduled: %v", preemptor.Name, err)
}
// Cleanup
glog.Info("Cleaning up all pods...")
allPods := pendingPods
allPods = append(allPods, runningPods...)
allPods = append(allPods, preemptor)
cleanupPods(cs, t, allPods)
}
}
// TestNominatedNodeCleanUp checks that when there are nominated pods on a
// node and a higher priority pod is nominated to run on the node, the nominated
// node name of the lower priority pods is cleared.
// Test scenario:
// 1. Create a few low priority pods with long grade period that fill up a node.
// 2. Create a medium priority pod that preempt some of those pods.
// 3. Check that nominated node name of the medium priority pod is set.
// 4. Create a high priority pod that preempts some pods on that node.
// 5. Check that nominated node name of the high priority pod is set and nominated
// node name of the medium priority pod is cleared.
func TestNominatedNodeCleanUp(t *testing.T) {
// Enable PodPriority feature gate.
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority))
// Initialize scheduler.
context := initTest(t, "preemption")
defer cleanupTest(t, context)
cs := context.clientSet
// Create a node with some resources and a label.
nodeRes := &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(500, resource.BinarySI),
}
_, err := createNode(context.clientSet, "node1", nodeRes)
if err != nil {
t.Fatalf("Error creating nodes: %v", err)
}
// Step 1. Create a few low priority pods.
lowPriPods := make([]*v1.Pod, 4)
for i := 0; i < len(lowPriPods); i++ {
lowPriPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(context, fmt.Sprintf("lpod-%v", i), lowPriority, 60))
if err != nil {
t.Fatalf("Error creating pause pod: %v", err)
}
}
// make sure that the pods are all scheduled.
for _, p := range lowPriPods {
if err := waitForPodToSchedule(cs, p); err != nil {
t.Fatalf("Pod %v didn't get scheduled: %v", p.Name, err)
}
}
// Step 2. Create a medium priority pod.
podConf := initPausePod(cs, &pausePodConfig{
Name: "medium-priority",
Namespace: context.ns.Name,
Priority: &mediumPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(400, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(400, resource.BinarySI)},
},
})
medPriPod, err := createPausePod(cs, podConf)
if err != nil {
t.Errorf("Error while creating the medium priority pod: %v", err)
}
// Step 3. Check that nominated node name of the medium priority pod is set.
if err := waitForNominatedNodeAnnotation(cs, medPriPod); err != nil {
t.Errorf("NominatedNodeName annotation was not set for pod %v: %v", medPriPod.Name, err)
}
// Step 4. Create a high priority pod.
podConf = initPausePod(cs, &pausePodConfig{
Name: "high-priority",
Namespace: context.ns.Name,
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(300, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
})
highPriPod, err := createPausePod(cs, podConf)
if err != nil {
t.Errorf("Error while creating the high priority pod: %v", err)
}
// Step 5. Check that nominated node name of the high priority pod is set.
if err := waitForNominatedNodeAnnotation(cs, highPriPod); err != nil {
t.Errorf("NominatedNodeName annotation was not set for pod %v: %v", medPriPod.Name, err)
}
// And the nominated node name of the medium priority pod is cleared.
if err := wait.Poll(100*time.Millisecond, wait.ForeverTestTimeout, func() (bool, error) {
pod, err := cs.CoreV1().Pods(medPriPod.Namespace).Get(medPriPod.Name, metav1.GetOptions{})
if err != nil {
t.Errorf("Error getting the medium priority pod info: %v", err)
}
n, found := pod.Annotations[core.NominatedNodeAnnotationKey]
if !found || len(n) == 0 {
return true, nil
}
return false, err
}); err != nil {
t.Errorf("The nominated node name of the medium priority pod was not cleared: %v", err)
}
}
func mkMinAvailablePDB(name, namespace string, minAvailable int, matchLabels map[string]string) *policy.PodDisruptionBudget {
intMinAvailable := intstr.FromInt(minAvailable)
return &policy.PodDisruptionBudget{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: namespace,
},
Spec: policy.PodDisruptionBudgetSpec{
MinAvailable: &intMinAvailable,
Selector: &metav1.LabelSelector{MatchLabels: matchLabels},
},
}
}
// TestPDBInPreemption tests PodDisruptionBudget support in preemption.
func TestPDBInPreemption(t *testing.T) {
// Enable PodPriority feature gate.
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority))
// Initialize scheduler.
context := initTest(t, "preemption-pdb")
defer cleanupTest(t, context)
cs := context.clientSet
defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(100, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(100, resource.BinarySI)},
}
defaultNodeRes := &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(500, resource.BinarySI),
}
type nodeConfig struct {
name string
res *v1.ResourceList
}
tests := []struct {
description string
nodes []*nodeConfig
pdbs []*policy.PodDisruptionBudget
existingPods []*v1.Pod
pod *v1.Pod
preemptedPodIndexes map[int]struct{}
}{
{
description: "A non-PDB violating pod is preempted despite its higher priority",
nodes: []*nodeConfig{{name: "node-1", res: defaultNodeRes}},
pdbs: []*policy.PodDisruptionBudget{
mkMinAvailablePDB("pdb-1", context.ns.Name, 2, map[string]string{"foo": "bar"}),
},
existingPods: []*v1.Pod{
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod1",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
Labels: map[string]string{"foo": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod2",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
Labels: map[string]string{"foo": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "mid-pod3",
Namespace: context.ns.Name,
Priority: &mediumPriority,
Resources: defaultPodRes,
}),
},
pod: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(300, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
}),
preemptedPodIndexes: map[int]struct{}{2: {}},
},
{
description: "A node without any PDB violating pods is preferred for preemption",
nodes: []*nodeConfig{
{name: "node-1", res: defaultNodeRes},
{name: "node-2", res: defaultNodeRes},
},
pdbs: []*policy.PodDisruptionBudget{
mkMinAvailablePDB("pdb-1", context.ns.Name, 2, map[string]string{"foo": "bar"}),
},
existingPods: []*v1.Pod{
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod1",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
NodeName: "node-1",
Labels: map[string]string{"foo": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "mid-pod2",
Namespace: context.ns.Name,
Priority: &mediumPriority,
NodeName: "node-2",
Resources: defaultPodRes,
}),
},
pod: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
}),
preemptedPodIndexes: map[int]struct{}{1: {}},
},
{
description: "A node with fewer PDB violating pods is preferred for preemption",
nodes: []*nodeConfig{
{name: "node-1", res: defaultNodeRes},
{name: "node-2", res: defaultNodeRes},
{name: "node-3", res: defaultNodeRes},
},
pdbs: []*policy.PodDisruptionBudget{
mkMinAvailablePDB("pdb-1", context.ns.Name, 2, map[string]string{"foo1": "bar"}),
mkMinAvailablePDB("pdb-2", context.ns.Name, 2, map[string]string{"foo2": "bar"}),
},
existingPods: []*v1.Pod{
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod1",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
NodeName: "node-1",
Labels: map[string]string{"foo1": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "mid-pod1",
Namespace: context.ns.Name,
Priority: &mediumPriority,
Resources: defaultPodRes,
NodeName: "node-1",
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod2",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
NodeName: "node-2",
Labels: map[string]string{"foo2": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "mid-pod2",
Namespace: context.ns.Name,
Priority: &mediumPriority,
Resources: defaultPodRes,
NodeName: "node-2",
Labels: map[string]string{"foo2": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod4",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
NodeName: "node-3",
Labels: map[string]string{"foo2": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod5",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
NodeName: "node-3",
Labels: map[string]string{"foo2": "bar"},
}),
initPausePod(context.clientSet, &pausePodConfig{
Name: "low-pod6",
Namespace: context.ns.Name,
Priority: &lowPriority,
Resources: defaultPodRes,
NodeName: "node-3",
Labels: map[string]string{"foo2": "bar"},
}),
},
pod: initPausePod(cs, &pausePodConfig{
Name: "preemptor-pod",
Namespace: context.ns.Name,
Priority: &highPriority,
Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(500, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(200, resource.BinarySI)},
},
}),
preemptedPodIndexes: map[int]struct{}{0: {}, 1: {}},
},
}
for _, test := range tests {
for _, nodeConf := range test.nodes {
_, err := createNode(cs, nodeConf.name, nodeConf.res)
if err != nil {
t.Fatalf("Error creating node %v: %v", nodeConf.name, err)
}
}
// Create PDBs.
for _, pdb := range test.pdbs {
_, err := context.clientSet.PolicyV1beta1().PodDisruptionBudgets(context.ns.Name).Create(pdb)
if err != nil {
t.Fatalf("Failed to create PDB: %v", err)
}
}
// Wait for PDBs to show up in the scheduler's cache.
if err := wait.Poll(time.Second, 15*time.Second, func() (bool, error) {
cachedPDBs, err := context.scheduler.Config().SchedulerCache.ListPDBs(labels.Everything())
if err != nil {
t.Errorf("Error while polling for PDB: %v", err)
return false, err
}
return len(cachedPDBs) == len(test.pdbs), err
}); err != nil {
t.Fatalf("Not all PDBs were added to the cache: %v", err)
}
pods := make([]*v1.Pod, len(test.existingPods))
var err error
// Create and run existingPods.
for i, p := range test.existingPods {
if pods[i], err = runPausePod(cs, p); err != nil {
t.Fatalf("Test [%v]: Error running pause pod: %v", test.description, err)
}
}
// Create the "pod".
preemptor, err := createPausePod(cs, test.pod)
if err != nil {
t.Errorf("Error while creating high priority pod: %v", err)
}
// Wait for preemption of pods and make sure the other ones are not preempted.
for i, p := range pods {
if _, found := test.preemptedPodIndexes[i]; found {
if err = wait.Poll(time.Second, wait.ForeverTestTimeout, podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil {
t.Errorf("Test [%v]: Pod %v is not getting evicted.", test.description, p.Name)
}
} else {
if p.DeletionTimestamp != nil {
t.Errorf("Test [%v]: Didn't expect pod %v to get preempted.", test.description, p.Name)
}
}
}
// Also check that the preemptor pod gets the annotation for nominated node name.
if len(test.preemptedPodIndexes) > 0 {
if err := waitForNominatedNodeAnnotation(cs, preemptor); err != nil {
t.Errorf("Test [%v]: NominatedNodeName annotation was not set for pod %v: %v", test.description, preemptor.Name, err)
}
}
// Cleanup
pods = append(pods, preemptor)
cleanupPods(cs, t, pods)
cs.PolicyV1beta1().PodDisruptionBudgets(context.ns.Name).DeleteCollection(nil, metav1.ListOptions{})
cs.CoreV1().Nodes().DeleteCollection(nil, metav1.ListOptions{})
}
}

View File

@ -0,0 +1,174 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
testutils "k8s.io/kubernetes/test/utils"
)
// This file tests the scheduler priority functions.
// TestNodeAffinity verifies that scheduler's node affinity priority function
// works correctly.
func TestNodeAffinity(t *testing.T) {
context := initTest(t, "node-affinity")
defer cleanupTest(t, context)
// Add a few nodes.
nodes, err := createNodes(context.clientSet, "testnode", nil, 5)
if err != nil {
t.Fatalf("Cannot create nodes: %v", err)
}
// Add a label to one of the nodes.
labeledNode := nodes[1]
labelKey := "kubernetes.io/node-topologyKey"
labelValue := "topologyvalue"
labels := map[string]string{
labelKey: labelValue,
}
if err = testutils.AddLabelsToNode(context.clientSet, labeledNode.Name, labels); err != nil {
t.Fatalf("Cannot add labels to node: %v", err)
}
if err = waitForNodeLabels(context.clientSet, labeledNode.Name, labels); err != nil {
t.Fatalf("Adding labels to node didn't succeed: %v", err)
}
// Create a pod with node affinity.
podName := "pod-with-node-affinity"
pod, err := runPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{
Name: podName,
Namespace: context.ns.Name,
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.PreferredSchedulingTerm{
{
Preference: v1.NodeSelectorTerm{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: labelKey,
Operator: v1.NodeSelectorOpIn,
Values: []string{labelValue},
},
},
},
Weight: 20,
},
},
},
},
}))
if err != nil {
t.Fatalf("Error running pause pod: %v", err)
}
if pod.Spec.NodeName != labeledNode.Name {
t.Errorf("Pod %v got scheduled on an unexpected node: %v. Expected node: %v.", podName, pod.Spec.NodeName, labeledNode.Name)
} else {
t.Logf("Pod %v got successfully scheduled on node %v.", podName, pod.Spec.NodeName)
}
}
// TestPodAffinity verifies that scheduler's pod affinity priority function
// works correctly.
func TestPodAffinity(t *testing.T) {
context := initTest(t, "pod-affinity")
defer cleanupTest(t, context)
// Add a few nodes.
nodesInTopology, err := createNodes(context.clientSet, "in-topology", nil, 5)
if err != nil {
t.Fatalf("Cannot create nodes: %v", err)
}
topologyKey := "node-topologykey"
topologyValue := "topologyvalue"
nodeLabels := map[string]string{
topologyKey: topologyValue,
}
for _, node := range nodesInTopology {
// Add topology key to all the nodes.
if err = testutils.AddLabelsToNode(context.clientSet, node.Name, nodeLabels); err != nil {
t.Fatalf("Cannot add labels to node %v: %v", node.Name, err)
}
if err = waitForNodeLabels(context.clientSet, node.Name, nodeLabels); err != nil {
t.Fatalf("Adding labels to node %v didn't succeed: %v", node.Name, err)
}
}
// Add a pod with a label and wait for it to schedule.
labelKey := "service"
labelValue := "S1"
_, err = runPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{
Name: "attractor-pod",
Namespace: context.ns.Name,
Labels: map[string]string{labelKey: labelValue},
}))
if err != nil {
t.Fatalf("Error running the attractor pod: %v", err)
}
// Add a few more nodes without the topology label.
_, err = createNodes(context.clientSet, "other-node", nil, 5)
if err != nil {
t.Fatalf("Cannot create the second set of nodes: %v", err)
}
// Add a new pod with affinity to the attractor pod.
podName := "pod-with-podaffinity"
pod, err := runPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{
Name: podName,
Namespace: context.ns.Name,
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: labelKey,
Operator: metav1.LabelSelectorOpIn,
Values: []string{labelValue, "S3"},
},
{
Key: labelKey,
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"S2"},
}, {
Key: labelKey,
Operator: metav1.LabelSelectorOpExists,
},
},
},
TopologyKey: topologyKey,
Namespaces: []string{context.ns.Name},
},
Weight: 50,
},
},
},
},
}))
if err != nil {
t.Fatalf("Error running pause pod: %v", err)
}
// The new pod must be scheduled on one of the nodes with the same topology
// key-value as the attractor pod.
for _, node := range nodesInTopology {
if node.Name == pod.Spec.NodeName {
t.Logf("Pod %v got successfully scheduled on node %v.", podName, pod.Spec.NodeName)
return
}
}
t.Errorf("Pod %v got scheduled on an unexpected node: %v.", podName, pod.Spec.NodeName)
}

View File

@ -0,0 +1,704 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
// This file tests the scheduler.
import (
"fmt"
"reflect"
"testing"
"time"
"k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/diff"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
clientv1core "k8s.io/client-go/kubernetes/typed/core/v1"
corelisters "k8s.io/client-go/listers/core/v1"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/legacyscheme"
"k8s.io/kubernetes/pkg/api/testapi"
"k8s.io/kubernetes/pkg/apis/componentconfig"
schedulerapp "k8s.io/kubernetes/plugin/cmd/kube-scheduler/app"
"k8s.io/kubernetes/plugin/pkg/scheduler"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
_ "k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"k8s.io/kubernetes/test/integration/framework"
)
const enableEquivalenceCache = true
type nodeMutationFunc func(t *testing.T, n *v1.Node, nodeLister corelisters.NodeLister, c clientset.Interface)
type nodeStateManager struct {
makeSchedulable nodeMutationFunc
makeUnSchedulable nodeMutationFunc
}
func PredicateOne(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
return true, nil, nil
}
func PredicateTwo(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
return true, nil, nil
}
func PriorityOne(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
return []schedulerapi.HostPriority{}, nil
}
func PriorityTwo(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
return []schedulerapi.HostPriority{}, nil
}
// TestSchedulerCreationFromConfigMap verifies that scheduler can be created
// from configurations provided by a ConfigMap object and then verifies that the
// configuration is applied correctly.
func TestSchedulerCreationFromConfigMap(t *testing.T) {
_, s, closeFn := framework.RunAMaster(nil)
defer closeFn()
ns := framework.CreateTestingNamespace("configmap", s, t)
defer framework.DeleteTestingNamespace(ns, s, t)
clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: s.URL, ContentConfig: restclient.ContentConfig{GroupVersion: testapi.Groups[v1.GroupName].GroupVersion()}})
defer clientSet.CoreV1().Nodes().DeleteCollection(nil, metav1.ListOptions{})
informerFactory := informers.NewSharedInformerFactory(clientSet, 0)
// Pre-register some predicate and priority functions
factory.RegisterFitPredicate("PredicateOne", PredicateOne)
factory.RegisterFitPredicate("PredicateTwo", PredicateTwo)
factory.RegisterPriorityFunction("PriorityOne", PriorityOne, 1)
factory.RegisterPriorityFunction("PriorityTwo", PriorityTwo, 1)
// Add a ConfigMap object.
configPolicyName := "scheduler-custom-policy-config"
policyConfigMap := v1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{Namespace: metav1.NamespaceSystem, Name: configPolicyName},
Data: map[string]string{
componentconfig.SchedulerPolicyConfigMapKey: `{
"kind" : "Policy",
"apiVersion" : "v1",
"predicates" : [
{"name" : "PredicateOne"},
{"name" : "PredicateTwo"}
],
"priorities" : [
{"name" : "PriorityOne", "weight" : 1},
{"name" : "PriorityTwo", "weight" : 5}
]
}`,
},
}
policyConfigMap.APIVersion = testapi.Groups[v1.GroupName].GroupVersion().String()
clientSet.CoreV1().ConfigMaps(metav1.NamespaceSystem).Create(&policyConfigMap)
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartRecordingToSink(&clientv1core.EventSinkImpl{Interface: clientv1core.New(clientSet.CoreV1().RESTClient()).Events("")})
ss := &schedulerapp.SchedulerServer{
SchedulerName: v1.DefaultSchedulerName,
AlgorithmSource: componentconfig.SchedulerAlgorithmSource{
Policy: &componentconfig.SchedulerPolicySource{
ConfigMap: &componentconfig.SchedulerPolicyConfigMapSource{
Namespace: policyConfigMap.Namespace,
Name: policyConfigMap.Name,
},
},
},
HardPodAffinitySymmetricWeight: v1.DefaultHardPodAffinitySymmetricWeight,
Client: clientSet,
InformerFactory: informerFactory,
PodInformer: factory.NewPodInformer(clientSet, 0, v1.DefaultSchedulerName),
EventClient: clientSet.CoreV1(),
Recorder: eventBroadcaster.NewRecorder(legacyscheme.Scheme, v1.EventSource{Component: v1.DefaultSchedulerName}),
Broadcaster: eventBroadcaster,
}
config, err := ss.SchedulerConfig()
if err != nil {
t.Fatalf("couldn't make scheduler config: %v", err)
}
// Verify that the config is applied correctly.
schedPredicates := config.Algorithm.Predicates()
schedPrioritizers := config.Algorithm.Prioritizers()
// Includes one mandatory predicates.
if len(schedPredicates) != 3 || len(schedPrioritizers) != 2 {
t.Errorf("Unexpected number of predicates or priority functions. Number of predicates: %v, number of prioritizers: %v", len(schedPredicates), len(schedPrioritizers))
}
// Check a predicate and a priority function.
if schedPredicates["PredicateTwo"] == nil {
t.Errorf("Expected to have a PodFitsHostPorts predicate.")
}
if schedPrioritizers[1].Function == nil || schedPrioritizers[1].Weight != 5 {
t.Errorf("Unexpected prioritizer: func: %v, weight: %v", schedPrioritizers[1].Function, schedPrioritizers[1].Weight)
}
}
// TestSchedulerCreationFromNonExistentConfigMap ensures that creation of the
// scheduler from a non-existent ConfigMap fails.
func TestSchedulerCreationFromNonExistentConfigMap(t *testing.T) {
_, s, closeFn := framework.RunAMaster(nil)
defer closeFn()
ns := framework.CreateTestingNamespace("configmap", s, t)
defer framework.DeleteTestingNamespace(ns, s, t)
clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: s.URL, ContentConfig: restclient.ContentConfig{GroupVersion: testapi.Groups[v1.GroupName].GroupVersion()}})
defer clientSet.CoreV1().Nodes().DeleteCollection(nil, metav1.ListOptions{})
informerFactory := informers.NewSharedInformerFactory(clientSet, 0)
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartRecordingToSink(&clientv1core.EventSinkImpl{Interface: clientv1core.New(clientSet.CoreV1().RESTClient()).Events("")})
ss := &schedulerapp.SchedulerServer{
SchedulerName: v1.DefaultSchedulerName,
AlgorithmSource: componentconfig.SchedulerAlgorithmSource{
Policy: &componentconfig.SchedulerPolicySource{
ConfigMap: &componentconfig.SchedulerPolicyConfigMapSource{
Namespace: "non-existent-config",
Name: "non-existent-config",
},
},
},
HardPodAffinitySymmetricWeight: v1.DefaultHardPodAffinitySymmetricWeight,
Client: clientSet,
InformerFactory: informerFactory,
PodInformer: factory.NewPodInformer(clientSet, 0, v1.DefaultSchedulerName),
EventClient: clientSet.CoreV1(),
Recorder: eventBroadcaster.NewRecorder(legacyscheme.Scheme, v1.EventSource{Component: v1.DefaultSchedulerName}),
Broadcaster: eventBroadcaster,
}
_, err := ss.SchedulerConfig()
if err == nil {
t.Fatalf("Creation of scheduler didn't fail while the policy ConfigMap didn't exist.")
}
}
func TestUnschedulableNodes(t *testing.T) {
context := initTest(t, "unschedulable-nodes")
defer cleanupTest(t, context)
nodeLister := context.schedulerConfigFactory.GetNodeLister()
// NOTE: This test cannot run in parallel, because it is creating and deleting
// non-namespaced objects (Nodes).
defer context.clientSet.CoreV1().Nodes().DeleteCollection(nil, metav1.ListOptions{})
goodCondition := v1.NodeCondition{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
Reason: fmt.Sprintf("schedulable condition"),
LastHeartbeatTime: metav1.Time{Time: time.Now()},
}
badCondition := v1.NodeCondition{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
Reason: fmt.Sprintf("unschedulable condition"),
LastHeartbeatTime: metav1.Time{Time: time.Now()},
}
// Create a new schedulable node, since we're first going to apply
// the unschedulable condition and verify that pods aren't scheduled.
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "node-scheduling-test-node"},
Spec: v1.NodeSpec{Unschedulable: false},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
},
Conditions: []v1.NodeCondition{goodCondition},
},
}
nodeKey, err := cache.MetaNamespaceKeyFunc(node)
if err != nil {
t.Fatalf("Couldn't retrieve key for node %v", node.Name)
}
// The test does the following for each nodeStateManager in this list:
// 1. Create a new node
// 2. Apply the makeUnSchedulable function
// 3. Create a new pod
// 4. Check that the pod doesn't get assigned to the node
// 5. Apply the schedulable function
// 6. Check that the pod *does* get assigned to the node
// 7. Delete the pod and node.
nodeModifications := []nodeStateManager{
// Test node.Spec.Unschedulable=true/false
{
makeUnSchedulable: func(t *testing.T, n *v1.Node, nodeLister corelisters.NodeLister, c clientset.Interface) {
n.Spec.Unschedulable = true
if _, err := c.CoreV1().Nodes().Update(n); err != nil {
t.Fatalf("Failed to update node with unschedulable=true: %v", err)
}
err = waitForReflection(t, nodeLister, nodeKey, func(node interface{}) bool {
// An unschedulable node should still be present in the store
// Nodes that are unschedulable or that are not ready or
// have their disk full (Node.Spec.Conditions) are excluded
// based on NodeConditionPredicate, a separate check
return node != nil && node.(*v1.Node).Spec.Unschedulable == true
})
if err != nil {
t.Fatalf("Failed to observe reflected update for setting unschedulable=true: %v", err)
}
},
makeSchedulable: func(t *testing.T, n *v1.Node, nodeLister corelisters.NodeLister, c clientset.Interface) {
n.Spec.Unschedulable = false
if _, err := c.CoreV1().Nodes().Update(n); err != nil {
t.Fatalf("Failed to update node with unschedulable=false: %v", err)
}
err = waitForReflection(t, nodeLister, nodeKey, func(node interface{}) bool {
return node != nil && node.(*v1.Node).Spec.Unschedulable == false
})
if err != nil {
t.Fatalf("Failed to observe reflected update for setting unschedulable=false: %v", err)
}
},
},
// Test node.Status.Conditions=ConditionTrue/Unknown
{
makeUnSchedulable: func(t *testing.T, n *v1.Node, nodeLister corelisters.NodeLister, c clientset.Interface) {
n.Status = v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
},
Conditions: []v1.NodeCondition{badCondition},
}
if _, err = c.CoreV1().Nodes().UpdateStatus(n); err != nil {
t.Fatalf("Failed to update node with bad status condition: %v", err)
}
err = waitForReflection(t, nodeLister, nodeKey, func(node interface{}) bool {
return node != nil && node.(*v1.Node).Status.Conditions[0].Status == v1.ConditionUnknown
})
if err != nil {
t.Fatalf("Failed to observe reflected update for status condition update: %v", err)
}
},
makeSchedulable: func(t *testing.T, n *v1.Node, nodeLister corelisters.NodeLister, c clientset.Interface) {
n.Status = v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
},
Conditions: []v1.NodeCondition{goodCondition},
}
if _, err = c.CoreV1().Nodes().UpdateStatus(n); err != nil {
t.Fatalf("Failed to update node with healthy status condition: %v", err)
}
err = waitForReflection(t, nodeLister, nodeKey, func(node interface{}) bool {
return node != nil && node.(*v1.Node).Status.Conditions[0].Status == v1.ConditionTrue
})
if err != nil {
t.Fatalf("Failed to observe reflected update for status condition update: %v", err)
}
},
},
}
for i, mod := range nodeModifications {
unSchedNode, err := context.clientSet.CoreV1().Nodes().Create(node)
if err != nil {
t.Fatalf("Failed to create node: %v", err)
}
// Apply the unschedulable modification to the node, and wait for the reflection
mod.makeUnSchedulable(t, unSchedNode, nodeLister, context.clientSet)
// Create the new pod, note that this needs to happen post unschedulable
// modification or we have a race in the test.
myPod, err := createPausePodWithResource(context.clientSet, "node-scheduling-test-pod", context.ns.Name, nil)
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
// There are no schedulable nodes - the pod shouldn't be scheduled.
err = waitForPodToScheduleWithTimeout(context.clientSet, myPod, 2*time.Second)
if err == nil {
t.Errorf("Pod scheduled successfully on unschedulable nodes")
}
if err != wait.ErrWaitTimeout {
t.Errorf("Test %d: failed while trying to confirm the pod does not get scheduled on the node: %v", i, err)
} else {
t.Logf("Test %d: Pod did not get scheduled on an unschedulable node", i)
}
// Apply the schedulable modification to the node, and wait for the reflection
schedNode, err := context.clientSet.CoreV1().Nodes().Get(unSchedNode.Name, metav1.GetOptions{})
if err != nil {
t.Fatalf("Failed to get node: %v", err)
}
mod.makeSchedulable(t, schedNode, nodeLister, context.clientSet)
// Wait until the pod is scheduled.
if err := waitForPodToSchedule(context.clientSet, myPod); err != nil {
t.Errorf("Test %d: failed to schedule a pod: %v", i, err)
} else {
t.Logf("Test %d: Pod got scheduled on a schedulable node", i)
}
// Clean up.
if err := deletePod(context.clientSet, myPod.Name, myPod.Namespace); err != nil {
t.Errorf("Failed to delete pod: %v", err)
}
err = context.clientSet.CoreV1().Nodes().Delete(schedNode.Name, nil)
if err != nil {
t.Errorf("Failed to delete node: %v", err)
}
}
}
func TestMultiScheduler(t *testing.T) {
/*
This integration tests the multi-scheduler feature in the following way:
1. create a default scheduler
2. create a node
3. create 3 pods: testPodNoAnnotation, testPodWithAnnotationFitsDefault and testPodWithAnnotationFitsFoo
- note: the first two should be picked and scheduled by default scheduler while the last one should be
picked by scheduler of name "foo-scheduler" which does not exist yet.
4. **check point-1**:
- testPodNoAnnotation, testPodWithAnnotationFitsDefault should be scheduled
- testPodWithAnnotationFitsFoo should NOT be scheduled
5. create a scheduler with name "foo-scheduler"
6. **check point-2**:
- testPodWithAnnotationFitsFoo should be scheduled
7. stop default scheduler
8. create 2 pods: testPodNoAnnotation2 and testPodWithAnnotationFitsDefault2
- note: these two pods belong to default scheduler which no longer exists
9. **check point-3**:
- testPodNoAnnotation2 and testPodWithAnnotationFitsDefault2 should NOT be scheduled
*/
// 1. create and start default-scheduler
context := initTest(t, "multi-scheduler")
defer cleanupTest(t, context)
// 2. create a node
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "node-multi-scheduler-test-node"},
Spec: v1.NodeSpec{Unschedulable: false},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
},
},
}
context.clientSet.CoreV1().Nodes().Create(node)
// 3. create 3 pods for testing
t.Logf("create 3 pods for testing")
testPod, err := createPausePodWithResource(context.clientSet, "pod-without-scheduler-name", context.ns.Name, nil)
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
defaultScheduler := "default-scheduler"
testPodFitsDefault, err := createPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{Name: "pod-fits-default", Namespace: context.ns.Name, SchedulerName: defaultScheduler}))
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
fooScheduler := "foo-scheduler"
testPodFitsFoo, err := createPausePod(context.clientSet, initPausePod(context.clientSet, &pausePodConfig{Name: "pod-fits-foo", Namespace: context.ns.Name, SchedulerName: fooScheduler}))
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
// 4. **check point-1**:
// - testPod, testPodFitsDefault should be scheduled
// - testPodFitsFoo should NOT be scheduled
t.Logf("wait for pods scheduled")
if err := waitForPodToSchedule(context.clientSet, testPod); err != nil {
t.Errorf("Test MultiScheduler: %s Pod not scheduled: %v", testPod.Name, err)
} else {
t.Logf("Test MultiScheduler: %s Pod scheduled", testPod.Name)
}
if err := waitForPodToSchedule(context.clientSet, testPodFitsDefault); err != nil {
t.Errorf("Test MultiScheduler: %s Pod not scheduled: %v", testPodFitsDefault.Name, err)
} else {
t.Logf("Test MultiScheduler: %s Pod scheduled", testPodFitsDefault.Name)
}
if err := waitForPodToScheduleWithTimeout(context.clientSet, testPodFitsFoo, time.Second*5); err == nil {
t.Errorf("Test MultiScheduler: %s Pod got scheduled, %v", testPodFitsFoo.Name, err)
} else {
t.Logf("Test MultiScheduler: %s Pod not scheduled", testPodFitsFoo.Name)
}
// 5. create and start a scheduler with name "foo-scheduler"
clientSet2 := clientset.NewForConfigOrDie(&restclient.Config{Host: context.httpServer.URL, ContentConfig: restclient.ContentConfig{GroupVersion: testapi.Groups[v1.GroupName].GroupVersion()}})
informerFactory2 := informers.NewSharedInformerFactory(context.clientSet, 0)
podInformer2 := factory.NewPodInformer(context.clientSet, 0, fooScheduler)
schedulerConfigFactory2 := factory.NewConfigFactory(
fooScheduler,
clientSet2,
informerFactory2.Core().V1().Nodes(),
podInformer2,
informerFactory2.Core().V1().PersistentVolumes(),
informerFactory2.Core().V1().PersistentVolumeClaims(),
informerFactory2.Core().V1().ReplicationControllers(),
informerFactory2.Extensions().V1beta1().ReplicaSets(),
informerFactory2.Apps().V1beta1().StatefulSets(),
informerFactory2.Core().V1().Services(),
informerFactory2.Policy().V1beta1().PodDisruptionBudgets(),
informerFactory2.Storage().V1().StorageClasses(),
v1.DefaultHardPodAffinitySymmetricWeight,
enableEquivalenceCache,
)
schedulerConfig2, err := schedulerConfigFactory2.Create()
if err != nil {
t.Errorf("Couldn't create scheduler config: %v", err)
}
eventBroadcaster2 := record.NewBroadcaster()
schedulerConfig2.Recorder = eventBroadcaster2.NewRecorder(legacyscheme.Scheme, v1.EventSource{Component: fooScheduler})
eventBroadcaster2.StartRecordingToSink(&clientv1core.EventSinkImpl{Interface: clientv1core.New(clientSet2.CoreV1().RESTClient()).Events("")})
go podInformer2.Informer().Run(schedulerConfig2.StopEverything)
informerFactory2.Start(schedulerConfig2.StopEverything)
sched2, _ := scheduler.NewFromConfigurator(&scheduler.FakeConfigurator{Config: schedulerConfig2}, nil...)
sched2.Run()
defer close(schedulerConfig2.StopEverything)
// 6. **check point-2**:
// - testPodWithAnnotationFitsFoo should be scheduled
err = waitForPodToSchedule(context.clientSet, testPodFitsFoo)
if err != nil {
t.Errorf("Test MultiScheduler: %s Pod not scheduled, %v", testPodFitsFoo.Name, err)
} else {
t.Logf("Test MultiScheduler: %s Pod scheduled", testPodFitsFoo.Name)
}
// 7. delete the pods that were scheduled by the default scheduler, and stop the default scheduler
if err := deletePod(context.clientSet, testPod.Name, context.ns.Name); err != nil {
t.Errorf("Failed to delete pod: %v", err)
}
if err := deletePod(context.clientSet, testPodFitsDefault.Name, context.ns.Name); err != nil {
t.Errorf("Failed to delete pod: %v", err)
}
// The rest of this test assumes that closing StopEverything will cause the
// scheduler thread to stop immediately. It won't, and in fact it will often
// schedule 1 more pod before finally exiting. Comment out until we fix that.
//
// See https://github.com/kubernetes/kubernetes/issues/23715 for more details.
/*
close(schedulerConfig.StopEverything)
// 8. create 2 pods: testPodNoAnnotation2 and testPodWithAnnotationFitsDefault2
// - note: these two pods belong to default scheduler which no longer exists
podWithNoAnnotation2 := createPod("pod-with-no-annotation2", nil)
podWithAnnotationFitsDefault2 := createPod("pod-with-annotation-fits-default2", schedulerAnnotationFitsDefault)
testPodNoAnnotation2, err := clientSet.CoreV1().Pods(ns.Name).Create(podWithNoAnnotation2)
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
testPodWithAnnotationFitsDefault2, err := clientSet.CoreV1().Pods(ns.Name).Create(podWithAnnotationFitsDefault2)
if err != nil {
t.Fatalf("Failed to create pod: %v", err)
}
// 9. **check point-3**:
// - testPodNoAnnotation2 and testPodWithAnnotationFitsDefault2 should NOT be scheduled
err = wait.Poll(time.Second, time.Second*5, podScheduled(clientSet, testPodNoAnnotation2.Namespace, testPodNoAnnotation2.Name))
if err == nil {
t.Errorf("Test MultiScheduler: %s Pod got scheduled, %v", testPodNoAnnotation2.Name, err)
} else {
t.Logf("Test MultiScheduler: %s Pod not scheduled", testPodNoAnnotation2.Name)
}
err = wait.Poll(time.Second, time.Second*5, podScheduled(clientSet, testPodWithAnnotationFitsDefault2.Namespace, testPodWithAnnotationFitsDefault2.Name))
if err == nil {
t.Errorf("Test MultiScheduler: %s Pod got scheduled, %v", testPodWithAnnotationFitsDefault2.Name, err)
} else {
t.Logf("Test MultiScheduler: %s Pod scheduled", testPodWithAnnotationFitsDefault2.Name)
}
*/
}
// This test will verify scheduler can work well regardless of whether kubelet is allocatable aware or not.
func TestAllocatable(t *testing.T) {
context := initTest(t, "allocatable")
defer cleanupTest(t, context)
// 2. create a node without allocatable awareness
nodeRes := &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(30, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(30, resource.BinarySI),
}
allocNode, err := createNode(context.clientSet, "node-allocatable-scheduler-test-node", nodeRes)
if err != nil {
t.Fatalf("Failed to create node: %v", err)
}
// 3. create resource pod which requires less than Capacity
podName := "pod-test-allocatable"
podRes := &v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(20, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(20, resource.BinarySI),
}
testAllocPod, err := createPausePodWithResource(context.clientSet, podName, context.ns.Name, podRes)
if err != nil {
t.Fatalf("Test allocatable unawareness failed to create pod: %v", err)
}
// 4. Test: this test pod should be scheduled since api-server will use Capacity as Allocatable
err = waitForPodToScheduleWithTimeout(context.clientSet, testAllocPod, time.Second*5)
if err != nil {
t.Errorf("Test allocatable unawareness: %s Pod not scheduled: %v", testAllocPod.Name, err)
} else {
t.Logf("Test allocatable unawareness: %s Pod scheduled", testAllocPod.Name)
}
// 5. Change the node status to allocatable aware, note that Allocatable is less than Pod's requirement
allocNode.Status = v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(30, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(30, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(10, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(10, resource.BinarySI),
},
}
if _, err := context.clientSet.CoreV1().Nodes().UpdateStatus(allocNode); err != nil {
t.Fatalf("Failed to update node with Status.Allocatable: %v", err)
}
if err := deletePod(context.clientSet, testAllocPod.Name, context.ns.Name); err != nil {
t.Fatalf("Failed to remove the first pod: %v", err)
}
// 6. Make another pod with different name, same resource request
podName2 := "pod-test-allocatable2"
testAllocPod2, err := createPausePodWithResource(context.clientSet, podName2, context.ns.Name, podRes)
if err != nil {
t.Fatalf("Test allocatable awareness failed to create pod: %v", err)
}
// 7. Test: this test pod should not be scheduled since it request more than Allocatable
if err := waitForPodToScheduleWithTimeout(context.clientSet, testAllocPod2, time.Second*5); err == nil {
t.Errorf("Test allocatable awareness: %s Pod got scheduled unexpectedly, %v", testAllocPod2.Name, err)
} else {
t.Logf("Test allocatable awareness: %s Pod not scheduled as expected", testAllocPod2.Name)
}
}
// TestPDBCache verifies that scheduler cache works as expected when handling
// PodDisruptionBudget.
func TestPDBCache(t *testing.T) {
context := initTest(t, "pdbcache")
defer cleanupTest(t, context)
intstrMin := intstr.FromInt(4)
pdb := &policy.PodDisruptionBudget{
ObjectMeta: metav1.ObjectMeta{
Namespace: context.ns.Name,
Name: "test-pdb",
Labels: map[string]string{"tkey1": "tval1", "tkey2": "tval2"},
},
Spec: policy.PodDisruptionBudgetSpec{
MinAvailable: &intstrMin,
Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"tkey": "tvalue"}},
},
}
createdPDB, err := context.clientSet.PolicyV1beta1().PodDisruptionBudgets(context.ns.Name).Create(pdb)
if err != nil {
t.Errorf("Failed to create PDB: %v", err)
}
// Wait for PDB to show up in the scheduler's cache.
if err = wait.Poll(time.Second, 15*time.Second, func() (bool, error) {
cachedPDBs, err := context.scheduler.Config().SchedulerCache.ListPDBs(labels.Everything())
if err != nil {
t.Errorf("Error while polling for PDB: %v", err)
return false, err
}
return len(cachedPDBs) > 0, err
}); err != nil {
t.Fatalf("No PDB was added to the cache: %v", err)
}
// Read PDB from the cache and compare it.
cachedPDBs, err := context.scheduler.Config().SchedulerCache.ListPDBs(labels.Everything())
if len(cachedPDBs) != 1 {
t.Fatalf("Expected to have 1 pdb in cache, but found %d.", len(cachedPDBs))
}
if !reflect.DeepEqual(createdPDB, cachedPDBs[0]) {
t.Errorf("Got different PDB than expected.\nDifference detected on:\n%s", diff.ObjectReflectDiff(createdPDB, cachedPDBs[0]))
}
// Update PDB and change its labels.
pdbCopy := *cachedPDBs[0]
pdbCopy.Labels = map[string]string{}
updatedPDB, err := context.clientSet.PolicyV1beta1().PodDisruptionBudgets(context.ns.Name).Update(&pdbCopy)
if err != nil {
t.Errorf("Failed to update PDB: %v", err)
}
// Wait for PDB to be updated in the scheduler's cache.
if err = wait.Poll(time.Second, 15*time.Second, func() (bool, error) {
cachedPDBs, err := context.scheduler.Config().SchedulerCache.ListPDBs(labels.Everything())
if err != nil {
t.Errorf("Error while polling for PDB: %v", err)
return false, err
}
return len(cachedPDBs[0].Labels) == 0, err
}); err != nil {
t.Fatalf("No PDB was updated in the cache: %v", err)
}
// Read PDB from the cache and compare it.
cachedPDBs, err = context.scheduler.Config().SchedulerCache.ListPDBs(labels.Everything())
if len(cachedPDBs) != 1 {
t.Errorf("Expected to have 1 pdb in cache, but found %d.", len(cachedPDBs))
}
if !reflect.DeepEqual(updatedPDB, cachedPDBs[0]) {
t.Errorf("Got different PDB than expected.\nDifference detected on:\n%s", diff.ObjectReflectDiff(updatedPDB, cachedPDBs[0]))
}
// Delete PDB.
err = context.clientSet.PolicyV1beta1().PodDisruptionBudgets(context.ns.Name).Delete(pdb.Name, &metav1.DeleteOptions{})
if err != nil {
t.Errorf("Failed to delete PDB: %v", err)
}
// Wait for PDB to be deleted from the scheduler's cache.
if err = wait.Poll(time.Second, 15*time.Second, func() (bool, error) {
cachedPDBs, err := context.scheduler.Config().SchedulerCache.ListPDBs(labels.Everything())
if err != nil {
t.Errorf("Error while polling for PDB: %v", err)
return false, err
}
return len(cachedPDBs) == 0, err
}); err != nil {
t.Errorf("No PDB was deleted from the cache: %v", err)
}
}

View File

@ -0,0 +1,303 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
// This file tests the Taint feature.
import (
"net/http"
"net/http/httptest"
"reflect"
"testing"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/testapi"
"k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
internalinformers "k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion"
"k8s.io/kubernetes/pkg/controller/node"
"k8s.io/kubernetes/pkg/controller/node/ipam"
kubeadmission "k8s.io/kubernetes/pkg/kubeapiserver/admission"
"k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction"
pluginapi "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction"
"k8s.io/kubernetes/plugin/pkg/scheduler"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider"
"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
"k8s.io/kubernetes/test/integration/framework"
)
// TestTaintNodeByCondition verifies:
// 1. MemoryPressure Toleration is added to non-BestEffort Pod by PodTolerationRestriction
// 2. NodeController taints nodes by node condition
// 3. Scheduler allows pod to tolerate node condition taints, e.g. network unavailabe
func TestTaintNodeByCondition(t *testing.T) {
h := &framework.MasterHolder{Initialized: make(chan struct{})}
s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
<-h.Initialized
h.M.GenericAPIServer.Handler.ServeHTTP(w, req)
}))
// Enable TaintNodeByCondition
utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=True")
// Build clientset and informers for controllers.
internalClientset := internalclientset.NewForConfigOrDie(&restclient.Config{QPS: -1, Host: s.URL, ContentConfig: restclient.ContentConfig{GroupVersion: testapi.Groups[v1.GroupName].GroupVersion()}})
internalInformers := internalinformers.NewSharedInformerFactory(internalClientset, time.Second)
clientset := clientset.NewForConfigOrDie(&restclient.Config{QPS: -1, Host: s.URL, ContentConfig: restclient.ContentConfig{GroupVersion: testapi.Groups[v1.GroupName].GroupVersion()}})
informers := informers.NewSharedInformerFactory(clientset, time.Second)
// Build PodToleration Admission.
admission := podtolerationrestriction.NewPodTolerationsPlugin(&pluginapi.Configuration{})
kubeadmission.WantsInternalKubeClientSet(admission).SetInternalKubeClientSet(internalClientset)
kubeadmission.WantsInternalKubeInformerFactory(admission).SetInternalKubeInformerFactory(internalInformers)
// Start master with admission.
masterConfig := framework.NewIntegrationTestMasterConfig()
masterConfig.GenericConfig.AdmissionControl = admission
_, _, closeFn := framework.RunAMasterUsingServer(masterConfig, s, h)
defer closeFn()
nsName := "default"
controllerCh := make(chan struct{})
defer close(controllerCh)
// Start NodeController for taint.
nc, err := node.NewNodeController(
informers.Core().V1().Pods(),
informers.Core().V1().Nodes(),
informers.Extensions().V1beta1().DaemonSets(),
nil, // CloudProvider
clientset,
time.Second, // Pod eviction timeout
100, // Eviction limiter QPS
100, // Secondary eviction limiter QPS
100, // Large cluster threshold
100, // Unhealthy zone threshold
time.Second, // Node monitor grace period
time.Second, // Node startup grace period
time.Second, // Node monitor period
nil, // Cluster CIDR
nil, // Service CIDR
0, // Node CIDR mask size
false, // Allocate node CIDRs
ipam.RangeAllocatorType, // Allocator type
true, // Run taint manger
true, // Enabled taint based eviction
true, // Enabled TaintNodeByCondition feature
)
if err != nil {
t.Errorf("Failed to create node controller: %v", err)
return
}
go nc.Run(controllerCh)
// Apply feature gates to enable TaintNodesByCondition
algorithmprovider.ApplyFeatureGates()
// Start scheduler
configurator := factory.NewConfigFactory(
v1.DefaultSchedulerName,
clientset,
informers.Core().V1().Nodes(),
informers.Core().V1().Pods(),
informers.Core().V1().PersistentVolumes(),
informers.Core().V1().PersistentVolumeClaims(),
informers.Core().V1().ReplicationControllers(),
informers.Extensions().V1beta1().ReplicaSets(),
informers.Apps().V1beta1().StatefulSets(),
informers.Core().V1().Services(),
informers.Policy().V1beta1().PodDisruptionBudgets(),
informers.Storage().V1().StorageClasses(),
v1.DefaultHardPodAffinitySymmetricWeight,
true, // Enable EqualCache by default.
)
sched, err := scheduler.NewFromConfigurator(configurator, func(cfg *scheduler.Config) {
cfg.StopEverything = controllerCh
cfg.Recorder = &record.FakeRecorder{}
})
if err != nil {
t.Errorf("Failed to create scheduler: %v.", err)
return
}
go sched.Run()
// Waiting for all controller sync.
informers.Start(controllerCh)
internalInformers.Start(controllerCh)
informers.WaitForCacheSync(controllerCh)
internalInformers.WaitForCacheSync(controllerCh)
// -------------------------------------------
// Test TaintNodeByCondition feature.
// -------------------------------------------
memoryPressureToleration := v1.Toleration{
Key: algorithm.TaintNodeMemoryPressure,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
}
// Case 1: Add MememoryPressure Toleration for non-BestEffort pod.
burstablePod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "burstable-pod",
Namespace: nsName,
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "busybox",
Image: "busybox",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
},
},
},
},
},
}
burstablePodInServ, err := clientset.CoreV1().Pods(nsName).Create(burstablePod)
if err != nil {
t.Errorf("Case 1: Failed to create pod: %v", err)
} else if !reflect.DeepEqual(burstablePodInServ.Spec.Tolerations, []v1.Toleration{memoryPressureToleration}) {
t.Errorf("Case 1: Unexpected toleration of non-BestEffort pod, expected: %+v, got: %v",
[]v1.Toleration{memoryPressureToleration},
burstablePodInServ.Spec.Tolerations)
}
// Case 2: No MemoryPressure Toleration for BestEffort pod.
besteffortPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "best-effort-pod",
Namespace: nsName,
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "busybox",
Image: "busybox",
},
},
},
}
besteffortPodInServ, err := clientset.CoreV1().Pods(nsName).Create(besteffortPod)
if err != nil {
t.Errorf("Case 2: Failed to create pod: %v", err)
} else if len(besteffortPodInServ.Spec.Tolerations) != 0 {
t.Errorf("Case 2: Unexpected toleration # of BestEffort pod, expected: 0, got: %v",
len(besteffortPodInServ.Spec.Tolerations))
}
// Case 3: Taint Node by NetworkUnavailable condition.
networkUnavailableNode := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-1",
},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("4000m"),
v1.ResourceMemory: resource.MustParse("16Gi"),
v1.ResourcePods: resource.MustParse("110"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("4000m"),
v1.ResourceMemory: resource.MustParse("16Gi"),
v1.ResourcePods: resource.MustParse("110"),
},
Conditions: []v1.NodeCondition{
{
Type: v1.NodeNetworkUnavailable,
Status: v1.ConditionTrue,
},
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
},
},
},
}
nodeInformerCh := make(chan bool)
nodeInformer := informers.Core().V1().Nodes().Informer()
nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
UpdateFunc: func(old, cur interface{}) {
curNode := cur.(*v1.Node)
for _, taint := range curNode.Spec.Taints {
if taint.Key == algorithm.TaintNodeNetworkUnavailable &&
taint.Effect == v1.TaintEffectNoSchedule {
nodeInformerCh <- true
break
}
}
},
})
if _, err := clientset.CoreV1().Nodes().Create(networkUnavailableNode); err != nil {
t.Errorf("Case 3: Failed to create node: %v", err)
} else {
select {
case <-time.After(60 * time.Second):
t.Errorf("Case 3: Failed to taint node after 60s.")
case <-nodeInformerCh:
}
}
// Case 4: Schedule Pod with NetworkUnavailable toleration.
networkDaemonPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "network-daemon-pod",
Namespace: nsName,
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "busybox",
Image: "busybox",
},
},
Tolerations: []v1.Toleration{
{
Key: algorithm.TaintNodeNetworkUnavailable,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
},
},
},
}
if _, err := clientset.CoreV1().Pods(nsName).Create(networkDaemonPod); err != nil {
t.Errorf("Case 4: Failed to create pod for network daemon: %v", err)
} else {
if err := waitForPodToScheduleWithTimeout(clientset, networkDaemonPod, time.Second*60); err != nil {
t.Errorf("Case 4: Failed to schedule network daemon pod in 60s.")
}
}
}

View File

@ -0,0 +1,395 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"fmt"
"testing"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
clientv1core "k8s.io/client-go/kubernetes/typed/core/v1"
corelisters "k8s.io/client-go/listers/core/v1"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/legacyscheme"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/plugin/pkg/scheduler"
_ "k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider"
"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
"k8s.io/kubernetes/test/integration/framework"
"net/http/httptest"
)
type TestContext struct {
closeFn framework.CloseFunc
httpServer *httptest.Server
ns *v1.Namespace
clientSet *clientset.Clientset
informerFactory informers.SharedInformerFactory
schedulerConfigFactory scheduler.Configurator
schedulerConfig *scheduler.Config
scheduler *scheduler.Scheduler
}
// initTest initializes a test environment and creates a scheduler with default
// configuration.
func initTest(t *testing.T, nsPrefix string) *TestContext {
var context TestContext
masterConfig := framework.NewIntegrationTestMasterConfig()
_, context.httpServer, context.closeFn = framework.RunAMaster(masterConfig)
context.ns = framework.CreateTestingNamespace(nsPrefix+string(uuid.NewUUID()), context.httpServer, t)
context.clientSet = clientset.NewForConfigOrDie(&restclient.Config{Host: context.httpServer.URL})
context.informerFactory = informers.NewSharedInformerFactory(context.clientSet, 0)
podInformer := factory.NewPodInformer(context.clientSet, 12*time.Hour, v1.DefaultSchedulerName)
context.schedulerConfigFactory = factory.NewConfigFactory(
v1.DefaultSchedulerName,
context.clientSet,
context.informerFactory.Core().V1().Nodes(),
podInformer,
context.informerFactory.Core().V1().PersistentVolumes(),
context.informerFactory.Core().V1().PersistentVolumeClaims(),
context.informerFactory.Core().V1().ReplicationControllers(),
context.informerFactory.Extensions().V1beta1().ReplicaSets(),
context.informerFactory.Apps().V1beta1().StatefulSets(),
context.informerFactory.Core().V1().Services(),
context.informerFactory.Policy().V1beta1().PodDisruptionBudgets(),
context.informerFactory.Storage().V1().StorageClasses(),
v1.DefaultHardPodAffinitySymmetricWeight,
true,
)
var err error
context.schedulerConfig, err = context.schedulerConfigFactory.Create()
if err != nil {
t.Fatalf("Couldn't create scheduler config: %v", err)
}
eventBroadcaster := record.NewBroadcaster()
context.schedulerConfig.Recorder = eventBroadcaster.NewRecorder(legacyscheme.Scheme, v1.EventSource{Component: v1.DefaultSchedulerName})
eventBroadcaster.StartRecordingToSink(&clientv1core.EventSinkImpl{Interface: clientv1core.New(context.clientSet.CoreV1().RESTClient()).Events("")})
go podInformer.Informer().Run(context.schedulerConfig.StopEverything)
context.informerFactory.Start(context.schedulerConfig.StopEverything)
context.scheduler, err = scheduler.NewFromConfigurator(&scheduler.FakeConfigurator{Config: context.schedulerConfig}, nil...)
if err != nil {
t.Fatalf("Couldn't create scheduler: %v", err)
}
context.scheduler.Run()
return &context
}
// cleanupTest deletes the scheduler and the test namespace. It should be called
// at the end of a test.
func cleanupTest(t *testing.T, context *TestContext) {
// Kill the scheduler.
close(context.schedulerConfig.StopEverything)
// Cleanup nodes.
context.clientSet.CoreV1().Nodes().DeleteCollection(nil, metav1.ListOptions{})
framework.DeleteTestingNamespace(context.ns, context.httpServer, t)
context.closeFn()
}
// waitForReflection waits till the passFunc confirms that the object it expects
// to see is in the store. Used to observe reflected events.
func waitForReflection(t *testing.T, nodeLister corelisters.NodeLister, key string, passFunc func(n interface{}) bool) error {
nodes := []*v1.Node{}
err := wait.Poll(time.Millisecond*100, wait.ForeverTestTimeout, func() (bool, error) {
n, err := nodeLister.Get(key)
switch {
case err == nil && passFunc(n):
return true, nil
case errors.IsNotFound(err):
nodes = append(nodes, nil)
case err != nil:
t.Errorf("Unexpected error: %v", err)
default:
nodes = append(nodes, n)
}
return false, nil
})
if err != nil {
t.Logf("Logging consecutive node versions received from store:")
for i, n := range nodes {
t.Logf("%d: %#v", i, n)
}
}
return err
}
// nodeHasLabels returns a function that checks if a node has all the given labels.
func nodeHasLabels(cs clientset.Interface, nodeName string, labels map[string]string) wait.ConditionFunc {
return func() (bool, error) {
node, err := cs.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
if errors.IsNotFound(err) {
return false, nil
}
if err != nil {
// This could be a connection error so we want to retry.
return false, nil
}
for k, v := range labels {
if node.Labels == nil || node.Labels[k] != v {
return false, nil
}
}
return true, nil
}
}
// waitForNodeLabels waits for the given node to have all the given labels.
func waitForNodeLabels(cs clientset.Interface, nodeName string, labels map[string]string) error {
return wait.Poll(time.Millisecond*100, wait.ForeverTestTimeout, nodeHasLabels(cs, nodeName, labels))
}
// createNode creates a node with the given resource list and
// returns a pointer and error status. If 'res' is nil, a predefined amount of
// resource will be used.
func createNode(cs clientset.Interface, name string, res *v1.ResourceList) (*v1.Node, error) {
// if resource is nil, we use a default amount of resources for the node.
if res == nil {
res = &v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
}
}
n := &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: name},
Spec: v1.NodeSpec{Unschedulable: false},
Status: v1.NodeStatus{
Capacity: *res,
},
}
return cs.CoreV1().Nodes().Create(n)
}
// createNodes creates `numNodes` nodes. The created node names will be in the
// form of "`prefix`-X" where X is an ordinal.
func createNodes(cs clientset.Interface, prefix string, res *v1.ResourceList, numNodes int) ([]*v1.Node, error) {
nodes := make([]*v1.Node, numNodes)
for i := 0; i < numNodes; i++ {
nodeName := fmt.Sprintf("%v-%d", prefix, i)
node, err := createNode(cs, nodeName, res)
if err != nil {
return nodes[:], err
}
nodes[i] = node
}
return nodes[:], nil
}
type pausePodConfig struct {
Name string
Namespace string
Affinity *v1.Affinity
Annotations, Labels, NodeSelector map[string]string
Resources *v1.ResourceRequirements
Tolerations []v1.Toleration
NodeName string
SchedulerName string
Priority *int32
}
// initPausePod initializes a pod API object from the given config. It is used
// mainly in pod creation process.
func initPausePod(cs clientset.Interface, conf *pausePodConfig) *v1.Pod {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: conf.Name,
Namespace: conf.Namespace,
Labels: conf.Labels,
Annotations: conf.Annotations,
},
Spec: v1.PodSpec{
NodeSelector: conf.NodeSelector,
Affinity: conf.Affinity,
Containers: []v1.Container{
{
Name: conf.Name,
Image: framework.GetPauseImageName(cs),
},
},
Tolerations: conf.Tolerations,
NodeName: conf.NodeName,
SchedulerName: conf.SchedulerName,
Priority: conf.Priority,
},
}
if conf.Resources != nil {
pod.Spec.Containers[0].Resources = *conf.Resources
}
return pod
}
// createPausePod creates a pod with "Pause" image and the given config and
// return its pointer and error status.
func createPausePod(cs clientset.Interface, p *v1.Pod) (*v1.Pod, error) {
return cs.CoreV1().Pods(p.Namespace).Create(p)
}
// createPausePodWithResource creates a pod with "Pause" image and the given
// resources and returns its pointer and error status. The resource list can be
// nil.
func createPausePodWithResource(cs clientset.Interface, podName string, nsName string, res *v1.ResourceList) (*v1.Pod, error) {
var conf pausePodConfig
if res == nil {
conf = pausePodConfig{
Name: podName,
Namespace: nsName,
}
} else {
conf = pausePodConfig{
Name: podName,
Namespace: nsName,
Resources: &v1.ResourceRequirements{
Requests: *res,
},
}
}
return createPausePod(cs, initPausePod(cs, &conf))
}
// runPausePod creates a pod with "Pause" image and the given config and waits
// until it is scheduled. It returns its pointer and error status.
func runPausePod(cs clientset.Interface, pod *v1.Pod) (*v1.Pod, error) {
pod, err := cs.CoreV1().Pods(pod.Namespace).Create(pod)
if err != nil {
return nil, fmt.Errorf("Error creating pause pod: %v", err)
}
if err = waitForPodToSchedule(cs, pod); err != nil {
return pod, fmt.Errorf("Pod %v didn't schedule successfully. Error: %v", pod.Name, err)
}
if pod, err = cs.CoreV1().Pods(pod.Namespace).Get(pod.Name, metav1.GetOptions{}); err != nil {
return pod, fmt.Errorf("Error getting pod %v info: %v", pod.Name, err)
}
return pod, nil
}
// podDeleted returns true if a pod is not found in the given namespace.
func podDeleted(c clientset.Interface, podNamespace, podName string) wait.ConditionFunc {
return func() (bool, error) {
pod, err := c.CoreV1().Pods(podNamespace).Get(podName, metav1.GetOptions{})
if errors.IsNotFound(err) {
return true, nil
}
if pod.DeletionTimestamp != nil {
return true, nil
}
return false, nil
}
}
// podIsGettingEvicted returns true if the pod's deletion timestamp is set.
func podIsGettingEvicted(c clientset.Interface, podNamespace, podName string) wait.ConditionFunc {
return func() (bool, error) {
pod, err := c.CoreV1().Pods(podNamespace).Get(podName, metav1.GetOptions{})
if err != nil {
return false, err
}
if pod.DeletionTimestamp != nil {
return true, nil
}
return false, nil
}
}
// podScheduled returns true if a node is assigned to the given pod.
func podScheduled(c clientset.Interface, podNamespace, podName string) wait.ConditionFunc {
return func() (bool, error) {
pod, err := c.CoreV1().Pods(podNamespace).Get(podName, metav1.GetOptions{})
if errors.IsNotFound(err) {
return false, nil
}
if err != nil {
// This could be a connection error so we want to retry.
return false, nil
}
if pod.Spec.NodeName == "" {
return false, nil
}
return true, nil
}
}
// podUnschedulable returns a condition function that returns true if the given pod
// gets unschedulable status.
func podUnschedulable(c clientset.Interface, podNamespace, podName string) wait.ConditionFunc {
return func() (bool, error) {
pod, err := c.CoreV1().Pods(podNamespace).Get(podName, metav1.GetOptions{})
if errors.IsNotFound(err) {
return false, nil
}
if err != nil {
// This could be a connection error so we want to retry.
return false, nil
}
_, cond := podutil.GetPodCondition(&pod.Status, v1.PodScheduled)
return cond != nil && cond.Status == v1.ConditionFalse && cond.Reason == v1.PodReasonUnschedulable, nil
}
}
// waitForPodToScheduleWithTimeout waits for a pod to get scheduled and returns
// an error if it does not scheduled within the given timeout.
func waitForPodToScheduleWithTimeout(cs clientset.Interface, pod *v1.Pod, timeout time.Duration) error {
return wait.Poll(100*time.Millisecond, timeout, podScheduled(cs, pod.Namespace, pod.Name))
}
// waitForPodToSchedule waits for a pod to get scheduled and returns an error if
// it does not get scheduled within the timeout duration (30 seconds).
func waitForPodToSchedule(cs clientset.Interface, pod *v1.Pod) error {
return waitForPodToScheduleWithTimeout(cs, pod, 30*time.Second)
}
// deletePod deletes the given pod in the given namespace.
func deletePod(cs clientset.Interface, podName string, nsName string) error {
return cs.CoreV1().Pods(nsName).Delete(podName, metav1.NewDeleteOptions(0))
}
// cleanupPods deletes the given pods and waits for them to be actually deleted.
func cleanupPods(cs clientset.Interface, t *testing.T, pods []*v1.Pod) {
for _, p := range pods {
err := cs.CoreV1().Pods(p.Namespace).Delete(p.Name, metav1.NewDeleteOptions(0))
if err != nil && !errors.IsNotFound(err) {
t.Errorf("error while deleting pod %v/%v: %v", p.Namespace, p.Name, err)
}
}
for _, p := range pods {
if err := wait.Poll(time.Second, wait.ForeverTestTimeout, podDeleted(cs, p.Namespace, p.Name)); err != nil {
t.Errorf("error while waiting for pod %v/%v to get deleted: %v", p.Namespace, p.Name, err)
}
}
}
// printAllPods prints a list of all the pods and their node names. This is used
// for debugging.
func printAllPods(t *testing.T, cs clientset.Interface, nsName string) {
podList, err := cs.CoreV1().Pods(nsName).List(metav1.ListOptions{})
if err != nil {
t.Logf("Error getting pods: %v", err)
}
for _, pod := range podList.Items {
t.Logf("Pod:\n\tName:%v\n\tNamespace:%v\n\tNode Name:%v\n", pod.Name, pod.Namespace, pod.Spec.NodeName)
}
}

View File

@ -0,0 +1,494 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
// This file tests the VolumeScheduling feature.
import (
"fmt"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/golang/glog"
"k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/testapi"
"k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
"k8s.io/kubernetes/plugin/pkg/scheduler"
"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
"k8s.io/kubernetes/test/integration/framework"
)
type testConfig struct {
client clientset.Interface
ns string
stop <-chan struct{}
teardown func()
}
var (
// Delete API objects immediately
deletePeriod = int64(0)
deleteOption = &metav1.DeleteOptions{GracePeriodSeconds: &deletePeriod}
modeWait = storagev1.VolumeBindingWaitForFirstConsumer
modeImmediate = storagev1.VolumeBindingImmediate
classWait = "wait"
classImmediate = "immediate"
)
const (
labelKey = "test-label"
labelValue = "test-value"
nodeName = "node1"
podLimit = 100
volsPerPod = 5
)
func TestVolumeBinding(t *testing.T) {
config := setup(t, "volume-scheduling")
defer config.teardown()
cases := map[string]struct {
pod *v1.Pod
pvs []*v1.PersistentVolume
pvcs []*v1.PersistentVolumeClaim
}{
"immediate can bind": {
pod: makePod("pod-i-canbind", config.ns, []string{"pvc-i-canbind"}),
pvs: []*v1.PersistentVolume{makePV(t, "pv-i-canbind", classImmediate, "", "")},
pvcs: []*v1.PersistentVolumeClaim{makePVC("pvc-i-canbind", config.ns, &classImmediate, "")},
},
"immediate pvc prebound": {
pod: makePod("pod-i-pvc-prebound", config.ns, []string{"pvc-i-prebound"}),
pvs: []*v1.PersistentVolume{makePV(t, "pv-i-pvc-prebound", classImmediate, "", "")},
pvcs: []*v1.PersistentVolumeClaim{makePVC("pvc-i-prebound", config.ns, &classImmediate, "pv-i-pvc-prebound")},
},
"immediate pv prebound": {
pod: makePod("pod-i-pv-prebound", config.ns, []string{"pvc-i-pv-prebound"}),
pvs: []*v1.PersistentVolume{makePV(t, "pv-i-prebound", classImmediate, "pvc-i-pv-prebound", config.ns)},
pvcs: []*v1.PersistentVolumeClaim{makePVC("pvc-i-pv-prebound", config.ns, &classImmediate, "")},
},
"wait can bind": {
pod: makePod("pod-w-canbind", config.ns, []string{"pvc-w-canbind"}),
pvs: []*v1.PersistentVolume{makePV(t, "pv-w-canbind", classWait, "", "")},
pvcs: []*v1.PersistentVolumeClaim{makePVC("pvc-w-canbind", config.ns, &classWait, "")},
},
"wait pvc prebound": {
pod: makePod("pod-w-pvc-prebound", config.ns, []string{"pvc-w-prebound"}),
pvs: []*v1.PersistentVolume{makePV(t, "pv-w-pvc-prebound", classWait, "", "")},
pvcs: []*v1.PersistentVolumeClaim{makePVC("pvc-w-prebound", config.ns, &classWait, "pv-w-pvc-prebound")},
},
"wait pv prebound": {
pod: makePod("pod-w-pv-prebound", config.ns, []string{"pvc-w-pv-prebound"}),
pvs: []*v1.PersistentVolume{makePV(t, "pv-w-prebound", classWait, "pvc-w-pv-prebound", config.ns)},
pvcs: []*v1.PersistentVolumeClaim{makePVC("pvc-w-pv-prebound", config.ns, &classWait, "")},
},
"wait can bind two": {
pod: makePod("pod-w-canbind-2", config.ns, []string{"pvc-w-canbind-2", "pvc-w-canbind-3"}),
pvs: []*v1.PersistentVolume{
makePV(t, "pv-w-canbind-2", classWait, "", ""),
makePV(t, "pv-w-canbind-3", classWait, "", ""),
},
pvcs: []*v1.PersistentVolumeClaim{
makePVC("pvc-w-canbind-2", config.ns, &classWait, ""),
makePVC("pvc-w-canbind-3", config.ns, &classWait, ""),
},
},
"mix immediate and wait": {
pod: makePod("pod-mix-bound", config.ns, []string{"pvc-w-canbind-4", "pvc-i-canbind-2"}),
pvs: []*v1.PersistentVolume{
makePV(t, "pv-w-canbind-4", classWait, "", ""),
makePV(t, "pv-i-canbind-2", classImmediate, "", ""),
},
pvcs: []*v1.PersistentVolumeClaim{
makePVC("pvc-w-canbind-4", config.ns, &classWait, ""),
makePVC("pvc-i-canbind-2", config.ns, &classImmediate, ""),
},
},
// TODO:
// immediate mode - PVC cannot bound
// wait mode - PVC cannot bind
// wait mode - 2 PVCs, 1 cannot bind
}
for name, test := range cases {
glog.Infof("Running test %v", name)
// Create PVs
for _, pv := range test.pvs {
if _, err := config.client.CoreV1().PersistentVolumes().Create(pv); err != nil {
t.Fatalf("Failed to create PersistentVolume %q: %v", pv.Name, err)
}
}
// Create PVCs
for _, pvc := range test.pvcs {
if _, err := config.client.CoreV1().PersistentVolumeClaims(config.ns).Create(pvc); err != nil {
t.Fatalf("Failed to create PersistentVolumeClaim %q: %v", pvc.Name, err)
}
}
// Create Pod
if _, err := config.client.CoreV1().Pods(config.ns).Create(test.pod); err != nil {
t.Fatalf("Failed to create Pod %q: %v", test.pod.Name, err)
}
if err := waitForPodToSchedule(config.client, test.pod); err != nil {
t.Errorf("Failed to schedule Pod %q: %v", test.pod.Name, err)
}
// Validate PVC/PV binding
for _, pvc := range test.pvcs {
validatePVCPhase(t, config.client, pvc, v1.ClaimBound)
}
for _, pv := range test.pvs {
validatePVPhase(t, config.client, pv, v1.VolumeBound)
}
// TODO: validate events on Pods and PVCs
config.client.CoreV1().Pods(config.ns).DeleteCollection(deleteOption, metav1.ListOptions{})
config.client.CoreV1().PersistentVolumeClaims(config.ns).DeleteCollection(deleteOption, metav1.ListOptions{})
config.client.CoreV1().PersistentVolumes().DeleteCollection(deleteOption, metav1.ListOptions{})
}
}
// TestVolumeBindingStress creates <podLimit> pods, each with <volsPerPod> unbound PVCs.
func TestVolumeBindingStress(t *testing.T) {
config := setup(t, "volume-binding-stress")
defer config.teardown()
// Create enough PVs and PVCs for all the pods
pvs := []*v1.PersistentVolume{}
pvcs := []*v1.PersistentVolumeClaim{}
for i := 0; i < podLimit*volsPerPod; i++ {
pv := makePV(t, fmt.Sprintf("pv-stress-%v", i), classWait, "", "")
pvc := makePVC(fmt.Sprintf("pvc-stress-%v", i), config.ns, &classWait, "")
if pv, err := config.client.CoreV1().PersistentVolumes().Create(pv); err != nil {
t.Fatalf("Failed to create PersistentVolume %q: %v", pv.Name, err)
}
if pvc, err := config.client.CoreV1().PersistentVolumeClaims(config.ns).Create(pvc); err != nil {
t.Fatalf("Failed to create PersistentVolumeClaim %q: %v", pvc.Name, err)
}
pvs = append(pvs, pv)
pvcs = append(pvcs, pvc)
}
pods := []*v1.Pod{}
for i := 0; i < podLimit; i++ {
// Generate string of all the PVCs for the pod
podPvcs := []string{}
for j := i * volsPerPod; j < (i+1)*volsPerPod; j++ {
podPvcs = append(podPvcs, pvcs[j].Name)
}
pod := makePod(fmt.Sprintf("pod%v", i), config.ns, podPvcs)
if pod, err := config.client.CoreV1().Pods(config.ns).Create(pod); err != nil {
t.Fatalf("Failed to create Pod %q: %v", pod.Name, err)
}
pods = append(pods, pod)
}
// Validate Pods scheduled
for _, pod := range pods {
if err := waitForPodToSchedule(config.client, pod); err != nil {
t.Errorf("Failed to schedule Pod %q: %v", pod.Name, err)
}
}
// Validate PVC/PV binding
for _, pvc := range pvcs {
validatePVCPhase(t, config.client, pvc, v1.ClaimBound)
}
for _, pv := range pvs {
validatePVPhase(t, config.client, pv, v1.VolumeBound)
}
// TODO: validate events on Pods and PVCs
}
func setup(t *testing.T, nsName string) *testConfig {
h := &framework.MasterHolder{Initialized: make(chan struct{})}
s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
<-h.Initialized
h.M.GenericAPIServer.Handler.ServeHTTP(w, req)
}))
// Enable feature gates
utilfeature.DefaultFeatureGate.Set("VolumeScheduling=true,PersistentLocalVolumes=true")
// Build clientset and informers for controllers.
clientset := clientset.NewForConfigOrDie(&restclient.Config{QPS: -1, Host: s.URL, ContentConfig: restclient.ContentConfig{GroupVersion: testapi.Groups[v1.GroupName].GroupVersion()}})
informers := informers.NewSharedInformerFactory(clientset, time.Second)
// Start master
masterConfig := framework.NewIntegrationTestMasterConfig()
_, _, closeFn := framework.RunAMasterUsingServer(masterConfig, s, h)
ns := framework.CreateTestingNamespace(nsName, s, t).Name
controllerCh := make(chan struct{})
// Start PV controller for volume binding.
params := persistentvolume.ControllerParameters{
KubeClient: clientset,
SyncPeriod: time.Hour, // test shouldn't need to resync
VolumePlugins: nil, // TODO; need later for dynamic provisioning
Cloud: nil,
ClusterName: "volume-test-cluster",
VolumeInformer: informers.Core().V1().PersistentVolumes(),
ClaimInformer: informers.Core().V1().PersistentVolumeClaims(),
ClassInformer: informers.Storage().V1().StorageClasses(),
EventRecorder: nil, // TODO: add one so we can test PV events
EnableDynamicProvisioning: true,
}
ctrl, err := persistentvolume.NewController(params)
if err != nil {
t.Fatalf("Failed to create PV controller: %v", err)
}
go ctrl.Run(controllerCh)
// Start scheduler
configurator := factory.NewConfigFactory(
v1.DefaultSchedulerName,
clientset,
informers.Core().V1().Nodes(),
informers.Core().V1().Pods(),
informers.Core().V1().PersistentVolumes(),
informers.Core().V1().PersistentVolumeClaims(),
informers.Core().V1().ReplicationControllers(),
informers.Extensions().V1beta1().ReplicaSets(),
informers.Apps().V1beta1().StatefulSets(),
informers.Core().V1().Services(),
informers.Policy().V1beta1().PodDisruptionBudgets(),
informers.Storage().V1().StorageClasses(),
v1.DefaultHardPodAffinitySymmetricWeight,
true, // Enable EqualCache by default.
)
sched, err := scheduler.NewFromConfigurator(configurator, func(cfg *scheduler.Config) {
cfg.StopEverything = controllerCh
cfg.Recorder = &record.FakeRecorder{}
})
if err != nil {
t.Fatalf("Failed to create scheduler: %v.", err)
}
go sched.Run()
// Waiting for all controller sync.
informers.Start(controllerCh)
informers.WaitForCacheSync(controllerCh)
// Create shared objects
// Create node
testNode := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: nodeName,
Labels: map[string]string{labelKey: labelValue},
},
Spec: v1.NodeSpec{Unschedulable: false},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourcePods: *resource.NewQuantity(podLimit, resource.DecimalSI),
},
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
Reason: fmt.Sprintf("schedulable condition"),
LastHeartbeatTime: metav1.Time{Time: time.Now()},
},
},
},
}
if _, err := clientset.CoreV1().Nodes().Create(testNode); err != nil {
t.Fatalf("Failed to create Node %q: %v", testNode.Name, err)
}
// Create SCs
scs := []*storagev1.StorageClass{
makeStorageClass(classWait, &modeWait),
makeStorageClass(classImmediate, &modeImmediate),
}
for _, sc := range scs {
if _, err := clientset.StorageV1().StorageClasses().Create(sc); err != nil {
t.Fatalf("Failed to create StorageClass %q: %v", sc.Name, err)
}
}
return &testConfig{
client: clientset,
ns: ns,
stop: controllerCh,
teardown: func() {
clientset.CoreV1().Pods(ns).DeleteCollection(nil, metav1.ListOptions{})
clientset.CoreV1().PersistentVolumeClaims(ns).DeleteCollection(nil, metav1.ListOptions{})
clientset.CoreV1().PersistentVolumes().DeleteCollection(nil, metav1.ListOptions{})
clientset.StorageV1().StorageClasses().DeleteCollection(nil, metav1.ListOptions{})
clientset.CoreV1().Nodes().DeleteCollection(nil, metav1.ListOptions{})
close(controllerCh)
closeFn()
utilfeature.DefaultFeatureGate.Set("VolumeScheduling=false,LocalPersistentVolumes=false")
},
}
}
func makeStorageClass(name string, mode *storagev1.VolumeBindingMode) *storagev1.StorageClass {
return &storagev1.StorageClass{
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
Provisioner: "kubernetes.io/no-provisioner",
VolumeBindingMode: mode,
}
}
func makePV(t *testing.T, name, scName, pvcName, ns string) *v1.PersistentVolume {
pv := &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Annotations: map[string]string{},
},
Spec: v1.PersistentVolumeSpec{
Capacity: v1.ResourceList{
v1.ResourceName(v1.ResourceStorage): resource.MustParse("5Gi"),
},
AccessModes: []v1.PersistentVolumeAccessMode{
v1.ReadWriteOnce,
},
StorageClassName: scName,
PersistentVolumeSource: v1.PersistentVolumeSource{
Local: &v1.LocalVolumeSource{
Path: "/test-path",
},
},
},
}
if pvcName != "" {
pv.Spec.ClaimRef = &v1.ObjectReference{Name: pvcName, Namespace: ns}
}
testNodeAffinity := &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: labelKey,
Operator: v1.NodeSelectorOpIn,
Values: []string{labelValue},
},
},
},
},
},
}
err := helper.StorageNodeAffinityToAlphaAnnotation(pv.Annotations, testNodeAffinity)
if err != nil {
t.Fatalf("Setting storage node affinity failed: %v", err)
}
return pv
}
func makePVC(name, ns string, scName *string, volumeName string) *v1.PersistentVolumeClaim {
return &v1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: ns,
},
Spec: v1.PersistentVolumeClaimSpec{
AccessModes: []v1.PersistentVolumeAccessMode{
v1.ReadWriteOnce,
},
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceName(v1.ResourceStorage): resource.MustParse("5Gi"),
},
},
StorageClassName: scName,
VolumeName: volumeName,
},
}
}
func makePod(name, ns string, pvcs []string) *v1.Pod {
volumes := []v1.Volume{}
for i, pvc := range pvcs {
volumes = append(volumes, v1.Volume{
Name: fmt.Sprintf("vol%v", i),
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: pvc,
},
},
})
}
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: ns,
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "write-pod",
Image: "gcr.io/google_containers/busybox:1.24",
Command: []string{"/bin/sh"},
Args: []string{"-c", "while true; do sleep 1; done"},
},
},
Volumes: volumes,
},
}
}
func validatePVCPhase(t *testing.T, client clientset.Interface, pvc *v1.PersistentVolumeClaim, phase v1.PersistentVolumeClaimPhase) {
claim, err := client.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(pvc.Name, metav1.GetOptions{})
if err != nil {
t.Errorf("Failed to get PVC %v/%v: %v", pvc.Namespace, pvc.Name, err)
}
if claim.Status.Phase != phase {
t.Errorf("PVC %v/%v phase not %v, got %v", pvc.Namespace, pvc.Name, phase, claim.Status.Phase)
}
}
func validatePVPhase(t *testing.T, client clientset.Interface, pv *v1.PersistentVolume, phase v1.PersistentVolumePhase) {
pv, err := client.CoreV1().PersistentVolumes().Get(pv.Name, metav1.GetOptions{})
if err != nil {
t.Errorf("Failed to get PV %v: %v", pv.Name, err)
}
if pv.Status.Phase != phase {
t.Errorf("PV %v phase not %v, got %v", pv.Name, phase, pv.Status.Phase)
}
}