Fresh dep ensure

Mike Cronce
2018-11-26 13:23:56 -05:00
parent 93cb8a04d7
commit 407478ab9a
9016 changed files with 551394 additions and 279685 deletions

View File

@@ -12,31 +12,27 @@ go_library(
"doc.go",
"scheduler_interface.go",
"types.go",
"well_known_labels.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm",
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//pkg/scheduler/internal/cache:go_default_library",
"//staging/src/k8s.io/api/apps/v1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/policy/v1beta1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"scheduler_interface_test.go",
"types_test.go",
],
srcs = ["types_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/cache:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
],
)

View File

@@ -9,6 +9,7 @@ load(
go_library(
name = "go_default_library",
srcs = [
"csi_volume_predicate.go",
"error.go",
"metadata.go",
"predicates.go",
@@ -23,29 +24,32 @@ go_library(
"//pkg/kubelet/apis:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//pkg/volume/util:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/storage/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/fields:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/rand:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/listers/storage/v1:go_default_library",
"//vendor/k8s.io/client-go/util/workqueue:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/storage/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/rand:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/listers/core/v1:go_default_library",
"//staging/src/k8s.io/client-go/listers/storage/v1:go_default_library",
"//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"csi_volume_predicate_test.go",
"main_test.go",
"max_attachable_volume_predicate_test.go",
"metadata_test.go",
"predicates_test.go",
@@ -57,17 +61,18 @@ go_test(
"//pkg/features:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/volume/util:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/storage/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature/testing:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/storage/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature/testing:go_default_library",
],
)

View File

@@ -0,0 +1,157 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
)
// CSIMaxVolumeLimitChecker defines the predicate needed for counting CSI volumes
type CSIMaxVolumeLimitChecker struct {
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo
}
// NewCSIMaxVolumeLimitPredicate returns a predicate for counting CSI volumes
func NewCSIMaxVolumeLimitPredicate(
pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate {
c := &CSIMaxVolumeLimitChecker{
pvInfo: pvInfo,
pvcInfo: pvcInfo,
}
return c.attachableLimitPredicate
}
func (c *CSIMaxVolumeLimitChecker) attachableLimitPredicate(
pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
// if the feature gate is disabled we return early
if !utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
return true, nil, nil
}
// If a pod doesn't have any volume attached to it, the predicate will always be true.
// Thus we make a fast path for it, to avoid unnecessary computations in this case.
if len(pod.Spec.Volumes) == 0 {
return true, nil, nil
}
nodeVolumeLimits := nodeInfo.VolumeLimits()
// if the node does not have volume limits, this predicate should exit early
if len(nodeVolumeLimits) == 0 {
return true, nil, nil
}
// newVolumes is a map of unique volume name/CSI volume handle to its volume limit key
newVolumes := make(map[string]string)
if err := c.filterAttachableVolumes(pod.Spec.Volumes, pod.Namespace, newVolumes); err != nil {
return false, nil, err
}
if len(newVolumes) == 0 {
return true, nil, nil
}
// attachedVolumes is a map of unique volume name/CSI volume handle to its volume limit key
attachedVolumes := make(map[string]string)
for _, existingPod := range nodeInfo.Pods() {
if err := c.filterAttachableVolumes(existingPod.Spec.Volumes, existingPod.Namespace, attachedVolumes); err != nil {
return false, nil, err
}
}
newVolumeCount := map[string]int{}
attachedVolumeCount := map[string]int{}
for volumeName, volumeLimitKey := range attachedVolumes {
if _, ok := newVolumes[volumeName]; ok {
delete(newVolumes, volumeName)
}
attachedVolumeCount[volumeLimitKey]++
}
for _, volumeLimitKey := range newVolumes {
newVolumeCount[volumeLimitKey]++
}
for volumeLimitKey, count := range newVolumeCount {
maxVolumeLimit, ok := nodeVolumeLimits[v1.ResourceName(volumeLimitKey)]
if ok {
currentVolumeCount := attachedVolumeCount[volumeLimitKey]
if currentVolumeCount+count > int(maxVolumeLimit) {
return false, []algorithm.PredicateFailureReason{ErrMaxVolumeCountExceeded}, nil
}
}
}
return true, nil, nil
}
func (c *CSIMaxVolumeLimitChecker) filterAttachableVolumes(
volumes []v1.Volume, namespace string, result map[string]string) error {
for _, vol := range volumes {
// CSI volumes can only be used as persistent volumes
if vol.PersistentVolumeClaim == nil {
continue
}
pvcName := vol.PersistentVolumeClaim.ClaimName
if pvcName == "" {
return fmt.Errorf("PersistentVolumeClaim had no name")
}
pvc, err := c.pvcInfo.GetPersistentVolumeClaimInfo(namespace, pvcName)
if err != nil {
klog.V(4).Infof("Unable to look up PVC info for %s/%s", namespace, pvcName)
continue
}
pvName := pvc.Spec.VolumeName
// TODO - the actual handling of unbound PVCs will be fixed by late binding design.
if pvName == "" {
klog.V(4).Infof("Persistent volume had no name for claim %s/%s", namespace, pvcName)
continue
}
pv, err := c.pvInfo.GetPersistentVolumeInfo(pvName)
if err != nil {
klog.V(4).Infof("Unable to look up PV info for PVC %s/%s and PV %s", namespace, pvcName, pvName)
continue
}
csiSource := pv.Spec.PersistentVolumeSource.CSI
if csiSource == nil {
klog.V(4).Infof("Not considering non-CSI volume %s/%s", namespace, pvcName)
continue
}
driverName := csiSource.Driver
volumeLimitKey := volumeutil.GetCSIAttachLimitKey(driverName)
result[csiSource.VolumeHandle] = volumeLimitKey
}
return nil
}
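For reference, a minimal standalone sketch of the dedup-and-count arithmetic attachableLimitPredicate performs, with the PVC/PV lookups replaced by plain handle-to-limit-key maps and a hypothetical limit key hard-coded (this sketch is not part of the diff above):
package main
import "fmt"
func main() {
	limitKey := "attachable-volumes-csi-ebs" // hypothetical CSI limit key
	nodeLimit := 3
	// volumes already attached through existing pods: handle -> limit key
	attached := map[string]string{"vol-1": limitKey, "vol-2": limitKey}
	// volumes requested by the incoming pod: handle -> limit key
	newVolumes := map[string]string{"vol-2": limitKey, "vol-3": limitKey}
	// a handle that is already attached must not be counted again
	for handle := range attached {
		delete(newVolumes, handle)
	}
	attachedCount := map[string]int{}
	for _, key := range attached {
		attachedCount[key]++
	}
	newCount := map[string]int{}
	for _, key := range newVolumes {
		newCount[key]++
	}
	// the predicate fails only if attached + new would exceed the node limit
	for key, count := range newCount {
		fits := attachedCount[key]+count <= nodeLimit
		fmt.Printf("%s: %d attached + %d new against limit %d -> fits=%v\n",
			key, attachedCount[key], count, nodeLimit, fits) // 2 + 1 <= 3 -> true
	}
}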

View File

@@ -0,0 +1,179 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
)
func TestCSIVolumeCountPredicate(t *testing.T) {
// for pods with CSI PVCs
oneVolPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "csi-ebs",
},
},
},
},
},
}
twoVolPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "cs-ebs-1",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "csi-ebs-2",
},
},
},
},
},
}
runningPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "csi-ebs-3",
},
},
},
},
},
}
tests := []struct {
newPod *v1.Pod
existingPods []*v1.Pod
filterName string
maxVols int
fits bool
test string
}{
{
newPod: oneVolPod,
existingPods: []*v1.Pod{runningPod, twoVolPod},
filterName: "csi-ebs",
maxVols: 4,
fits: true,
test: "fits when node capacity >= new pods CSI volume",
},
{
newPod: oneVolPod,
existingPods: []*v1.Pod{runningPod, twoVolPod},
filterName: "csi-ebs",
maxVols: 2,
fits: false,
test: "doesn't when node capacity <= pods CSI volume",
},
}
defer utilfeaturetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.AttachVolumeLimit, true)()
expectedFailureReasons := []algorithm.PredicateFailureReason{ErrMaxVolumeCountExceeded}
// running attachable predicate tests with feature gate and limit present on nodes
for _, test := range tests {
node := getNodeWithPodAndVolumeLimits(test.existingPods, int64(test.maxVols), test.filterName)
pred := NewCSIMaxVolumeLimitPredicate(getFakeCSIPVInfo("csi-ebs", "csi-ebs"), getFakeCSIPVCInfo("csi-ebs"))
fits, reasons, err := pred(test.newPod, PredicateMetadata(test.newPod, nil), node)
if err != nil {
t.Errorf("Using allocatable [%s]%s: unexpected error: %v", test.filterName, test.test, err)
}
if !fits && !reflect.DeepEqual(reasons, expectedFailureReasons) {
t.Errorf("Using allocatable [%s]%s: unexpected failure reasons: %v, want: %v", test.filterName, test.test, reasons, expectedFailureReasons)
}
if fits != test.fits {
t.Errorf("Using allocatable [%s]%s: expected %v, got %v", test.filterName, test.test, test.fits, fits)
}
}
}
func getFakeCSIPVInfo(volumeName, driverName string) FakePersistentVolumeInfo {
return FakePersistentVolumeInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: volumeName},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{
CSI: &v1.CSIPersistentVolumeSource{
Driver: driverName,
VolumeHandle: volumeName,
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-2"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{
CSI: &v1.CSIPersistentVolumeSource{
Driver: driverName,
VolumeHandle: volumeName + "-2",
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-3"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{
CSI: &v1.CSIPersistentVolumeSource{
Driver: driverName,
VolumeHandle: volumeName + "-3",
},
},
},
},
}
}
func getFakeCSIPVCInfo(volumeName string) FakePersistentVolumeClaimInfo {
return FakePersistentVolumeClaimInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: volumeName},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: volumeName},
},
{
ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-2"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: volumeName + "-2"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-3"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: volumeName + "-3"},
},
}
}
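The node-side resource these tests count against is derived from the driver name by volumeutil.GetCSIAttachLimitKey. A rough sketch of that derivation as assumed here (to my understanding the real helper also hashes driver names that would push the key past the 63-character resource-name limit, which is omitted):
package main
import "fmt"
// csiAttachLimitKey sketches the assumed limit-key derivation; the hashing
// of over-long driver names done by the real implementation is left out.
func csiAttachLimitKey(driverName string) string {
	return "attachable-volumes-csi-" + driverName
}
func main() {
	// the tests pass "csi-ebs" as both volume and driver name, so the node
	// allocatable consulted by the predicate would carry a resource like:
	fmt.Println(csiAttachLimitKey("csi-ebs")) // attachable-volumes-csi-csi-ebs
}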

View File

@@ -1,5 +1,5 @@
/*
Copyright 2016 The Kubernetes Authors.
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -14,23 +14,16 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package util
package predicates
import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"testing"
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing"
_ "k8s.io/kubernetes/pkg/features"
)
// GetControllerRef gets the pod's controller owner reference, if there is one.
func GetControllerRef(pod *v1.Pod) *metav1.OwnerReference {
if len(pod.OwnerReferences) == 0 {
return nil
}
for i := range pod.OwnerReferences {
ref := &pod.OwnerReferences[i]
if ref.Controller != nil && *ref.Controller {
return ref
}
}
return nil
func TestMain(m *testing.M) {
utilfeaturetesting.VerifyFeatureGatesUnchanged(utilfeature.DefaultFeatureGate, m.Run)
}

View File

@@ -29,6 +29,7 @@ import (
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing"
"k8s.io/kubernetes/pkg/features"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
@@ -741,60 +742,12 @@ func TestVolumeCountConflicts(t *testing.T) {
},
}
pvInfo := func(filterName string) FakePersistentVolumeInfo {
return FakePersistentVolumeInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "some" + filterName + "Vol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: strings.ToLower(filterName) + "Vol"},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNon" + filterName + "Vol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{},
},
},
}
}
pvcInfo := func(filterName string) FakePersistentVolumeClaimInfo {
return FakePersistentVolumeClaimInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "some" + filterName + "Vol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "some" + filterName + "Vol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNon" + filterName + "Vol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someNon" + filterName + "Vol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pvcWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "pvcWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherPVCWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "anotherPVCWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "unboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherUnboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
}
}
expectedFailureReasons := []algorithm.PredicateFailureReason{ErrMaxVolumeCountExceeded}
// running attachable predicate tests without feature gate and no limit present on nodes
for _, test := range tests {
os.Setenv(KubeMaxPDVols, strconv.Itoa(test.maxVols))
pred := NewMaxPDVolumeCountPredicate(test.filterName, pvInfo(test.filterName), pvcInfo(test.filterName))
pred := NewMaxPDVolumeCountPredicate(test.filterName, getFakePVInfo(test.filterName), getFakePVCInfo(test.filterName))
fits, reasons, err := pred(test.newPod, PredicateMetadata(test.newPod, nil), schedulercache.NewNodeInfo(test.existingPods...))
if err != nil {
t.Errorf("[%s]%s: unexpected error: %v", test.filterName, test.test, err)
@@ -812,7 +765,7 @@ func TestVolumeCountConflicts(t *testing.T) {
// running attachable predicate tests with feature gate and limit present on nodes
for _, test := range tests {
node := getNodeWithPodAndVolumeLimits(test.existingPods, int64(test.maxVols), test.filterName)
pred := NewMaxPDVolumeCountPredicate(test.filterName, pvInfo(test.filterName), pvcInfo(test.filterName))
pred := NewMaxPDVolumeCountPredicate(test.filterName, getFakePVInfo(test.filterName), getFakePVCInfo(test.filterName))
fits, reasons, err := pred(test.newPod, PredicateMetadata(test.newPod, nil), node)
if err != nil {
t.Errorf("Using allocatable [%s]%s: unexpected error: %v", test.filterName, test.test, err)
@@ -826,6 +779,122 @@ func TestVolumeCountConflicts(t *testing.T) {
}
}
func getFakePVInfo(filterName string) FakePersistentVolumeInfo {
return FakePersistentVolumeInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "some" + filterName + "Vol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: strings.ToLower(filterName) + "Vol"},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNon" + filterName + "Vol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{},
},
},
}
}
func getFakePVCInfo(filterName string) FakePersistentVolumeClaimInfo {
return FakePersistentVolumeClaimInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "some" + filterName + "Vol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "some" + filterName + "Vol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNon" + filterName + "Vol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someNon" + filterName + "Vol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pvcWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "pvcWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherPVCWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "anotherPVCWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "unboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherUnboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
}
}
func TestMaxVolumeFuncM5(t *testing.T) {
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-for-m5-instance",
Labels: map[string]string{
kubeletapis.LabelInstanceType: "m5.large",
},
},
}
os.Unsetenv(KubeMaxPDVols)
maxVolumeFunc := getMaxVolumeFunc(EBSVolumeFilterType)
maxVolume := maxVolumeFunc(node)
if maxVolume != volumeutil.DefaultMaxEBSNitroVolumeLimit {
t.Errorf("Expected max volume to be %d got %d", volumeutil.DefaultMaxEBSNitroVolumeLimit, maxVolume)
}
}
func TestMaxVolumeFuncT3(t *testing.T) {
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-for-t3-instance",
Labels: map[string]string{
kubeletapis.LabelInstanceType: "t3.medium",
},
},
}
os.Unsetenv(KubeMaxPDVols)
maxVolumeFunc := getMaxVolumeFunc(EBSVolumeFilterType)
maxVolume := maxVolumeFunc(node)
if maxVolume != volumeutil.DefaultMaxEBSNitroVolumeLimit {
t.Errorf("Expected max volume to be %d got %d", volumeutil.DefaultMaxEBSNitroVolumeLimit, maxVolume)
}
}
func TestMaxVolumeFuncR5(t *testing.T) {
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-for-r5-instance",
Labels: map[string]string{
kubeletapis.LabelInstanceType: "r5d.xlarge",
},
},
}
os.Unsetenv(KubeMaxPDVols)
maxVolumeFunc := getMaxVolumeFunc(EBSVolumeFilterType)
maxVolume := maxVolumeFunc(node)
if maxVolume != volumeutil.DefaultMaxEBSNitroVolumeLimit {
t.Errorf("Expected max volume to be %d got %d", volumeutil.DefaultMaxEBSNitroVolumeLimit, maxVolume)
}
}
func TestMaxVolumeFuncM4(t *testing.T) {
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-for-m4-instance",
Labels: map[string]string{
kubeletapis.LabelInstanceType: "m4.2xlarge",
},
},
}
os.Unsetenv(KubeMaxPDVols)
maxVolumeFunc := getMaxVolumeFunc(EBSVolumeFilterType)
maxVolume := maxVolumeFunc(node)
if maxVolume != volumeutil.DefaultMaxEBSVolumes {
t.Errorf("Expected max volume to be %d got %d", volumeutil.DefaultMaxEBSVolumes, maxVolume)
}
}
func getNodeWithPodAndVolumeLimits(pods []*v1.Pod, limit int64, filter string) *schedulercache.NodeInfo {
nodeInfo := schedulercache.NewNodeInfo(pods...)
node := &v1.Node{
@@ -849,6 +918,6 @@ func getVolumeLimitKey(filterType string) v1.ResourceName {
case AzureDiskVolumeFilterType:
return v1.ResourceName(volumeutil.AzureVolumeLimitKey)
default:
return ""
return v1.ResourceName(volumeutil.GetCSIAttachLimitKey(filterType))
}
}
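A condensed sketch of the instance-type dispatch the four TestMaxVolumeFunc tests exercise, assuming Nitro-based families (m5, t3, r5, including d-variants such as r5d) get DefaultMaxEBSNitroVolumeLimit (25) while older families such as m4 get DefaultMaxEBSVolumes (39); the real getMaxVolumeFunc also honors the KubeMaxPDVols environment variable, which is why each test unsets it first:
package main
import (
	"fmt"
	"strings"
)
// nitroFamilies mirrors the Nitro instance families covered by the tests above.
var nitroFamilies = map[string]bool{"m5": true, "t3": true, "r5": true}
func maxEBSVolumes(instanceType string) int {
	family := strings.SplitN(instanceType, ".", 2)[0]
	family = strings.TrimSuffix(family, "d") // r5d -> r5
	if nitroFamilies[family] {
		return 25 // assumed DefaultMaxEBSNitroVolumeLimit
	}
	return 39 // assumed DefaultMaxEBSVolumes
}
func main() {
	for _, it := range []string{"m5.large", "t3.medium", "r5d.xlarge", "m4.2xlarge"} {
		fmt.Println(it, "->", maxEBSVolumes(it)) // 25, 25, 25, 39
	}
}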

View File

@@ -17,10 +17,11 @@ limitations under the License.
package predicates
import (
"context"
"fmt"
"sync"
"github.com/golang/glog"
"k8s.io/klog"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -38,6 +39,12 @@ type PredicateMetadataFactory struct {
podLister algorithm.PodLister
}
// topologyPair is a topology key/value pair used in predicate metadata
type topologyPair struct {
key string
value string
}
// Note that predicateMetadata and matchingPodAntiAffinityTerm need to be declared in the same file
// due to the way declarations are processed in predicate declaration unit tests.
type matchingPodAntiAffinityTerm struct {
@@ -45,6 +52,17 @@ type matchingPodAntiAffinityTerm struct {
node *v1.Node
}
type podSet map[*v1.Pod]struct{}
type topologyPairSet map[topologyPair]struct{}
// topologyPairsMaps keeps topologyPairToPods and podToTopologyPairs in sync
// as they are the inverse of each other.
type topologyPairsMaps struct {
topologyPairToPods map[topologyPair]podSet
podToTopologyPairs map[string]topologyPairSet
}
// NOTE: When new fields are added/removed or logic is changed, please make sure that
// RemovePod, AddPod, and ShallowCopy functions are updated to work with the new changes.
type predicateMetadata struct {
@@ -52,17 +70,17 @@ type predicateMetadata struct {
podBestEffort bool
podRequest *schedulercache.Resource
podPorts []*v1.ContainerPort
// key is a pod full name with the anti-affinity rules.
matchingAntiAffinityTerms map[string][]matchingPodAntiAffinityTerm
// A map of node name to a list of Pods on the node that can potentially match
// the affinity rules of the "pod".
nodeNameToMatchingAffinityPods map[string][]*v1.Pod
// A map of node name to a list of Pods on the node that can potentially match
// the anti-affinity rules of the "pod".
nodeNameToMatchingAntiAffinityPods map[string][]*v1.Pod
serviceAffinityInUse bool
serviceAffinityMatchingPodList []*v1.Pod
serviceAffinityMatchingPodServices []*v1.Service
topologyPairsAntiAffinityPodsMap *topologyPairsMaps
// A map of topology pairs to a list of Pods that can potentially match
// the affinity terms of the "pod" and its inverse.
topologyPairsPotentialAffinityPods *topologyPairsMaps
// A map of topology pairs to a list of Pods that can potentially match
// the anti-affinity terms of the "pod" and its inverse.
topologyPairsPotentialAntiAffinityPods *topologyPairsMaps
serviceAffinityInUse bool
serviceAffinityMatchingPodList []*v1.Pod
serviceAffinityMatchingPodServices []*v1.Service
// ignoredExtendedResources is a set of extended resource names that will
// be ignored in the PodFitsResources predicate.
//
@@ -113,31 +131,74 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf
if pod == nil {
return nil
}
matchingTerms, err := getMatchingAntiAffinityTerms(pod, nodeNameToInfoMap)
// existingPodAntiAffinityMap will be used later for an efficient check of existing pods' anti-affinity
existingPodAntiAffinityMap, err := getTPMapMatchingExistingAntiAffinity(pod, nodeNameToInfoMap)
if err != nil {
return nil
}
affinityPods, antiAffinityPods, err := getPodsMatchingAffinity(pod, nodeNameToInfoMap)
// incomingPodAffinityMap will be used later for an efficient check of the incoming pod's affinity
// incomingPodAntiAffinityMap will be used later for an efficient check of the incoming pod's anti-affinity
incomingPodAffinityMap, incomingPodAntiAffinityMap, err := getTPMapMatchingIncomingAffinityAntiAffinity(pod, nodeNameToInfoMap)
if err != nil {
glog.Errorf("[predicate meta data generation] error finding pods that match affinity terms: %v", err)
klog.Errorf("[predicate meta data generation] error finding pods that match affinity terms: %v", err)
return nil
}
predicateMetadata := &predicateMetadata{
pod: pod,
podBestEffort: isPodBestEffort(pod),
podRequest: GetResourceRequest(pod),
podPorts: schedutil.GetContainerPorts(pod),
matchingAntiAffinityTerms: matchingTerms,
nodeNameToMatchingAffinityPods: affinityPods,
nodeNameToMatchingAntiAffinityPods: antiAffinityPods,
pod: pod,
podBestEffort: isPodBestEffort(pod),
podRequest: GetResourceRequest(pod),
podPorts: schedutil.GetContainerPorts(pod),
topologyPairsPotentialAffinityPods: incomingPodAffinityMap,
topologyPairsPotentialAntiAffinityPods: incomingPodAntiAffinityMap,
topologyPairsAntiAffinityPodsMap: existingPodAntiAffinityMap,
}
for predicateName, precomputeFunc := range predicateMetadataProducers {
glog.V(10).Infof("Precompute: %v", predicateName)
klog.V(10).Infof("Precompute: %v", predicateName)
precomputeFunc(predicateMetadata)
}
return predicateMetadata
}
// returns a pointer to a new topologyPairsMaps
func newTopologyPairsMaps() *topologyPairsMaps {
return &topologyPairsMaps{topologyPairToPods: make(map[topologyPair]podSet),
podToTopologyPairs: make(map[string]topologyPairSet)}
}
func (topologyPairsMaps *topologyPairsMaps) addTopologyPair(pair topologyPair, pod *v1.Pod) {
podFullName := schedutil.GetPodFullName(pod)
if topologyPairsMaps.topologyPairToPods[pair] == nil {
topologyPairsMaps.topologyPairToPods[pair] = make(map[*v1.Pod]struct{})
}
topologyPairsMaps.topologyPairToPods[pair][pod] = struct{}{}
if topologyPairsMaps.podToTopologyPairs[podFullName] == nil {
topologyPairsMaps.podToTopologyPairs[podFullName] = make(map[topologyPair]struct{})
}
topologyPairsMaps.podToTopologyPairs[podFullName][pair] = struct{}{}
}
func (topologyPairsMaps *topologyPairsMaps) removePod(deletedPod *v1.Pod) {
deletedPodFullName := schedutil.GetPodFullName(deletedPod)
for pair := range topologyPairsMaps.podToTopologyPairs[deletedPodFullName] {
delete(topologyPairsMaps.topologyPairToPods[pair], deletedPod)
if len(topologyPairsMaps.topologyPairToPods[pair]) == 0 {
delete(topologyPairsMaps.topologyPairToPods, pair)
}
}
delete(topologyPairsMaps.podToTopologyPairs, deletedPodFullName)
}
func (topologyPairsMaps *topologyPairsMaps) appendMaps(toAppend *topologyPairsMaps) {
if toAppend == nil {
return
}
for pair := range toAppend.topologyPairToPods {
for pod := range toAppend.topologyPairToPods[pair] {
topologyPairsMaps.addTopologyPair(pair, pod)
}
}
}
// RemovePod changes predicateMetadata assuming that the given `deletedPod` is
// deleted from the system.
func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error {
@@ -145,35 +206,10 @@ func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error {
if deletedPodFullName == schedutil.GetPodFullName(meta.pod) {
return fmt.Errorf("deletedPod and meta.pod must not be the same")
}
// Delete any anti-affinity rule from the deletedPod.
delete(meta.matchingAntiAffinityTerms, deletedPodFullName)
// Delete pod from the matching affinity or anti-affinity pods if exists.
affinity := meta.pod.Spec.Affinity
podNodeName := deletedPod.Spec.NodeName
if affinity != nil && len(podNodeName) > 0 {
if affinity.PodAffinity != nil {
for i, p := range meta.nodeNameToMatchingAffinityPods[podNodeName] {
if p == deletedPod {
s := meta.nodeNameToMatchingAffinityPods[podNodeName]
s[i] = s[len(s)-1]
s = s[:len(s)-1]
meta.nodeNameToMatchingAffinityPods[podNodeName] = s
break
}
}
}
if affinity.PodAntiAffinity != nil {
for i, p := range meta.nodeNameToMatchingAntiAffinityPods[podNodeName] {
if p == deletedPod {
s := meta.nodeNameToMatchingAntiAffinityPods[podNodeName]
s[i] = s[len(s)-1]
s = s[:len(s)-1]
meta.nodeNameToMatchingAntiAffinityPods[podNodeName] = s
break
}
}
}
}
meta.topologyPairsAntiAffinityPodsMap.removePod(deletedPod)
// Delete pod from the matching affinity or anti-affinity topology pairs maps.
meta.topologyPairsPotentialAffinityPods.removePod(deletedPod)
meta.topologyPairsPotentialAntiAffinityPods.removePod(deletedPod)
// All pods in the serviceAffinityMatchingPodList are in the same namespace.
// So, if the namespace of the first one is not the same as the namespace of the
// deletedPod, we don't need to check the list, as deletedPod isn't in the list.
@@ -203,46 +239,36 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache
return fmt.Errorf("invalid node in nodeInfo")
}
// Add matching anti-affinity terms of the addedPod to the map.
podMatchingTerms, err := getMatchingAntiAffinityTermsOfExistingPod(meta.pod, addedPod, nodeInfo.Node())
topologyPairsMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(meta.pod, addedPod, nodeInfo.Node())
if err != nil {
return err
}
if len(podMatchingTerms) > 0 {
existingTerms, found := meta.matchingAntiAffinityTerms[addedPodFullName]
if found {
meta.matchingAntiAffinityTerms[addedPodFullName] = append(existingTerms,
podMatchingTerms...)
} else {
meta.matchingAntiAffinityTerms[addedPodFullName] = podMatchingTerms
}
}
meta.topologyPairsAntiAffinityPodsMap.appendMaps(topologyPairsMaps)
// Add the pod to nodeNameToMatchingAffinityPods and nodeNameToMatchingAntiAffinityPods if needed.
affinity := meta.pod.Spec.Affinity
podNodeName := addedPod.Spec.NodeName
if affinity != nil && len(podNodeName) > 0 {
podNode := nodeInfo.Node()
// It is assumed that when the added pod matches affinity of the meta.pod, all the terms must match;
// this should be changed when the implementation of targetPodMatchesAffinityOfPod/podMatchesAllAffinityTermProperties
// is changed.
if targetPodMatchesAffinityOfPod(meta.pod, addedPod) {
found := false
for _, p := range meta.nodeNameToMatchingAffinityPods[podNodeName] {
if p == addedPod {
found = true
break
affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
for _, term := range affinityTerms {
if topologyValue, ok := podNode.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
meta.topologyPairsPotentialAffinityPods.addTopologyPair(pair, addedPod)
}
}
if !found {
meta.nodeNameToMatchingAffinityPods[podNodeName] = append(meta.nodeNameToMatchingAffinityPods[podNodeName], addedPod)
}
}
if targetPodMatchesAntiAffinityOfPod(meta.pod, addedPod) {
found := false
for _, p := range meta.nodeNameToMatchingAntiAffinityPods[podNodeName] {
if p == addedPod {
found = true
break
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
for _, term := range antiAffinityTerms {
if topologyValue, ok := podNode.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
meta.topologyPairsPotentialAntiAffinityPods.addTopologyPair(pair, addedPod)
}
}
if !found {
meta.nodeNameToMatchingAntiAffinityPods[podNodeName] = append(meta.nodeNameToMatchingAntiAffinityPods[podNodeName], addedPod)
}
}
}
// If addedPod is in the same namespace as the meta.pod, update the list
@@ -268,18 +294,12 @@ func (meta *predicateMetadata) ShallowCopy() algorithm.PredicateMetadata {
ignoredExtendedResources: meta.ignoredExtendedResources,
}
newPredMeta.podPorts = append([]*v1.ContainerPort(nil), meta.podPorts...)
newPredMeta.matchingAntiAffinityTerms = map[string][]matchingPodAntiAffinityTerm{}
for k, v := range meta.matchingAntiAffinityTerms {
newPredMeta.matchingAntiAffinityTerms[k] = append([]matchingPodAntiAffinityTerm(nil), v...)
}
newPredMeta.nodeNameToMatchingAffinityPods = make(map[string][]*v1.Pod)
for k, v := range meta.nodeNameToMatchingAffinityPods {
newPredMeta.nodeNameToMatchingAffinityPods[k] = append([]*v1.Pod(nil), v...)
}
newPredMeta.nodeNameToMatchingAntiAffinityPods = make(map[string][]*v1.Pod)
for k, v := range meta.nodeNameToMatchingAntiAffinityPods {
newPredMeta.nodeNameToMatchingAntiAffinityPods[k] = append([]*v1.Pod(nil), v...)
}
newPredMeta.topologyPairsPotentialAffinityPods = newTopologyPairsMaps()
newPredMeta.topologyPairsPotentialAffinityPods.appendMaps(meta.topologyPairsPotentialAffinityPods)
newPredMeta.topologyPairsPotentialAntiAffinityPods = newTopologyPairsMaps()
newPredMeta.topologyPairsPotentialAntiAffinityPods.appendMaps(meta.topologyPairsPotentialAntiAffinityPods)
newPredMeta.topologyPairsAntiAffinityPodsMap = newTopologyPairsMaps()
newPredMeta.topologyPairsAntiAffinityPodsMap.appendMaps(meta.topologyPairsAntiAffinityPodsMap)
newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil),
meta.serviceAffinityMatchingPodServices...)
newPredMeta.serviceAffinityMatchingPodList = append([]*v1.Pod(nil),
@@ -310,8 +330,8 @@ func getAffinityTermProperties(pod *v1.Pod, terms []v1.PodAffinityTerm) (propert
return properties, nil
}
// podMatchesAffinityTermProperties returns true IFF the given pod matches all the given properties.
func podMatchesAffinityTermProperties(pod *v1.Pod, properties []*affinityTermProperties) bool {
// podMatchesAllAffinityTermProperties returns true IFF the given pod matches all the given properties.
func podMatchesAllAffinityTermProperties(pod *v1.Pod, properties []*affinityTermProperties) bool {
if len(properties) == 0 {
return false
}
@@ -323,34 +343,93 @@ func podMatchesAffinityTermProperties(pod *v1.Pod, properties []*affinityTermPro
return true
}
// getPodsMatchingAffinity finds existing Pods that match affinity terms of the given "pod".
// It ignores topology. It returns a set of Pods that are checked later by the affinity
// predicate. With this set of pods available, the affinity predicate does not
// need to check all the pods in the cluster.
func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (affinityPods map[string][]*v1.Pod, antiAffinityPods map[string][]*v1.Pod, err error) {
allNodeNames := make([]string, 0, len(nodeInfoMap))
affinity := pod.Spec.Affinity
if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
return nil, nil, nil
// podMatchesAnyAffinityTermProperties returns true if the given pod matches any given property.
func podMatchesAnyAffinityTermProperties(pod *v1.Pod, properties []*affinityTermProperties) bool {
if len(properties) == 0 {
return false
}
for _, property := range properties {
if priorityutil.PodMatchesTermsNamespaceAndSelector(pod, property.namespaces, property.selector) {
return true
}
}
return false
}
// getTPMapMatchingExistingAntiAffinity calculates the following for each existing pod on each node:
// (1) Whether it has PodAntiAffinity
// (2) Whether any of its anti-affinity terms match the incoming pod
func getTPMapMatchingExistingAntiAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (*topologyPairsMaps, error) {
allNodeNames := make([]string, 0, len(nodeInfoMap))
for name := range nodeInfoMap {
allNodeNames = append(allNodeNames, name)
}
var lock sync.Mutex
var firstError error
affinityPods = make(map[string][]*v1.Pod)
antiAffinityPods = make(map[string][]*v1.Pod)
appendResult := func(nodeName string, affPods, antiAffPods []*v1.Pod) {
topologyMaps := newTopologyPairsMaps()
appendTopologyPairsMaps := func(toAppend *topologyPairsMaps) {
lock.Lock()
defer lock.Unlock()
if len(affPods) > 0 {
affinityPods[nodeName] = affPods
topologyMaps.appendMaps(toAppend)
}
catchError := func(err error) {
lock.Lock()
defer lock.Unlock()
if firstError == nil {
firstError = err
}
if len(antiAffPods) > 0 {
antiAffinityPods[nodeName] = antiAffPods
}
processNode := func(i int) {
nodeInfo := nodeInfoMap[allNodeNames[i]]
node := nodeInfo.Node()
if node == nil {
catchError(fmt.Errorf("node not found"))
return
}
for _, existingPod := range nodeInfo.PodsWithAffinity() {
existingPodTopologyMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(pod, existingPod, node)
if err != nil {
catchError(err)
return
}
appendTopologyPairsMaps(existingPodTopologyMaps)
}
}
workqueue.ParallelizeUntil(context.TODO(), 16, len(allNodeNames), processNode)
return topologyMaps, firstError
}
// getTPMapMatchingIncomingAffinityAntiAffinity finds existing Pods that match affinity terms of the given "pod".
// It returns a topologyPairsMaps that is checked later by the affinity
// predicate. With this topologyPairsMaps available, the affinity predicate does not
// need to check all the pods in the cluster.
func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (topologyPairsAffinityPodsMaps *topologyPairsMaps, topologyPairsAntiAffinityPodsMaps *topologyPairsMaps, err error) {
affinity := pod.Spec.Affinity
if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
return newTopologyPairsMaps(), newTopologyPairsMaps(), nil
}
allNodeNames := make([]string, 0, len(nodeInfoMap))
for name := range nodeInfoMap {
allNodeNames = append(allNodeNames, name)
}
var lock sync.Mutex
var firstError error
topologyPairsAffinityPodsMaps = newTopologyPairsMaps()
topologyPairsAntiAffinityPodsMaps = newTopologyPairsMaps()
appendResult := func(nodeName string, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps *topologyPairsMaps) {
lock.Lock()
defer lock.Unlock()
if len(nodeTopologyPairsAffinityPodsMaps.topologyPairToPods) > 0 {
topologyPairsAffinityPodsMaps.appendMaps(nodeTopologyPairsAffinityPodsMaps)
}
if len(nodeTopologyPairsAntiAffinityPodsMaps.topologyPairToPods) > 0 {
topologyPairsAntiAffinityPodsMaps.appendMaps(nodeTopologyPairsAntiAffinityPodsMaps)
}
}
@@ -362,14 +441,12 @@ func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache
}
}
affinityProperties, err := getAffinityTermProperties(pod, GetPodAffinityTerms(affinity.PodAffinity))
if err != nil {
return nil, nil, err
}
antiAffinityProperties, err := getAffinityTermProperties(pod, GetPodAntiAffinityTerms(affinity.PodAntiAffinity))
affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
affinityProperties, err := getAffinityTermProperties(pod, affinityTerms)
if err != nil {
return nil, nil, err
}
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
processNode := func(i int) {
nodeInfo := nodeInfoMap[allNodeNames[i]]
@@ -378,28 +455,44 @@ func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache
catchError(fmt.Errorf("nodeInfo.Node is nil"))
return
}
affPods := make([]*v1.Pod, 0, len(nodeInfo.Pods()))
antiAffPods := make([]*v1.Pod, 0, len(nodeInfo.Pods()))
nodeTopologyPairsAffinityPodsMaps := newTopologyPairsMaps()
nodeTopologyPairsAntiAffinityPodsMaps := newTopologyPairsMaps()
for _, existingPod := range nodeInfo.Pods() {
// Check affinity properties.
if podMatchesAffinityTermProperties(existingPod, affinityProperties) {
affPods = append(affPods, existingPod)
if podMatchesAllAffinityTermProperties(existingPod, affinityProperties) {
for _, term := range affinityTerms {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
nodeTopologyPairsAffinityPodsMaps.addTopologyPair(pair, existingPod)
}
}
}
// Check anti-affinity properties.
if podMatchesAffinityTermProperties(existingPod, antiAffinityProperties) {
antiAffPods = append(antiAffPods, existingPod)
for _, term := range antiAffinityTerms {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
catchError(err)
return
}
if priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
nodeTopologyPairsAntiAffinityPodsMaps.addTopologyPair(pair, existingPod)
}
}
}
}
if len(antiAffPods) > 0 || len(affPods) > 0 {
appendResult(node.Name, affPods, antiAffPods)
if len(nodeTopologyPairsAffinityPodsMaps.topologyPairToPods) > 0 || len(nodeTopologyPairsAntiAffinityPodsMaps.topologyPairToPods) > 0 {
appendResult(node.Name, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps)
}
}
workqueue.Parallelize(16, len(allNodeNames), processNode)
return affinityPods, antiAffinityPods, firstError
workqueue.ParallelizeUntil(context.TODO(), 16, len(allNodeNames), processNode)
return topologyPairsAffinityPodsMaps, topologyPairsAntiAffinityPodsMaps, firstError
}
// podMatchesAffinity returns true if "targetPod" matches any affinity rule of
// "pod". Similar to getPodsMatchingAffinity, this function does not check topology.
// targetPodMatchesAffinityOfPod returns true if "targetPod" matches ALL affinity terms of
// "pod". This function does not check topology.
// So, whether the targetPod actually matches or not needs further checks for a specific
// node.
func targetPodMatchesAffinityOfPod(pod, targetPod *v1.Pod) bool {
@@ -409,14 +502,14 @@ func targetPodMatchesAffinityOfPod(pod, targetPod *v1.Pod) bool {
}
affinityProperties, err := getAffinityTermProperties(pod, GetPodAffinityTerms(affinity.PodAffinity))
if err != nil {
glog.Errorf("error in getting affinity properties of Pod %v", pod.Name)
klog.Errorf("error in getting affinity properties of Pod %v", pod.Name)
return false
}
return podMatchesAffinityTermProperties(targetPod, affinityProperties)
return podMatchesAllAffinityTermProperties(targetPod, affinityProperties)
}
// targetPodMatchesAntiAffinityOfPod returns true if "targetPod" matches any anti-affinity
// rule of "pod". Similar to getPodsMatchingAffinity, this function does not check topology.
// targetPodMatchesAntiAffinityOfPod returns true if "targetPod" matches ANY anti-affinity
// term of "pod". This function does not check topology.
// So, whether the targetPod actually matches or not needs further checks for a specific
// node.
func targetPodMatchesAntiAffinityOfPod(pod, targetPod *v1.Pod) bool {
@@ -426,8 +519,8 @@ func targetPodMatchesAntiAffinityOfPod(pod, targetPod *v1.Pod) bool {
}
properties, err := getAffinityTermProperties(pod, GetPodAntiAffinityTerms(affinity.PodAntiAffinity))
if err != nil {
glog.Errorf("error in getting anti-affinity properties of Pod %v", pod.Name)
klog.Errorf("error in getting anti-affinity properties of Pod %v", pod.Name)
return false
}
return podMatchesAffinityTermProperties(targetPod, properties)
return podMatchesAnyAffinityTermProperties(targetPod, properties)
}
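For reference, a minimal standalone sketch of the invariant topologyPairsMaps maintains above — topologyPairToPods and podToTopologyPairs stay exact inverses through add and remove — with pods reduced to plain strings instead of *v1.Pod and scheduler pod full names (this sketch is not part of the diff above):
package main
import "fmt"
type pair struct{ key, value string }
type tpMaps struct {
	pairToPods map[pair]map[string]struct{}
	podToPairs map[string]map[pair]struct{}
}
func newTPMaps() *tpMaps {
	return &tpMaps{
		pairToPods: map[pair]map[string]struct{}{},
		podToPairs: map[string]map[pair]struct{}{},
	}
}
func (m *tpMaps) add(p pair, pod string) {
	if m.pairToPods[p] == nil {
		m.pairToPods[p] = map[string]struct{}{}
	}
	m.pairToPods[p][pod] = struct{}{}
	if m.podToPairs[pod] == nil {
		m.podToPairs[pod] = map[pair]struct{}{}
	}
	m.podToPairs[pod][p] = struct{}{}
}
func (m *tpMaps) removePod(pod string) {
	for p := range m.podToPairs[pod] {
		delete(m.pairToPods[p], pod)
		if len(m.pairToPods[p]) == 0 {
			delete(m.pairToPods, p) // drop emptied pairs, as removePod does above
		}
	}
	delete(m.podToPairs, pod)
}
func main() {
	m := newTPMaps()
	m.add(pair{"zone", "us-east-1a"}, "p1")
	m.add(pair{"zone", "us-east-1a"}, "p2")
	m.removePod("p1")
	fmt.Println(len(m.pairToPods[pair{"zone", "us-east-1a"}])) // 1: p2 remains
	m.removePod("p2")
	fmt.Println(len(m.pairToPods)) // 0: the emptied pair was dropped
}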

View File

@@ -28,42 +28,6 @@ import (
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
// sortableAntiAffinityTerms lets us sort anti-affinity terms.
type sortableAntiAffinityTerms []matchingPodAntiAffinityTerm
// Less establishes some ordering between two matchingPodAntiAffinityTerms for
// sorting.
func (s sortableAntiAffinityTerms) Less(i, j int) bool {
t1, t2 := s[i], s[j]
if t1.node.Name != t2.node.Name {
return t1.node.Name < t2.node.Name
}
if len(t1.term.Namespaces) != len(t2.term.Namespaces) {
return len(t1.term.Namespaces) < len(t2.term.Namespaces)
}
if t1.term.TopologyKey != t2.term.TopologyKey {
return t1.term.TopologyKey < t2.term.TopologyKey
}
if len(t1.term.LabelSelector.MatchLabels) != len(t2.term.LabelSelector.MatchLabels) {
return len(t1.term.LabelSelector.MatchLabels) < len(t2.term.LabelSelector.MatchLabels)
}
return false
}
func (s sortableAntiAffinityTerms) Len() int { return len(s) }
func (s sortableAntiAffinityTerms) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
var _ = sort.Interface(sortableAntiAffinityTerms{})
func sortAntiAffinityTerms(terms map[string][]matchingPodAntiAffinityTerm) {
for k, v := range terms {
sortableTerms := sortableAntiAffinityTerms(v)
sort.Sort(sortableTerms)
terms[k] = sortableTerms
}
}
// sortablePods lets us sort pods.
type sortablePods []*v1.Pod
@@ -88,13 +52,6 @@ func (s sortableServices) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
var _ = sort.Interface(&sortableServices{})
func sortNodePodMap(np map[string][]*v1.Pod) {
for _, pl := range np {
sortablePods := sortablePods(pl)
sort.Sort(sortablePods)
}
}
// predicateMetadataEquivalent returns nil if the two metadata are equivalent.
// Note: this function does not compare podRequest.
func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
@@ -113,20 +70,19 @@ func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
if !reflect.DeepEqual(meta1.podPorts, meta2.podPorts) {
return fmt.Errorf("podPorts are not equal")
}
sortAntiAffinityTerms(meta1.matchingAntiAffinityTerms)
sortAntiAffinityTerms(meta2.matchingAntiAffinityTerms)
if !reflect.DeepEqual(meta1.matchingAntiAffinityTerms, meta2.matchingAntiAffinityTerms) {
return fmt.Errorf("matchingAntiAffinityTerms are not euqal")
if !reflect.DeepEqual(meta1.topologyPairsPotentialAffinityPods, meta2.topologyPairsPotentialAffinityPods) {
return fmt.Errorf("topologyPairsPotentialAffinityPods are not equal")
}
sortNodePodMap(meta1.nodeNameToMatchingAffinityPods)
sortNodePodMap(meta2.nodeNameToMatchingAffinityPods)
if !reflect.DeepEqual(meta1.nodeNameToMatchingAffinityPods, meta2.nodeNameToMatchingAffinityPods) {
return fmt.Errorf("nodeNameToMatchingAffinityPods are not euqal")
if !reflect.DeepEqual(meta1.topologyPairsPotentialAntiAffinityPods, meta2.topologyPairsPotentialAntiAffinityPods) {
return fmt.Errorf("topologyPairsPotentialAntiAffinityPods are not equal")
}
sortNodePodMap(meta1.nodeNameToMatchingAntiAffinityPods)
sortNodePodMap(meta2.nodeNameToMatchingAntiAffinityPods)
if !reflect.DeepEqual(meta1.nodeNameToMatchingAntiAffinityPods, meta2.nodeNameToMatchingAntiAffinityPods) {
return fmt.Errorf("nodeNameToMatchingAntiAffinityPods are not euqal")
if !reflect.DeepEqual(meta1.topologyPairsAntiAffinityPodsMap.podToTopologyPairs,
meta2.topologyPairsAntiAffinityPodsMap.podToTopologyPairs) {
return fmt.Errorf("topologyPairsAntiAffinityPodsMap.podToTopologyPairs are not equal")
}
if !reflect.DeepEqual(meta1.topologyPairsAntiAffinityPodsMap.topologyPairToPods,
meta2.topologyPairsAntiAffinityPodsMap.topologyPairToPods) {
return fmt.Errorf("topologyPairsAntiAffinityPodsMap.topologyPairToPods are not equal")
}
if meta1.serviceAffinityInUse {
sortablePods1 := sortablePods(meta1.serviceAffinityMatchingPodList)
@@ -236,7 +192,7 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
}
tests := []struct {
description string
name string
pendingPod *v1.Pod
addedPod *v1.Pod
existingPods []*v1.Pod
@@ -244,7 +200,7 @@
services []*v1.Service
}{
{
description: "no anti-affinity or service affinity exist",
name: "no anti-affinity or service affinity exist",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
@@ -267,7 +223,7 @@
},
},
{
description: "metadata anti-affinity terms are updated correctly after adding and removing a pod",
name: "metadata anti-affinity terms are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
@@ -300,7 +256,7 @@
},
},
{
description: "metadata service-affinity data are updated correctly after adding and removing a pod",
name: "metadata service-affinity data are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
@@ -324,7 +280,7 @@
},
},
{
description: "metadata anti-affinity terms and service affinity data are updated correctly after adding and removing a pod",
name: "metadata anti-affinity terms and service affinity data are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
@@ -358,7 +314,7 @@
},
},
{
description: "metadata matching pod affinity and anti-affinity are updated correctly after adding and removing a pod",
name: "metadata matching pod affinity and anti-affinity are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
@@ -395,44 +351,46 @@
}
for _, test := range tests {
allPodLister := schedulertesting.FakePodLister(append(test.existingPods, test.addedPod))
// getMeta creates predicate meta data given the list of pods.
getMeta := func(lister schedulertesting.FakePodLister) (*predicateMetadata, map[string]*schedulercache.NodeInfo) {
nodeInfoMap := schedulercache.CreateNodeNameToInfoMap(lister, test.nodes)
// nodeList is a list of non-pointer nodes to feed to FakeNodeListInfo.
nodeList := []v1.Node{}
for _, n := range test.nodes {
nodeList = append(nodeList, *n)
t.Run(test.name, func(t *testing.T) {
allPodLister := schedulertesting.FakePodLister(append(test.existingPods, test.addedPod))
// getMeta creates predicate meta data given the list of pods.
getMeta := func(lister schedulertesting.FakePodLister) (*predicateMetadata, map[string]*schedulercache.NodeInfo) {
nodeInfoMap := schedulercache.CreateNodeNameToInfoMap(lister, test.nodes)
// nodeList is a list of non-pointer nodes to feed to FakeNodeListInfo.
nodeList := []v1.Node{}
for _, n := range test.nodes {
nodeList = append(nodeList, *n)
}
_, precompute := NewServiceAffinityPredicate(lister, schedulertesting.FakeServiceLister(test.services), FakeNodeListInfo(nodeList), nil)
RegisterPredicateMetadataProducer("ServiceAffinityMetaProducer", precompute)
pmf := PredicateMetadataFactory{lister}
meta := pmf.GetMetadata(test.pendingPod, nodeInfoMap)
return meta.(*predicateMetadata), nodeInfoMap
}
_, precompute := NewServiceAffinityPredicate(lister, schedulertesting.FakeServiceLister(test.services), FakeNodeListInfo(nodeList), nil)
RegisterPredicateMetadataProducer("ServiceAffinityMetaProducer", precompute)
pmf := PredicateMetadataFactory{lister}
meta := pmf.GetMetadata(test.pendingPod, nodeInfoMap)
return meta.(*predicateMetadata), nodeInfoMap
}
// allPodsMeta is meta data produced when all pods, including test.addedPod
// are given to the metadata producer.
allPodsMeta, _ := getMeta(allPodLister)
// existingPodsMeta1 is meta data produced for test.existingPods (without test.addedPod).
existingPodsMeta1, nodeInfoMap := getMeta(schedulertesting.FakePodLister(test.existingPods))
// Add test.addedPod to existingPodsMeta1 and make sure meta is equal to allPodsMeta
nodeInfo := nodeInfoMap[test.addedPod.Spec.NodeName]
if err := existingPodsMeta1.AddPod(test.addedPod, nodeInfo); err != nil {
t.Errorf("test [%v]: error adding pod to meta: %v", test.description, err)
}
if err := predicateMetadataEquivalent(allPodsMeta, existingPodsMeta1); err != nil {
t.Errorf("test [%v]: meta data are not equivalent: %v", test.description, err)
}
// Remove the added pod from existingPodsMeta1 and make sure it is equal
// to the meta generated for existing pods.
existingPodsMeta2, _ := getMeta(schedulertesting.FakePodLister(test.existingPods))
if err := existingPodsMeta1.RemovePod(test.addedPod); err != nil {
t.Errorf("test [%v]: error removing pod from meta: %v", test.description, err)
}
if err := predicateMetadataEquivalent(existingPodsMeta1, existingPodsMeta2); err != nil {
t.Errorf("test [%v]: meta data are not equivalent: %v", test.description, err)
}
// allPodsMeta is meta data produced when all pods, including test.addedPod
// are given to the metadata producer.
allPodsMeta, _ := getMeta(allPodLister)
// existingPodsMeta1 is meta data produced for test.existingPods (without test.addedPod).
existingPodsMeta1, nodeInfoMap := getMeta(schedulertesting.FakePodLister(test.existingPods))
// Add test.addedPod to existingPodsMeta1 and make sure meta is equal to allPodsMeta
nodeInfo := nodeInfoMap[test.addedPod.Spec.NodeName]
if err := existingPodsMeta1.AddPod(test.addedPod, nodeInfo); err != nil {
t.Errorf("error adding pod to meta: %v", err)
}
if err := predicateMetadataEquivalent(allPodsMeta, existingPodsMeta1); err != nil {
t.Errorf("meta data are not equivalent: %v", err)
}
// Remove the added pod from existingPodsMeta1 and make sure it is equal
// to the meta generated for existing pods.
existingPodsMeta2, _ := getMeta(schedulertesting.FakePodLister(test.existingPods))
if err := existingPodsMeta1.RemovePod(test.addedPod); err != nil {
t.Errorf("error removing pod from meta: %v", err)
}
if err := predicateMetadataEquivalent(existingPodsMeta1, existingPodsMeta2); err != nil {
t.Errorf("meta data are not equivalent: %v", err)
}
})
}
}
@@ -463,52 +421,93 @@ func TestPredicateMetadata_ShallowCopy(t *testing.T) {
HostIP: "1.2.3.4",
},
},
matchingAntiAffinityTerms: map[string][]matchingPodAntiAffinityTerm{
"term1": {
{
term: &v1.PodAffinityTerm{TopologyKey: "node"},
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
},
topologyPairsAntiAffinityPodsMap: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "name", value: "machine1"}: {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeC"},
}: struct{}{},
},
{key: "name", value: "machine2"}: {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
}: struct{}{},
},
},
podToTopologyPairs: map[string]topologyPairSet{
"p2_": {
topologyPair{key: "name", value: "machine1"}: struct{}{},
},
"p1_": {
topologyPair{key: "name", value: "machine2"}: struct{}{},
},
},
},
nodeNameToMatchingAffinityPods: map[string][]*v1.Pod{
"nodeA": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
topologyPairsPotentialAffinityPods: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "name", value: "nodeA"}: {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
}: struct{}{},
},
{key: "name", value: "nodeC"}: {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
},
}: struct{}{},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeC"},
}: struct{}{},
},
},
"nodeC": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
},
podToTopologyPairs: map[string]topologyPairSet{
"p1_": {
topologyPair{key: "name", value: "nodeA"}: struct{}{},
},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeC"},
"p2_": {
topologyPair{key: "name", value: "nodeC"}: struct{}{},
},
"p6_": {
topologyPair{key: "name", value: "nodeC"}: struct{}{},
},
},
},
nodeNameToMatchingAntiAffinityPods: map[string][]*v1.Pod{
"nodeN": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeN"},
topologyPairsPotentialAntiAffinityPods: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "name", value: "nodeN"}: {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeN"},
}: struct{}{},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
}: struct{}{},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p3"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
}: struct{}{},
},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p3"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
{key: "name", value: "nodeM"}: {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeM"},
}: struct{}{},
},
},
"nodeM": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeM"},
podToTopologyPairs: map[string]topologyPairSet{
"p1_": {
topologyPair{key: "name", value: "nodeN"}: struct{}{},
},
"p2_": {
topologyPair{key: "name", value: "nodeN"}: struct{}{},
},
"p3_": {
topologyPair{key: "name", value: "nodeN"}: struct{}{},
},
"p6_": {
topologyPair{key: "name", value: "nodeM"}: struct{}{},
},
},
},
@ -526,3 +525,269 @@ func TestPredicateMetadata_ShallowCopy(t *testing.T) {
t.Errorf("Copy is not equal to source!")
}
}
// TestGetTPMapMatchingIncomingAffinityAntiAffinity tests the method getTPMapMatchingIncomingAffinityAntiAffinity
// on pod affinity and anti-affinity cases.
func TestGetTPMapMatchingIncomingAffinityAntiAffinity(t *testing.T) {
newPodAffinityTerms := func(keys ...string) []v1.PodAffinityTerm {
var terms []v1.PodAffinityTerm
for _, key := range keys {
terms = append(terms, v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: key,
Operator: metav1.LabelSelectorOpExists,
},
},
},
TopologyKey: "hostname",
})
}
return terms
}
newPod := func(labels ...string) *v1.Pod {
labelMap := make(map[string]string)
for _, l := range labels {
labelMap[l] = ""
}
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "normal", Labels: labelMap},
Spec: v1.PodSpec{NodeName: "nodeA"},
}
}
normalPodA := newPod("aaa")
normalPodB := newPod("bbb")
normalPodAB := newPod("aaa", "bbb")
nodeA := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"hostname": "nodeA"}}}
tests := []struct {
name string
existingPods []*v1.Pod
nodes []*v1.Node
pod *v1.Pod
wantAffinityPodsMaps *topologyPairsMaps
wantAntiAffinityPodsMaps *topologyPairsMaps
wantErr bool
}{
{
name: "nil test",
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "aaa-normal"},
},
wantAffinityPodsMaps: newTopologyPairsMaps(),
wantAntiAffinityPodsMaps: newTopologyPairsMaps(),
},
{
name: "incoming pod without affinity/anti-affinity causes a no-op",
existingPods: []*v1.Pod{normalPodA},
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "aaa-normal"},
},
wantAffinityPodsMaps: newTopologyPairsMaps(),
wantAntiAffinityPodsMaps: newTopologyPairsMaps(),
},
{
name: "no pod has label that violates incoming pod's affinity and anti-affinity",
existingPods: []*v1.Pod{normalPodB},
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "aaa-anti"},
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"),
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"),
},
},
},
},
wantAffinityPodsMaps: newTopologyPairsMaps(),
wantAntiAffinityPodsMaps: newTopologyPairsMaps(),
},
{
name: "existing pod matches incoming pod's affinity and anti-affinity - single term case",
existingPods: []*v1.Pod{normalPodA},
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "affi-antiaffi"},
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"),
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"),
},
},
},
},
wantAffinityPodsMaps: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "hostname", value: "nodeA"}: {normalPodA: struct{}{}},
},
podToTopologyPairs: map[string]topologyPairSet{
"normal_": {
topologyPair{key: "hostname", value: "nodeA"}: struct{}{},
},
},
},
wantAntiAffinityPodsMaps: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "hostname", value: "nodeA"}: {normalPodA: struct{}{}},
},
podToTopologyPairs: map[string]topologyPairSet{
"normal_": {
topologyPair{key: "hostname", value: "nodeA"}: struct{}{},
},
},
},
},
{
name: "existing pod matches incoming pod's affinity and anti-affinity - mutiple terms case",
existingPods: []*v1.Pod{normalPodAB},
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "affi-antiaffi"},
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"),
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa"),
},
},
},
},
wantAffinityPodsMaps: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "hostname", value: "nodeA"}: {normalPodAB: struct{}{}},
},
podToTopologyPairs: map[string]topologyPairSet{
"normal_": {
topologyPair{key: "hostname", value: "nodeA"}: struct{}{},
},
},
},
wantAntiAffinityPodsMaps: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "hostname", value: "nodeA"}: {normalPodAB: struct{}{}},
},
podToTopologyPairs: map[string]topologyPairSet{
"normal_": {
topologyPair{key: "hostname", value: "nodeA"}: struct{}{},
},
},
},
},
{
name: "existing pod not match incoming pod's affinity but matches anti-affinity",
existingPods: []*v1.Pod{normalPodA},
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "affi-antiaffi"},
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"),
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"),
},
},
},
},
wantAffinityPodsMaps: newTopologyPairsMaps(),
wantAntiAffinityPodsMaps: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "hostname", value: "nodeA"}: {normalPodA: struct{}{}},
},
podToTopologyPairs: map[string]topologyPairSet{
"normal_": {
topologyPair{key: "hostname", value: "nodeA"}: struct{}{},
},
},
},
},
{
name: "incoming pod's anti-affinity has more than one term - existing pod violates partial term - case 1",
existingPods: []*v1.Pod{normalPodAB},
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "anaffi-antiaffiti"},
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "ccc"),
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "ccc"),
},
},
},
},
wantAffinityPodsMaps: newTopologyPairsMaps(),
wantAntiAffinityPodsMaps: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "hostname", value: "nodeA"}: {normalPodAB: struct{}{}},
},
podToTopologyPairs: map[string]topologyPairSet{
"normal_": {
topologyPair{key: "hostname", value: "nodeA"}: struct{}{},
},
},
},
},
{
name: "incoming pod's anti-affinity has more than one term - existing pod violates partial term - case 2",
existingPods: []*v1.Pod{normalPodB},
nodes: []*v1.Node{nodeA},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "affi-antiaffi"},
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"),
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: newPodAffinityTerms("aaa", "bbb"),
},
},
},
},
wantAffinityPodsMaps: newTopologyPairsMaps(),
wantAntiAffinityPodsMaps: &topologyPairsMaps{
topologyPairToPods: map[topologyPair]podSet{
{key: "hostname", value: "nodeA"}: {normalPodB: struct{}{}},
},
podToTopologyPairs: map[string]topologyPairSet{
"normal_": {
topologyPair{key: "hostname", value: "nodeA"}: struct{}{},
},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
nodeInfoMap := schedulercache.CreateNodeNameToInfoMap(tt.existingPods, tt.nodes)
gotAffinityPodsMaps, gotAntiAffinityPodsMaps, err := getTPMapMatchingIncomingAffinityAntiAffinity(tt.pod, nodeInfoMap)
if (err != nil) != tt.wantErr {
t.Errorf("getTPMapMatchingIncomingAffinityAntiAffinity() error = %v, wantErr %v", err, tt.wantErr)
return
}
if !reflect.DeepEqual(gotAffinityPodsMaps, tt.wantAffinityPodsMaps) {
t.Errorf("getTPMapMatchingIncomingAffinityAntiAffinity() gotAffinityPodsMaps = %#v, want %#v", gotAffinityPodsMaps, tt.wantAffinityPodsMaps)
}
if !reflect.DeepEqual(gotAntiAffinityPodsMaps, tt.wantAntiAffinityPodsMaps) {
t.Errorf("getTPMapMatchingIncomingAffinityAntiAffinity() gotAntiAffinityPodsMaps = %#v, want %#v", gotAntiAffinityPodsMaps, tt.wantAntiAffinityPodsMaps)
}
})
}
}

View File

@ -20,10 +20,10 @@ import (
"errors"
"fmt"
"os"
"regexp"
"strconv"
"sync"
"github.com/golang/glog"
"k8s.io/klog"
"k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
@ -36,13 +36,13 @@ import (
utilfeature "k8s.io/apiserver/pkg/util/feature"
corelisters "k8s.io/client-go/listers/core/v1"
storagelisters "k8s.io/client-go/listers/storage/v1"
"k8s.io/client-go/util/workqueue"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/scheduler/volumebinder"
@ -84,6 +84,8 @@ const (
MaxGCEPDVolumeCountPred = "MaxGCEPDVolumeCount"
// MaxAzureDiskVolumeCountPred defines the name of predicate MaxAzureDiskVolumeCount.
MaxAzureDiskVolumeCountPred = "MaxAzureDiskVolumeCount"
// MaxCSIVolumeCountPred defines the name of predicate MaxCSIVolumeCount, which decides how many CSI volumes can be attached to a node.
MaxCSIVolumeCountPred = "MaxCSIVolumeCountPred"
// NoVolumeZoneConflictPred defines the name of predicate NoVolumeZoneConflict.
NoVolumeZoneConflictPred = "NoVolumeZoneConflict"
// CheckNodeMemoryPressurePred defines the name of predicate CheckNodeMemoryPressure.
@ -93,10 +95,6 @@ const (
// CheckNodePIDPressurePred defines the name of predicate CheckNodePIDPressure.
CheckNodePIDPressurePred = "CheckNodePIDPressure"
// DefaultMaxEBSVolumes is the limit for volumes attached to an instance.
// Amazon recommends no more than 40; the system root volume uses at least one.
// See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/volume_limits.html#linux-specific-volume-limits
DefaultMaxEBSVolumes = 39
// DefaultMaxGCEPDVolumes defines the maximum number of PD Volumes for GCE
// GCE instances can have up to 16 PD volumes attached.
DefaultMaxGCEPDVolumes = 16
@ -134,7 +132,7 @@ var (
GeneralPred, HostNamePred, PodFitsHostPortsPred,
MatchNodeSelectorPred, PodFitsResourcesPred, NoDiskConflictPred,
PodToleratesNodeTaintsPred, PodToleratesNodeNoExecuteTaintsPred, CheckNodeLabelPresencePred,
CheckServiceAffinityPred, MaxEBSVolumeCountPred, MaxGCEPDVolumeCountPred,
CheckServiceAffinityPred, MaxEBSVolumeCountPred, MaxGCEPDVolumeCountPred, MaxCSIVolumeCountPred,
MaxAzureDiskVolumeCountPred, CheckVolumeBindingPred, NoVolumeZoneConflictPred,
CheckNodeMemoryPressurePred, CheckNodePIDPressurePred, CheckNodeDiskPressurePred, MatchInterPodAffinityPred}
)
@ -291,7 +289,7 @@ func NoDiskConflict(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *sch
type MaxPDVolumeCountChecker struct {
filter VolumeFilter
volumeLimitKey v1.ResourceName
maxVolumes int
maxVolumeFunc func(node *v1.Node) int
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo
@ -317,7 +315,6 @@ type VolumeFilter struct {
func NewMaxPDVolumeCountPredicate(
filterName string, pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate {
var filter VolumeFilter
var maxVolumes int
var volumeLimitKey v1.ResourceName
switch filterName {
@ -325,17 +322,14 @@ func NewMaxPDVolumeCountPredicate(
case EBSVolumeFilterType:
filter = EBSVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.EBSVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxEBSVolumes)
case GCEPDVolumeFilterType:
filter = GCEPDVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.GCEVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxGCEPDVolumes)
case AzureDiskVolumeFilterType:
filter = AzureDiskVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.AzureVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxAzureDiskVolumes)
default:
glog.Fatalf("Wrong filterName, Only Support %v %v %v ", EBSVolumeFilterType,
klog.Fatalf("Wrong filterName, Only Support %v %v %v ", EBSVolumeFilterType,
GCEPDVolumeFilterType, AzureDiskVolumeFilterType)
return nil
@ -343,7 +337,7 @@ func NewMaxPDVolumeCountPredicate(
c := &MaxPDVolumeCountChecker{
filter: filter,
volumeLimitKey: volumeLimitKey,
maxVolumes: maxVolumes,
maxVolumeFunc: getMaxVolumeFunc(filterName),
pvInfo: pvInfo,
pvcInfo: pvcInfo,
randomVolumeIDPrefix: rand.String(32),
@ -352,19 +346,52 @@ func NewMaxPDVolumeCountPredicate(
return c.predicate
}
// getMaxVols checks the max PD volumes environment variable, otherwise returning a default value
func getMaxVols(defaultVal int) int {
func getMaxVolumeFunc(filterName string) func(node *v1.Node) int {
return func(node *v1.Node) int {
maxVolumesFromEnv := getMaxVolLimitFromEnv()
if maxVolumesFromEnv > 0 {
return maxVolumesFromEnv
}
var nodeInstanceType string
for k, v := range node.ObjectMeta.Labels {
if k == kubeletapis.LabelInstanceType {
nodeInstanceType = v
}
}
switch filterName {
case EBSVolumeFilterType:
return getMaxEBSVolume(nodeInstanceType)
case GCEPDVolumeFilterType:
return DefaultMaxGCEPDVolumes
case AzureDiskVolumeFilterType:
return DefaultMaxAzureDiskVolumes
default:
return -1
}
}
}
func getMaxEBSVolume(nodeInstanceType string) int {
if ok, _ := regexp.MatchString(volumeutil.EBSNitroLimitRegex, nodeInstanceType); ok {
return volumeutil.DefaultMaxEBSNitroVolumeLimit
}
return volumeutil.DefaultMaxEBSVolumes
}
// getMaxVolLimitFromEnv checks the max PD volumes environment variable, otherwise returning a default value
func getMaxVolLimitFromEnv() int {
if rawMaxVols := os.Getenv(KubeMaxPDVols); rawMaxVols != "" {
if parsedMaxVols, err := strconv.Atoi(rawMaxVols); err != nil {
glog.Errorf("Unable to parse maximum PD volumes value, using default of %v: %v", defaultVal, err)
klog.Errorf("Unable to parse maximum PD volumes value, using default: %v", err)
} else if parsedMaxVols <= 0 {
glog.Errorf("Maximum PD volumes must be a positive value, using default of %v", defaultVal)
klog.Errorf("Maximum PD volumes must be a positive value, using default ")
} else {
return parsedMaxVols
}
}
return defaultVal
return -1
}
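// Editor's note (hedged, not part of the patch): the resolution order the two
// helpers above implement is: the KUBE_MAX_PD_VOLS environment variable wins
// when set to a positive integer; otherwise the per-filter default applies,
// which for EBS is instance-type-aware (Nitro instances get a higher limit).
// A usage sketch, assuming a node labeled with its instance type:
//
//	os.Setenv("KUBE_MAX_PD_VOLS", "25")
//	limit := getMaxVolumeFunc(EBSVolumeFilterType)(node) // 25: env override wins
//	os.Unsetenv("KUBE_MAX_PD_VOLS")
//	limit = getMaxVolumeFunc(EBSVolumeFilterType)(node)  // falls back to the Nitro
//	// or default EBS limit depending on the node's instance-type label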
func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace string, filteredVolumes map[string]bool) error {
@ -386,7 +413,7 @@ func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace s
pvc, err := c.pvcInfo.GetPersistentVolumeClaimInfo(namespace, pvcName)
if err != nil || pvc == nil {
// if the PVC is not found, log the error and count the PV towards the PV limit
glog.V(4).Infof("Unable to look up PVC info for %s/%s, assuming PVC matches predicate when counting limits: %v", namespace, pvcName, err)
klog.V(4).Infof("Unable to look up PVC info for %s/%s, assuming PVC matches predicate when counting limits: %v", namespace, pvcName, err)
filteredVolumes[pvID] = true
continue
}
@ -397,7 +424,7 @@ func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace s
// it was forcefully unbound by admin. The pod can still use the
// original PV where it was bound to -> log the error and count
// the PV towards the PV limit
glog.V(4).Infof("PVC %s/%s is not bound, assuming PVC matches predicate when counting limits", namespace, pvcName)
klog.V(4).Infof("PVC %s/%s is not bound, assuming PVC matches predicate when counting limits", namespace, pvcName)
filteredVolumes[pvID] = true
continue
}
@ -406,7 +433,7 @@ func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace s
if err != nil || pv == nil {
// if the PV is not found, log the error
// and count the PV towards the PV limit
glog.V(4).Infof("Unable to look up PV info for %s/%s/%s, assuming PV matches predicate when counting limits: %v", namespace, pvcName, pvName, err)
klog.V(4).Infof("Unable to look up PV info for %s/%s/%s, assuming PV matches predicate when counting limits: %v", namespace, pvcName, pvName, err)
filteredVolumes[pvID] = true
continue
}
@ -454,7 +481,7 @@ func (c *MaxPDVolumeCountChecker) predicate(pod *v1.Pod, meta algorithm.Predicat
}
numNewVolumes := len(newVolumes)
maxAttachLimit := c.maxVolumes
maxAttachLimit := c.maxVolumeFunc(nodeInfo.Node())
if utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
volumeLimits := nodeInfo.VolumeLimits()
@ -638,12 +665,12 @@ func (c *VolumeZoneChecker) predicate(pod *v1.Pod, meta algorithm.PredicateMetad
nodeV, _ := nodeConstraints[k]
volumeVSet, err := volumeutil.LabelZonesToSet(v)
if err != nil {
glog.Warningf("Failed to parse label for %q: %q. Ignoring the label. err=%v. ", k, v, err)
klog.Warningf("Failed to parse label for %q: %q. Ignoring the label. err=%v. ", k, v, err)
continue
}
if !volumeVSet.Has(nodeV) {
glog.V(10).Infof("Won't schedule pod %q onto node %q due to volume %q (mismatch on %q)", pod.Name, node.Name, pvName, k)
klog.V(10).Infof("Won't schedule pod %q onto node %q due to volume %q (mismatch on %q)", pod.Name, node.Name, pvName, k)
return false, []algorithm.PredicateFailureReason{ErrVolumeZoneConflict}, nil
}
}
@ -754,11 +781,11 @@ func PodFitsResources(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *s
}
}
if glog.V(10) {
if klog.V(10) {
if len(predicateFails) == 0 {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// We explicitly don't do klog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.Infof("Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods.",
klog.Infof("Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods.",
podName(pod), node.Name, len(nodeInfo.Pods()), allowedPodNumber)
}
}
@ -807,14 +834,14 @@ func podMatchesNodeSelectorAndAffinityTerms(pod *v1.Pod, node *v1.Node) bool {
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
// if nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution != nil {
// nodeSelectorTerms := nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution.NodeSelectorTerms
// glog.V(10).Infof("Match for RequiredDuringSchedulingRequiredDuringExecution node selector terms %+v", nodeSelectorTerms)
// klog.V(10).Infof("Match for RequiredDuringSchedulingRequiredDuringExecution node selector terms %+v", nodeSelectorTerms)
// nodeAffinityMatches = nodeMatchesNodeSelectorTerms(node, nodeSelectorTerms)
// }
// Match node selector for requiredDuringSchedulingIgnoredDuringExecution.
if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
nodeSelectorTerms := nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
glog.V(10).Infof("Match for RequiredDuringSchedulingIgnoredDuringExecution node selector terms %+v", nodeSelectorTerms)
klog.V(10).Infof("Match for RequiredDuringSchedulingIgnoredDuringExecution node selector terms %+v", nodeSelectorTerms)
nodeAffinityMatches = nodeAffinityMatches && nodeMatchesNodeSelectorTerms(node, nodeSelectorTerms)
}
@ -906,7 +933,7 @@ type ServiceAffinity struct {
// only should be referenced by NewServiceAffinityPredicate.
func (s *ServiceAffinity) serviceAffinityMetadataProducer(pm *predicateMetadata) {
if pm.pod == nil {
glog.Errorf("Cannot precompute service affinity, a pod is required to calculate service affinity.")
klog.Errorf("Cannot precompute service affinity, a pod is required to calculate service affinity.")
return
}
pm.serviceAffinityInUse = true
@ -918,7 +945,7 @@ func (s *ServiceAffinity) serviceAffinityMetadataProducer(pm *predicateMetadata)
// In the future maybe we will return them as part of the function.
if errSvc != nil || errList != nil {
glog.Errorf("Some Error were found while precomputing svc affinity: \nservices:%v , \npods:%v", errSvc, errList)
klog.Errorf("Some Error were found while precomputing svc affinity: \nservices:%v , \npods:%v", errSvc, errList)
}
// consider only the pods that belong to the same namespace
pm.serviceAffinityMatchingPodList = FilterPodsByNamespace(allMatches, pm.pod.Namespace)
@ -1145,10 +1172,10 @@ func (c *PodAffinityChecker) InterPodAffinityMatches(pod *v1.Pod, meta algorithm
return false, failedPredicates, error
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
if klog.V(10) {
// We explicitly don't do klog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.Infof("Schedule Pod %+v on Node %+v is allowed, pod (anti)affinity constraints satisfied",
klog.Infof("Schedule Pod %+v on Node %+v is allowed, pod (anti)affinity constraints satisfied",
podName(pod), node.Name)
}
return true, nil, nil
@ -1159,7 +1186,7 @@ func (c *PodAffinityChecker) InterPodAffinityMatches(pod *v1.Pod, meta algorithm
// targetPod matches all the terms and their topologies, 2) whether targetPod
// matches all the terms label selector and namespaces (AKA term properties),
// 3) any error.
func (c *PodAffinityChecker) podMatchesPodAffinityTerms(pod *v1.Pod, targetPod *v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, bool, error) {
func (c *PodAffinityChecker) podMatchesPodAffinityTerms(pod, targetPod *v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, bool, error) {
if len(terms) == 0 {
return false, false, fmt.Errorf("terms array is empty")
}
@ -1167,7 +1194,7 @@ func (c *PodAffinityChecker) podMatchesPodAffinityTerms(pod *v1.Pod, targetPod *
if err != nil {
return false, false, err
}
if !podMatchesAffinityTermProperties(targetPod, props) {
if !podMatchesAllAffinityTermProperties(targetPod, props) {
return false, false, nil
}
// Namespace and selector of the terms have matched. Now we check topology of the terms.
@ -1214,186 +1241,129 @@ func GetPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.Po
return terms
}
func getMatchingAntiAffinityTerms(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (map[string][]matchingPodAntiAffinityTerm, error) {
allNodeNames := make([]string, 0, len(nodeInfoMap))
for name := range nodeInfoMap {
allNodeNames = append(allNodeNames, name)
// getMatchingAntiAffinityTopologyPairsOfPod calculates the following for "existingPod" on the given node:
// (1) Whether it has PodAntiAffinity
// (2) Whether ANY AffinityTerm matches the incoming pod
func getMatchingAntiAffinityTopologyPairsOfPod(newPod *v1.Pod, existingPod *v1.Pod, node *v1.Node) (*topologyPairsMaps, error) {
affinity := existingPod.Spec.Affinity
if affinity == nil || affinity.PodAntiAffinity == nil {
return nil, nil
}
var lock sync.Mutex
var firstError error
result := make(map[string][]matchingPodAntiAffinityTerm)
appendResult := func(toAppend map[string][]matchingPodAntiAffinityTerm) {
lock.Lock()
defer lock.Unlock()
for uid, terms := range toAppend {
result[uid] = append(result[uid], terms...)
}
}
catchError := func(err error) {
lock.Lock()
defer lock.Unlock()
if firstError == nil {
firstError = err
topologyMaps := newTopologyPairsMaps()
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(existingPod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
return nil, err
}
if priorityutil.PodMatchesTermsNamespaceAndSelector(newPod, namespaces, selector) {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
topologyMaps.addTopologyPair(pair, existingPod)
}
}
}
return topologyMaps, nil
}
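// Editor's example (hedged, not part of the patch): with an existing pod on
// nodeA (labeled hostname=nodeA) whose required anti-affinity term uses
// TopologyKey "hostname" and matches the incoming pod, the returned maps hold
// exactly one pair: {key: "hostname", value: "nodeA"} -> {existingPod}. This
// is the shape the test cases earlier in this change assert against.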
processNode := func(i int) {
nodeInfo := nodeInfoMap[allNodeNames[i]]
node := nodeInfo.Node()
if node == nil {
catchError(fmt.Errorf("node not found"))
return
}
nodeResult := make(map[string][]matchingPodAntiAffinityTerm)
for _, existingPod := range nodeInfo.PodsWithAffinity() {
affinity := existingPod.Spec.Affinity
if affinity == nil {
func (c *PodAffinityChecker) getMatchingAntiAffinityTopologyPairsOfPods(pod *v1.Pod, existingPods []*v1.Pod) (*topologyPairsMaps, error) {
topologyMaps := newTopologyPairsMaps()
for _, existingPod := range existingPods {
existingPodNode, err := c.info.GetNodeInfo(existingPod.Spec.NodeName)
if err != nil {
if apierrors.IsNotFound(err) {
klog.Errorf("Node not found, %v", existingPod.Spec.NodeName)
continue
}
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(existingPod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
catchError(err)
return
}
if priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector) {
existingPodFullName := schedutil.GetPodFullName(existingPod)
nodeResult[existingPodFullName] = append(
nodeResult[existingPodFullName],
matchingPodAntiAffinityTerm{term: &term, node: node})
}
}
return nil, err
}
if len(nodeResult) > 0 {
appendResult(nodeResult)
existingPodTopologyMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(pod, existingPod, existingPodNode)
if err != nil {
return nil, err
}
topologyMaps.appendMaps(existingPodTopologyMaps)
}
workqueue.Parallelize(16, len(allNodeNames), processNode)
return result, firstError
}
func getMatchingAntiAffinityTermsOfExistingPod(newPod *v1.Pod, existingPod *v1.Pod, node *v1.Node) ([]matchingPodAntiAffinityTerm, error) {
var result []matchingPodAntiAffinityTerm
affinity := existingPod.Spec.Affinity
if affinity != nil && affinity.PodAntiAffinity != nil {
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(existingPod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
return nil, err
}
if priorityutil.PodMatchesTermsNamespaceAndSelector(newPod, namespaces, selector) {
result = append(result, matchingPodAntiAffinityTerm{term: &term, node: node})
}
}
}
return result, nil
}
func (c *PodAffinityChecker) getMatchingAntiAffinityTerms(pod *v1.Pod, allPods []*v1.Pod) (map[string][]matchingPodAntiAffinityTerm, error) {
result := make(map[string][]matchingPodAntiAffinityTerm)
for _, existingPod := range allPods {
affinity := existingPod.Spec.Affinity
if affinity != nil && affinity.PodAntiAffinity != nil {
existingPodNode, err := c.info.GetNodeInfo(existingPod.Spec.NodeName)
if err != nil {
if apierrors.IsNotFound(err) {
glog.Errorf("Node not found, %v", existingPod.Spec.NodeName)
continue
}
return nil, err
}
existingPodMatchingTerms, err := getMatchingAntiAffinityTermsOfExistingPod(pod, existingPod, existingPodNode)
if err != nil {
return nil, err
}
if len(existingPodMatchingTerms) > 0 {
existingPodFullName := schedutil.GetPodFullName(existingPod)
result[existingPodFullName] = existingPodMatchingTerms
}
}
}
return result, nil
return topologyMaps, nil
}
// Checks if scheduling the pod onto this node would break any anti-affinity
// rules indicated by the existing pods.
// terms indicated by the existing pods.
func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (algorithm.PredicateFailureReason, error) {
node := nodeInfo.Node()
if node == nil {
return ErrExistingPodsAntiAffinityRulesNotMatch, fmt.Errorf("Node is nil")
}
var matchingTerms map[string][]matchingPodAntiAffinityTerm
var topologyMaps *topologyPairsMaps
if predicateMeta, ok := meta.(*predicateMetadata); ok {
matchingTerms = predicateMeta.matchingAntiAffinityTerms
topologyMaps = predicateMeta.topologyPairsAntiAffinityPodsMap
} else {
// Filter out pods whose nodeName is equal to nodeInfo.node.Name, but are not
// present in nodeInfo. Pods on other nodes pass the filter.
filteredPods, err := c.podLister.FilteredList(nodeInfo.Filter, labels.Everything())
if err != nil {
errMessage := fmt.Sprintf("Failed to get all pods, %+v", err)
glog.Error(errMessage)
klog.Error(errMessage)
return ErrExistingPodsAntiAffinityRulesNotMatch, errors.New(errMessage)
}
if matchingTerms, err = c.getMatchingAntiAffinityTerms(pod, filteredPods); err != nil {
if topologyMaps, err = c.getMatchingAntiAffinityTopologyPairsOfPods(pod, filteredPods); err != nil {
errMessage := fmt.Sprintf("Failed to get all terms that pod %+v matches, err: %+v", podName(pod), err)
glog.Error(errMessage)
klog.Error(errMessage)
return ErrExistingPodsAntiAffinityRulesNotMatch, errors.New(errMessage)
}
}
for _, terms := range matchingTerms {
for i := range terms {
term := &terms[i]
if len(term.term.TopologyKey) == 0 {
errMessage := fmt.Sprintf("Empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity")
glog.Error(errMessage)
return ErrExistingPodsAntiAffinityRulesNotMatch, errors.New(errMessage)
}
if priorityutil.NodesHaveSameTopologyKey(node, term.node, term.term.TopologyKey) {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v,because of PodAntiAffinityTerm %v",
podName(pod), node.Name, term.term)
return ErrExistingPodsAntiAffinityRulesNotMatch, nil
}
// Iterate over the node's topology pairs; if any pair maps to existing pods,
// an existing pod's anti-affinity term matches the incoming pod here, so the pod cannot be scheduled onto this node.
for topologyKey, topologyValue := range node.Labels {
if topologyMaps.topologyPairToPods[topologyPair{key: topologyKey, value: topologyValue}] != nil {
klog.V(10).Infof("Cannot schedule pod %+v onto node %v", podName(pod), node.Name)
return ErrExistingPodsAntiAffinityRulesNotMatch, nil
}
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
if klog.V(10) {
// We explicitly don't do klog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.Infof("Schedule Pod %+v on Node %+v is allowed, existing pods anti-affinity rules satisfied.",
klog.Infof("Schedule Pod %+v on Node %+v is allowed, existing pods anti-affinity terms satisfied.",
podName(pod), node.Name)
}
return nil, nil
}
// anyPodsMatchingTopologyTerms checks whether any of the nodes given via
// "targetPods" matches topology of all the "terms" for the give "pod" and "nodeInfo".
func (c *PodAffinityChecker) anyPodsMatchingTopologyTerms(pod *v1.Pod, targetPods map[string][]*v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, error) {
for nodeName, targetPods := range targetPods {
targetPodNodeInfo, err := c.info.GetNodeInfo(nodeName)
if err != nil {
return false, err
}
if len(targetPods) > 0 {
allTermsMatched := true
for _, term := range terms {
if !priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), targetPodNodeInfo, term.TopologyKey) {
allTermsMatched = false
break
}
// nodeMatchesAllTopologyTerms checks whether "nodeInfo" matches
// topology of all the "terms" for the given "pod".
func (c *PodAffinityChecker) nodeMatchesAllTopologyTerms(pod *v1.Pod, topologyPairs *topologyPairsMaps, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) bool {
node := nodeInfo.Node()
for _, term := range terms {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
if _, ok := topologyPairs.topologyPairToPods[pair]; !ok {
return false
}
if allTermsMatched {
// We have 1 or more pods on the target node that have already matched namespace and selector
// and all of the terms' topologies matched the target node. So, there is at least 1 matching pod on the node.
return true, nil
} else {
return false
}
}
return true
}
// nodeMatchesAnyTopologyTerm checks whether "nodeInfo" matches
// topology of any "term" for the given "pod".
func (c *PodAffinityChecker) nodeMatchesAnyTopologyTerm(pod *v1.Pod, topologyPairs *topologyPairsMaps, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) bool {
node := nodeInfo.Node()
for _, term := range terms {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
if _, ok := topologyPairs.topologyPairToPods[pair]; ok {
return true
}
}
}
return false, nil
return false
}
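// Editor's sketch (hedged, not part of the patch): the all-vs-any split above,
// reduced to plain maps. Affinity requires every term's (topologyKey, value)
// pair to be present in the precomputed map; anti-affinity fails on any hit.
package main

import "fmt"

type pair struct{ key, value string }

func main() {
	nodeLabels := map[string]string{"hostname": "nodeA", "zone": "z1"}
	// pairs for which some existing pod matched the corresponding term:
	matched := map[pair]bool{{"hostname", "nodeA"}: true}
	termKeys := []string{"hostname", "zone"}

	all, any := true, false
	for _, k := range termKeys {
		hit := matched[pair{k, nodeLabels[k]}]
		all = all && hit
		any = any || hit
	}
	fmt.Println(all, any) // false true: affinity unsatisfied, anti-affinity violated
}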
// Checks if scheduling the pod onto this node would break any rules of this pod.
// Checks if scheduling the pod onto this node would break any term of this pod.
func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo,
affinity *v1.Affinity) (algorithm.PredicateFailureReason, error) {
@ -1403,21 +1373,16 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
}
if predicateMeta, ok := meta.(*predicateMetadata); ok {
// Check all affinity terms.
matchingPods := predicateMeta.nodeNameToMatchingAffinityPods
topologyPairsPotentialAffinityPods := predicateMeta.topologyPairsPotentialAffinityPods
if affinityTerms := GetPodAffinityTerms(affinity.PodAffinity); len(affinityTerms) > 0 {
matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, affinityTerms)
if err != nil {
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinity, err: %v", podName(pod), node.Name, err)
glog.Errorf(errMessage)
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
}
matchExists := c.nodeMatchesAllTopologyTerms(pod, topologyPairsPotentialAffinityPods, nodeInfo, affinityTerms)
if !matchExists {
// This pod may the first pod in a series that have affinity to themselves. In order
// to not leave such pods in pending state forever, we check that if no other pod
// in the cluster matches the namespace and selector of this pod and the pod matches
// its own terms, then we allow the pod to pass the affinity check.
if !(len(matchingPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
if !(len(topologyPairsPotentialAffinityPods.topologyPairToPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) {
klog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
podName(pod), node.Name)
return ErrPodAffinityRulesNotMatch, nil
}
@ -1425,16 +1390,16 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
}
// Check all anti-affinity terms.
matchingPods = predicateMeta.nodeNameToMatchingAntiAffinityPods
topologyPairsPotentialAntiAffinityPods := predicateMeta.topologyPairsPotentialAntiAffinityPods
if antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity); len(antiAffinityTerms) > 0 {
matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, antiAffinityTerms)
if err != nil || matchExists {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinity, err: %v",
podName(pod), node.Name, err)
matchExists := c.nodeMatchesAnyTopologyTerm(pod, topologyPairsPotentialAntiAffinityPods, nodeInfo, antiAffinityTerms)
if matchExists {
klog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinity",
podName(pod), node.Name)
return ErrPodAntiAffinityRulesNotMatch, nil
}
}
} else { // We don't have precomputed metadata. We have to follow a slow path to check affinity rules.
} else { // We don't have precomputed metadata. We have to follow a slow path to check affinity terms.
filteredPods, err := c.podLister.FilteredList(nodeInfo.Filter, labels.Everything())
if err != nil {
return ErrPodAffinityRulesNotMatch, err
@ -1449,7 +1414,7 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
affTermsMatch, termsSelectorMatch, err := c.podMatchesPodAffinityTerms(pod, targetPod, nodeInfo, affinityTerms)
if err != nil {
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinity, err: %v", podName(pod), node.Name, err)
glog.Error(errMessage)
klog.Error(errMessage)
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
}
if termsSelectorMatch {
@ -1464,7 +1429,7 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
if len(antiAffinityTerms) > 0 {
antiAffTermsMatch, _, err := c.podMatchesPodAffinityTerms(pod, targetPod, nodeInfo, antiAffinityTerms)
if err != nil || antiAffTermsMatch {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm, err: %v",
klog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm, err: %v",
podName(pod), node.Name, err)
return ErrPodAntiAffinityRulesNotMatch, nil
}
@ -1472,29 +1437,29 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
}
if !matchFound && len(affinityTerms) > 0 {
// We have not been able to find any matches for the pod's affinity rules.
// We have not been able to find any matches for the pod's affinity terms.
// This pod may be the first pod in a series that have affinity to themselves. In order
// to not leave such pods in pending state forever, we check that if no other pod
// in the cluster matches the namespace and selector of this pod and the pod matches
// its own terms, then we allow the pod to pass the affinity check.
if termsSelectorMatchFound {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
klog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
podName(pod), node.Name)
return ErrPodAffinityRulesNotMatch, nil
}
// Check if pod matches its own affinity properties (namespace and label selector).
if !targetPodMatchesAffinityOfPod(pod, pod) {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
klog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
podName(pod), node.Name)
return ErrPodAffinityRulesNotMatch, nil
}
}
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
if klog.V(10) {
// We explicitly don't do klog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.Infof("Schedule Pod %+v on Node %+v is allowed, pod affinity/anti-affinity constraints satisfied.",
klog.Infof("Schedule Pod %+v on Node %+v is allowed, pod affinity/anti-affinity constraints satisfied.",
podName(pod), node.Name)
}
return nil, nil
@ -1506,7 +1471,14 @@ func CheckNodeUnschedulablePredicate(pod *v1.Pod, meta algorithm.PredicateMetada
return false, []algorithm.PredicateFailureReason{ErrNodeUnknownCondition}, nil
}
if nodeInfo.Node().Spec.Unschedulable {
// If the pod tolerates the unschedulable taint, it also tolerates `node.Spec.Unschedulable`.
podToleratesUnschedulable := v1helper.TolerationsTolerateTaint(pod.Spec.Tolerations, &v1.Taint{
Key: schedulerapi.TaintNodeUnschedulable,
Effect: v1.TaintEffectNoSchedule,
})
// TODO (k82cn): deprecate `node.Spec.Unschedulable` in 1.13.
if nodeInfo.Node().Spec.Unschedulable && !podToleratesUnschedulable {
return false, []algorithm.PredicateFailureReason{ErrNodeUnschedulable}, nil
}
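// Editor's example (hedged, not part of the patch): a pod that should still
// schedule onto a cordoned node carries a toleration for the unschedulable
// taint, e.g. in its spec:
//
//	tolerations:
//	- key: node.kubernetes.io/unschedulable
//	  operator: Exists
//	  effect: NoSchedule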
@ -1662,12 +1634,12 @@ func (c *VolumeBindingChecker) predicate(pod *v1.Pod, meta algorithm.PredicateMe
failReasons := []algorithm.PredicateFailureReason{}
if !boundSatisfied {
glog.V(5).Infof("Bound PVs not satisfied for pod %v/%v, node %q", pod.Namespace, pod.Name, node.Name)
klog.V(5).Infof("Bound PVs not satisfied for pod %v/%v, node %q", pod.Namespace, pod.Name, node.Name)
failReasons = append(failReasons, ErrVolumeNodeConflict)
}
if !unboundSatisfied {
glog.V(5).Infof("Couldn't find matching PVs for pod %v/%v, node %q", pod.Namespace, pod.Name, node.Name)
klog.V(5).Infof("Couldn't find matching PVs for pod %v/%v, node %q", pod.Namespace, pod.Name, node.Name)
failReasons = append(failReasons, ErrVolumeBindConflict)
}
@ -1676,6 +1648,6 @@ func (c *VolumeBindingChecker) predicate(pod *v1.Pod, meta algorithm.PredicateMe
}
// All volumes bound or matching PVs found for all unbound PVCs
glog.V(5).Infof("All PVCs found matches for pod %v/%v, node %q", pod.Namespace, pod.Name, node.Name)
klog.V(5).Infof("All PVCs found matches for pod %v/%v, node %q", pod.Namespace, pod.Name, node.Name)
return true, nil, nil
}

File diff suppressed because it is too large

View File

@ -19,7 +19,7 @@ package predicates
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// FindLabelsInSet gets as many key/value pairs as possible out of a label set.
@ -68,7 +68,7 @@ func CreateSelectorFromLabels(aL map[string]string) labels.Selector {
// portsConflict check whether existingPorts and wantPorts conflict with each other
// return true if we have a conflict
func portsConflict(existingPorts schedutil.HostPortInfo, wantPorts []*v1.ContainerPort) bool {
func portsConflict(existingPorts schedulercache.HostPortInfo, wantPorts []*v1.ContainerPort) bool {
for _, cp := range wantPorts {
if existingPorts.CheckConflict(cp.HostIP, string(cp.Protocol), cp.HostPort) {
return true

View File

@ -37,14 +37,14 @@ go_library(
"//pkg/scheduler/cache:go_default_library",
"//pkg/util/node:go_default_library",
"//pkg/util/parsers:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/util/workqueue:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)
@ -55,6 +55,7 @@ go_test(
"image_locality_test.go",
"interpod_affinity_test.go",
"least_requested_test.go",
"main_test.go",
"metadata_test.go",
"most_requested_test.go",
"node_affinity_test.go",
@ -74,13 +75,13 @@ go_test(
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/util/parsers:go_default_library",
"//staging/src/k8s.io/api/apps/v1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature/testing:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)

View File

@ -32,7 +32,7 @@ var (
// BalancedResourceAllocationMap should **NOT** be used alone, and **MUST** be used together
// with LeastRequestedPriority. It calculates the difference between the cpu and memory fraction
// of capacity, and prioritizes the host based on how close the two metrics are to each other.
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
// Detail: score = 10 - variance(cpuFraction,memoryFraction,volumeFraction)*10. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced
// Resource Utilization"
BalancedResourceAllocationMap = balancedResourcePriority.PriorityMap
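// Editor's sketch (hedged, not part of the patch): the formula in the comment
// above, worked as a standalone function; the real scorer lives in
// resource_allocation.go and may differ in details such as rounding.
package main

import "fmt"

// balancedScore computes 10 - variance(fractions)*10, where each fraction is
// requested/capacity for one resource and lies in [0, 1].
func balancedScore(fractions ...float64) float64 {
	mean := 0.0
	for _, f := range fractions {
		mean += f
	}
	mean /= float64(len(fractions))
	variance := 0.0
	for _, f := range fractions {
		variance += (f - mean) * (f - mean)
	}
	variance /= float64(len(fractions))
	return 10 - variance*10
}

func main() {
	fmt.Println(balancedScore(0.5, 0.5, 0.5)) // perfectly balanced: 10
	fmt.Println(balancedScore(0.9, 0.1, 0.5)) // skewed usage scores lower (~8.93)
}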

View File

@ -17,7 +17,6 @@ limitations under the License.
package priorities
import (
"fmt"
"reflect"
"testing"
@ -25,6 +24,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing"
"k8s.io/kubernetes/pkg/features"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
@ -44,7 +44,7 @@ func getExistingVolumeCountForNode(pods []*v1.Pod, maxVolumes int) int {
func TestBalancedResourceAllocation(t *testing.T) {
// Enable volumesOnNodeForBalancing to do balanced resource allocation
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.BalanceAttachedNodeVolumes))
defer utilfeaturetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.BalanceAttachedNodeVolumes, true)()
podwithVol1 := v1.PodSpec{
Containers: []v1.Container{
{

View File

@ -26,11 +26,12 @@ import (
"k8s.io/kubernetes/pkg/util/parsers"
)
// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range.
// The two thresholds are used as bounds for the image score range. They correspond to a reasonable size range for
// container images compressed and stored in registries; 90%ile of images on dockerhub drops into this range.
const (
mb int64 = 1024 * 1024
minImgSize int64 = 23 * mb
maxImgSize int64 = 1000 * mb
mb int64 = 1024 * 1024
minThreshold int64 = 23 * mb
maxThreshold int64 = 1000 * mb
)
// ImageLocalityPriorityMap is a priority function that favors nodes that already have requested pod container's images.
@ -44,44 +45,55 @@ func ImageLocalityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *scheduler
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
sumSize := totalImageSize(nodeInfo, pod.Spec.Containers)
var score int
if priorityMeta, ok := meta.(*priorityMetadata); ok {
score = calculatePriority(sumImageScores(nodeInfo, pod.Spec.Containers, priorityMeta.totalNumNodes))
} else {
// if we are not able to parse priority meta data, skip this priority
score = 0
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: calculateScoreFromSize(sumSize),
Score: score,
}, nil
}
// calculateScoreFromSize calculates the priority of a node. sumSize is sum size of requested images on this node.
// 1. Split image size range into 10 buckets.
// 2. Decide the priority of a given sumSize based on which bucket it belongs to.
func calculateScoreFromSize(sumSize int64) int {
switch {
case sumSize == 0 || sumSize < minImgSize:
// 0 means none of the images required by this pod are present on this
// node or the total size of the images present is too small to be taken into further consideration.
return 0
case sumSize >= maxImgSize:
// If existing images' total size is larger than max, just make it highest priority.
return schedulerapi.MaxPriority
// calculatePriority returns the priority of a node. Given the sumScores of requested images on the node, the node's
// priority is obtained by scaling the maximum priority value with a ratio proportional to the sumScores.
func calculatePriority(sumScores int64) int {
if sumScores < minThreshold {
sumScores = minThreshold
} else if sumScores > maxThreshold {
sumScores = maxThreshold
}
return int((int64(schedulerapi.MaxPriority) * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1)
return int(int64(schedulerapi.MaxPriority) * (sumScores - minThreshold) / (maxThreshold - minThreshold))
}
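// Editor's worked example (hedged, not part of the patch): the clamp-and-scale
// above, with this file's constants and MaxPriority assumed to be 10:
//
//	calculatePriority(125 * mb)  // 10*(125-23)/(1000-23) = 1
//	calculatePriority(10 * mb)   // below minThreshold, clamped: 0
//	calculatePriority(2000 * mb) // above maxThreshold, clamped: 10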
// totalImageSize returns the total image size of all the containers that are already on the node.
func totalImageSize(nodeInfo *schedulercache.NodeInfo, containers []v1.Container) int64 {
var total int64
// sumImageScores returns the sum of image scores of all the containers that are already on the node.
// Each image receives a raw score of its size, scaled by scaledImageScore. The raw scores are later used to calculate
// the final score. Note that init containers are not considered, as it's rare for users to deploy huge init containers.
func sumImageScores(nodeInfo *schedulercache.NodeInfo, containers []v1.Container, totalNumNodes int) int64 {
var sum int64
imageStates := nodeInfo.ImageStates()
imageSizes := nodeInfo.ImageSizes()
for _, container := range containers {
if size, ok := imageSizes[normalizedImageName(container.Image)]; ok {
total += size
if state, ok := imageStates[normalizedImageName(container.Image)]; ok {
sum += scaledImageScore(state, totalNumNodes)
}
}
return total
return sum
}
// scaledImageScore returns an adaptively scaled score for the given state of an image.
// The size of the image is used as the base score, scaled by a factor which considers how many nodes the image has "spread" to.
// This heuristic aims to mitigate the undesirable "node heating problem", i.e., pods get assigned to the same or
// a few nodes due to image locality.
func scaledImageScore(imageState *schedulercache.ImageStateSummary, totalNumNodes int) int64 {
spread := float64(imageState.NumNodes) / float64(totalNumNodes)
return int64(float64(imageState.Size) * spread)
}
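// Editor's worked example (hedged, not part of the patch): a 300MB image
// present on 1 of 2 nodes gets spread = 0.5, so it contributes a raw score of
// 150MB; once the image is on both nodes, spread = 1.0 and it contributes the
// full 300MB. Lone image holders thus get a smaller locality bonus, which is
// what dampens the "node heating" effect described above.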
// normalizedImageName returns the CRI compliant name for a given image.

View File

@ -42,13 +42,13 @@ func TestImageLocalityPriority(t *testing.T) {
},
}
test40140 := v1.PodSpec{
test40300 := v1.PodSpec{
Containers: []v1.Container{
{
Image: "gcr.io/40",
},
{
Image: "gcr.io/140",
Image: "gcr.io/300",
},
},
}
@ -64,7 +64,7 @@ func TestImageLocalityPriority(t *testing.T) {
},
}
node401402000 := v1.NodeStatus{
node403002000 := v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
@ -76,10 +76,10 @@ func TestImageLocalityPriority(t *testing.T) {
},
{
Names: []string{
"gcr.io/140:" + parsers.DefaultImageTag,
"gcr.io/140:v1",
"gcr.io/300:" + parsers.DefaultImageTag,
"gcr.io/300:v1",
},
SizeBytes: int64(140 * mb),
SizeBytes: int64(300 * mb),
},
{
Names: []string{
@ -120,29 +120,29 @@ func TestImageLocalityPriority(t *testing.T) {
// Node1
// Image: gcr.io/40:latest 40MB
// Score: (40M-23M)/97.7M + 1 = 1
// Score: 0 (40M/2 < 23M, min-threshold)
// Node2
// Image: gcr.io/250:latest 250MB
// Score: (250M-23M)/97.7M + 1 = 3
// Score: 10 * (250M/2 - 23M)/(1000M - 23M) = 1
pod: &v1.Pod{Spec: test40250},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 3}},
nodes: []*v1.Node{makeImageNode("machine1", node403002000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 1}},
name: "two images spread on two nodes, prefer the larger image one",
},
{
// Pod: gcr.io/40 gcr.io/140
// Pod: gcr.io/40 gcr.io/300
// Node1
// Image: gcr.io/40:latest 40MB, gcr.io/140:latest 140MB
// Score: (40M+140M-23M)/97.7M + 1 = 2
// Image: gcr.io/40:latest 40MB, gcr.io/300:latest 300MB
// Score: 10 * ((40M + 300M)/2 - 23M)/(1000M - 23M) = 1
// Node2
// Image: not present
// Score: 0
pod: &v1.Pod{Spec: test40140},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: 0}},
pod: &v1.Pod{Spec: test40300},
nodes: []*v1.Node{makeImageNode("machine1", node403002000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 0}},
name: "two images on one node, prefer this node",
},
{
@ -150,13 +150,13 @@ func TestImageLocalityPriority(t *testing.T) {
// Node1
// Image: gcr.io/2000:latest 2000MB
// Score: 2000 > max score = 10
// Score: 10 (2000M/2 >= 1000M, max-threshold)
// Node2
// Image: gcr.io/10:latest 10MB
// Score: 10 < min score = 0
// Score: 0 (10M/2 < 23M, min-threshold)
pod: &v1.Pod{Spec: testMinMax},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
nodes: []*v1.Node{makeImageNode("machine1", node403002000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
name: "if exceed limit, use limit",
},
@ -165,7 +165,7 @@ func TestImageLocalityPriority(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(ImageLocalityPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
list, err := priorityFunction(ImageLocalityPriorityMap, nil, &priorityMetadata{totalNumNodes: len(test.nodes)})(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}

View File

@ -17,6 +17,7 @@ limitations under the License.
package priorities
import (
"context"
"sync"
"k8s.io/api/core/v1"
@ -29,7 +30,7 @@ import (
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"github.com/golang/glog"
"k8s.io/klog"
)
// InterPodAffinity contains information to calculate inter pod affinity.
@ -136,7 +137,7 @@ func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *v1.Pod, node
existingPodNode, err := ipa.info.GetNodeInfo(existingPod.Spec.NodeName)
if err != nil {
if apierrors.IsNotFound(err) {
glog.Errorf("Node not found, %v", existingPod.Spec.NodeName)
klog.Errorf("Node not found, %v", existingPod.Spec.NodeName)
return nil
}
return err
@ -210,7 +211,7 @@ func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *v1.Pod, node
}
}
}
workqueue.Parallelize(16, len(allNodeNames), processNode)
workqueue.ParallelizeUntil(context.TODO(), 16, len(allNodeNames), processNode)
if pm.firstError != nil {
return nil, pm.firstError
}
@ -232,8 +233,8 @@ func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *v1.Pod, node
fScore = float64(schedulerapi.MaxPriority) * ((pm.counts[node.Name] - minCount) / (maxCount - minCount))
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
if glog.V(10) {
glog.Infof("%v -> %v: InterPodAffinityPriority, Score: (%d)", pod.Name, node.Name, int(fScore))
if klog.V(10) {
klog.Infof("%v -> %v: InterPodAffinityPriority, Score: (%d)", pod.Name, node.Name, int(fScore))
}
}
return result, nil

View File

@ -29,7 +29,7 @@ var (
// prioritizes based on the minimum of the average of the fraction of requested to capacity.
//
// Details:
// cpu((capacity-sum(requested))*10/capacity) + memory((capacity-sum(requested))*10/capacity)/2
// (cpu((capacity-sum(requested))*10/capacity) + memory((capacity-sum(requested))*10/capacity))/2
LeastRequestedPriorityMap = leastResourcePriority.PriorityMap
)
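// Editor's worked example (hedged, not part of the patch): on a node with
// 10 CPU and 10Gi memory where existing pods plus the incoming pod request
// 6 CPU and 2Gi, the formula above gives:
//
//	cpu:    (10-6)*10/10 = 4
//	memory: (10-2)*10/10 = 8
//	score:  (4 + 8) / 2  = 6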

View File

@ -0,0 +1,29 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"testing"
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing"
_ "k8s.io/kubernetes/pkg/features"
)
func TestMain(m *testing.M) {
utilfeaturetesting.VerifyFeatureGatesUnchanged(utilfeature.DefaultFeatureGate, m.Run)
}

View File

@ -21,7 +21,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
@ -52,6 +51,7 @@ type priorityMetadata struct {
podSelectors []labels.Selector
controllerRef *metav1.OwnerReference
podFirstServiceSelector labels.Selector
totalNumNodes int
}
// PriorityMetadata is a PriorityMetadataProducer. Node info can be nil.
@ -65,8 +65,9 @@ func (pmf *PriorityMetadataFactory) PriorityMetadata(pod *v1.Pod, nodeNameToInfo
podTolerations: getAllTolerationPreferNoSchedule(pod.Spec.Tolerations),
affinity: pod.Spec.Affinity,
podSelectors: getSelectors(pod, pmf.serviceLister, pmf.controllerLister, pmf.replicaSetLister, pmf.statefulSetLister),
controllerRef: priorityutil.GetControllerRef(pod),
controllerRef: metav1.GetControllerOf(pod),
podFirstServiceSelector: getFirstServiceSelector(pod, pmf.serviceLister),
totalNumNodes: len(nodeNameToInfo),
}
}

View File

@ -20,9 +20,8 @@ import (
"reflect"
"testing"
apps "k8s.io/api/apps/v1beta1"
apps "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
@ -153,14 +152,14 @@ func TestPriorityMetadata(t *testing.T) {
name: "Produce a priorityMetadata with specified requests",
},
}
mataDataProducer := NewPriorityMetadataFactory(
metaDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister([]*v1.Service{}),
schedulertesting.FakeControllerLister([]*v1.ReplicationController{}),
schedulertesting.FakeReplicaSetLister([]*extensions.ReplicaSet{}),
schedulertesting.FakeReplicaSetLister([]*apps.ReplicaSet{}),
schedulertesting.FakeStatefulSetLister([]*apps.StatefulSet{}))
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
ptData := mataDataProducer(test.pod, nil)
ptData := metaDataProducer(test.pod, nil)
if !reflect.DeepEqual(test.expected, ptData) {
t.Errorf("expected %#v, got %#v", test.expected, ptData)
}

View File

@ -39,10 +39,10 @@ func mostResourceScorer(requested, allocable *schedulercache.Resource, includeVo
// The used capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more resources are used the higher the score is. This function
// is almost a reversed version of least_requested_priority.calculatUnusedScore
// is almost a reversed version of least_requested_priority.calculateUnusedScore
// (10 - calculateUnusedScore). The main difference is in rounding. It was added to
// keep the final formula clean and not to modify the widely used (by users
// in their default scheduling policies) calculateUSedScore.
// in their default scheduling policies) calculateUsedScore.
func mostRequestedScore(requested, capacity int64) int64 {
if capacity == 0 {
return 0
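
The rounding remark is easiest to see with a worked case: integer division makes mostRequestedScore differ from 10 - calculateUnusedScore by up to one point. A sketch with illustrative names:

package main

import "fmt"

func mostRequested(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return requested * 10 / capacity
}

func tenMinusUnused(requested, capacity int64) int64 {
	return 10 - (capacity-requested)*10/capacity
}

func main() {
	// requested=1, capacity=3: 10/3 truncates to 3, while
	// 10 - (2*10)/3 = 10 - 6 = 4, a one-point rounding gap.
	fmt.Println(mostRequested(1, 3), tenMinusUnused(1, 3)) // 3 4
}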

View File

@ -22,7 +22,6 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
@ -39,7 +38,7 @@ func CalculateNodePreferAvoidPodsPriorityMap(pod *v1.Pod, meta interface{}, node
controllerRef = priorityMeta.controllerRef
} else {
// We couldn't parse metadata - fallback to the podspec.
controllerRef = priorityutil.GetControllerRef(pod)
controllerRef = metav1.GetControllerOf(pod)
}
if controllerRef != nil {
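
priorityutil.GetControllerRef was a scheduler-local wrapper; metav1.GetControllerOf from apimachinery performs the same scan for the owner reference whose Controller field is true, which is why the helper (and its test, removed further down) can go. A usage sketch with an invented ReplicaSet owner:

package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	isController := true
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: "web-0",
			OwnerReferences: []metav1.OwnerReference{{
				APIVersion: "apps/v1",
				Kind:       "ReplicaSet",
				Name:       "web-5b9cffccff",
				Controller: &isController,
			}},
		},
	}
	// GetControllerOf returns the owner reference marked Controller=true,
	// or nil when the pod has no controlling owner.
	if ref := metav1.GetControllerOf(pod); ref != nil {
		fmt.Printf("%s/%s controls the pod\n", ref.Kind, ref.Name)
	}
}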

View File

@ -209,19 +209,19 @@ func TestRequestedToCapacityRatio(t *testing.T) {
for _, test := range tests {
nodeNames := make([]string, 0)
var nodeNames []string
for nodeName := range test.nodes {
nodeNames = append(nodeNames, nodeName)
}
sort.Strings(nodeNames)
nodes := make([]*v1.Node, 0)
var nodes []*v1.Node
for _, nodeName := range nodeNames {
node := test.nodes[nodeName]
nodes = append(nodes, makeNode(nodeName, node.capacity.cpu, node.capacity.mem))
}
scheduledPods := make([]*v1.Pod, 0)
var scheduledPods []*v1.Pod
for name, node := range test.nodes {
scheduledPods = append(scheduledPods,
buildResourcesPod(name, node.used))

View File

@ -19,8 +19,10 @@ package priorities
import (
"fmt"
"github.com/golang/glog"
"k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/features"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
@ -56,20 +58,31 @@ func (r *ResourceAllocationPriority) PriorityMap(
requested.Memory += nodeInfo.NonZeroRequest().Memory
var score int64
// Check if the pod has volumes; if so, the volume counts could be fed into the scorer for balanced resource allocation.
if len(pod.Spec.Volumes) >= 0 && nodeInfo.TransientInfo != nil {
if len(pod.Spec.Volumes) >= 0 && utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && nodeInfo.TransientInfo != nil {
score = r.scorer(&requested, &allocatable, true, nodeInfo.TransientInfo.TransNodeInfo.RequestedVolumes, nodeInfo.TransientInfo.TransNodeInfo.AllocatableVolumesCount)
} else {
score = r.scorer(&requested, &allocatable, false, 0, 0)
}
if glog.V(10) {
glog.Infof(
"%v -> %v: %v, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
pod.Name, node.Name, r.Name,
allocatable.MilliCPU, allocatable.Memory,
requested.MilliCPU+allocatable.MilliCPU, requested.Memory+allocatable.Memory,
score,
)
if klog.V(10) {
if len(pod.Spec.Volumes) >= 0 && utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && nodeInfo.TransientInfo != nil {
klog.Infof(
"%v -> %v: %v, capacity %d millicores %d memory bytes, %d volumes, total request %d millicores %d memory bytes %d volumes, score %d",
pod.Name, node.Name, r.Name,
allocatable.MilliCPU, allocatable.Memory, nodeInfo.TransientInfo.TransNodeInfo.AllocatableVolumesCount,
requested.MilliCPU, requested.Memory,
nodeInfo.TransientInfo.TransNodeInfo.RequestedVolumes,
score,
)
} else {
klog.Infof(
"%v -> %v: %v, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
pod.Name, node.Name, r.Name,
allocatable.MilliCPU, allocatable.Memory,
requested.MilliCPU, requested.Memory,
score,
)
}
}
return schedulerapi.HostPriority{
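
Two things change in this hunk: the volume-aware branch is now gated on the BalanceAttachedNodeVolumes feature gate, and the verbose logging keeps the established pattern of testing klog.V(10) in an if statement so the format arguments are never computed at lower verbosity. A sketch of that gating pattern; expensiveDump is a stand-in:

package main

import "k8s.io/klog"

// expensiveDump stands in for formatting a large NodeInfo.
func expensiveDump() string { return "..." }

func main() {
	klog.InitFlags(nil)
	// Guarding with klog.V(10) means expensiveDump only runs when -v=10 or
	// higher is set; klog.V(10).Infof(...) would evaluate it regardless.
	if klog.V(10) {
		klog.Infof("detail: %s", expensiveDump())
	}
	klog.Flush()
}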

View File

@ -23,7 +23,7 @@ import (
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"github.com/golang/glog"
"k8s.io/klog"
)
// ResourceLimitsPriorityMap is a priority function that increases score of input node by 1 if the node satisfies
@ -52,10 +52,10 @@ func ResourceLimitsPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedule
score = 1
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
if klog.V(10) {
// We explicitly don't do klog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.Infof(
klog.Infof(
"%v -> %v: Resource Limits Priority, allocatable %d millicores %d memory bytes, pod limits %d millicores %d memory bytes, score %d",
pod.Name, node.Name,
allocatableResources.MilliCPU, allocatableResources.Memory,
@ -70,7 +70,7 @@ func ResourceLimitsPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedule
}, nil
}
// computeScore return 1 if limit value is less than or equal to allocable
// computeScore returns 1 if limit value is less than or equal to allocatable
// value, otherwise it returns 0.
func computeScore(limit, allocatable int64) int64 {
if limit != 0 && allocatable != 0 && limit <= allocatable {
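
computeScore's contract is small enough to enumerate. A runnable sketch that mirrors the definition above and walks the four interesting cases:

package main

import "fmt"

// computeScore mirrors the definition above, for illustration only.
func computeScore(limit, allocatable int64) int64 {
	if limit != 0 && allocatable != 0 && limit <= allocatable {
		return 1
	}
	return 0
}

func main() {
	fmt.Println(computeScore(500, 1000))  // 1: limit fits within allocatable
	fmt.Println(computeScore(2000, 1000)) // 0: limit exceeds allocatable
	fmt.Println(computeScore(0, 1000))    // 0: no limit set, no credit
	fmt.Println(computeScore(500, 0))     // 0: nothing allocatable
}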

View File

@ -26,7 +26,7 @@ import (
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
utilnode "k8s.io/kubernetes/pkg/util/node"
"github.com/golang/glog"
"k8s.io/klog"
)
// When zone information is present, give 2/3 of the weighting to zone spreading, 1/3 to node spreading
@ -94,19 +94,15 @@ func (s *SelectorSpread) CalculateSpreadPriorityMap(pod *v1.Pod, meta interface{
// Ignore the previous deleted version for spreading purposes
// (it can still be considered for resource restrictions etc.)
if nodePod.DeletionTimestamp != nil {
glog.V(4).Infof("skipping pending-deleted pod: %s/%s", nodePod.Namespace, nodePod.Name)
klog.V(4).Infof("skipping pending-deleted pod: %s/%s", nodePod.Namespace, nodePod.Name)
continue
}
matches := false
for _, selector := range selectors {
if selector.Matches(labels.Set(nodePod.ObjectMeta.Labels)) {
matches = true
count++
break
}
}
if matches {
count++
}
}
return schedulerapi.HostPriority{
Host: node.Name,
@ -164,8 +160,8 @@ func (s *SelectorSpread) CalculateSpreadPriorityReduce(pod *v1.Pod, meta interfa
}
}
result[i].Score = int(fScore)
if glog.V(10) {
glog.Infof(
if klog.V(10) {
klog.Infof(
"%v -> %v: SelectorSpreadPriority, Score: (%d)", pod.Name, result[i].Host, int(fScore),
)
}
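
The 2/3-zone, 1/3-node weighting described at the top of the file only applies when zone labels are present. A sketch of that blend, with illustrative names:

package main

import "fmt"

const zoneWeighting = 2.0 / 3.0

// blend combines node-level and zone-level spreading scores per the
// weighting comment above; without zone information the node score
// stands alone.
func blend(nodeScore, zoneScore float64, haveZones bool) float64 {
	if !haveZones {
		return nodeScore
	}
	return (1.0-zoneWeighting)*nodeScore + zoneWeighting*zoneScore
}

func main() {
	fmt.Println(blend(9.0, 3.0, true))  // 5: zone spreading dominates
	fmt.Println(blend(9.0, 3.0, false)) // 9
}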

View File

@ -21,9 +21,8 @@ import (
"sort"
"testing"
apps "k8s.io/api/apps/v1beta1"
apps "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
@ -60,7 +59,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
pods []*v1.Pod
nodes []string
rcs []*v1.ReplicationController
rss []*extensions.ReplicaSet
rss []*apps.ReplicaSet
services []*v1.Service
sss []*apps.StatefulSet
expectedList schedulerapi.HostPriorityList
@ -200,7 +199,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
rss: []*apps.ReplicaSet{{Spec: apps.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
name: "service with partial pod label matches with service and replica set",
@ -241,7 +240,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"bar": "foo"}}}},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
rss: []*apps.ReplicaSet{{Spec: apps.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
name: "disjoined service and replica set should be treated equally",
@ -280,7 +279,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
rss: []*apps.ReplicaSet{{Spec: apps.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
name: "Replica set with partial pod label matches",
@ -318,7 +317,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"baz": "blah"}}}}},
rss: []*apps.ReplicaSet{{Spec: apps.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"baz": "blah"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
name: "Another replication set with partial pod label matches",
@ -348,14 +347,14 @@ func TestSelectorSpreadPriority(t *testing.T) {
statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss),
}
mataDataProducer := NewPriorityMetadataFactory(
metaDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(test.rcs),
schedulertesting.FakeReplicaSetLister(test.rss),
schedulertesting.FakeStatefulSetLister(test.sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
metaData := metaDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, mataData)
ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, metaData)
list, err := ttp(test.pod, nodeNameToInfo, makeNodeList(test.nodes))
if err != nil {
t.Errorf("unexpected error: %v \n", err)
@ -410,7 +409,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
pod *v1.Pod
pods []*v1.Pod
rcs []*v1.ReplicationController
rss []*extensions.ReplicaSet
rss []*apps.ReplicaSet
services []*v1.Service
sss []*apps.StatefulSet
expectedList schedulerapi.HostPriorityList
@ -584,13 +583,13 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss),
}
mataDataProducer := NewPriorityMetadataFactory(
metaDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(test.rcs),
schedulertesting.FakeReplicaSetLister(test.rss),
schedulertesting.FakeStatefulSetLister(test.sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, mataData)
metaData := metaDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, metaData)
list, err := ttp(test.pod, nodeNameToInfo, makeLabeledNodeList(labeledNodes))
if err != nil {
t.Errorf("unexpected error: %v", err)
@ -761,23 +760,23 @@ func TestZoneSpreadPriority(t *testing.T) {
},
}
// these local variables just make sure controllerLister/replicaSetLister/statefulSetLister are not nil
// when construct mataDataProducer
// when construct metaDataProducer
sss := []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}}
rcs := []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}}
rss := []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}}
rss := []*apps.ReplicaSet{{Spec: apps.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeLabeledNodeList(test.nodes))
zoneSpread := ServiceAntiAffinity{podLister: schedulertesting.FakePodLister(test.pods), serviceLister: schedulertesting.FakeServiceLister(test.services), label: "zone"}
mataDataProducer := NewPriorityMetadataFactory(
metaDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(rcs),
schedulertesting.FakeReplicaSetLister(rss),
schedulertesting.FakeStatefulSetLister(sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(zoneSpread.CalculateAntiAffinityPriorityMap, zoneSpread.CalculateAntiAffinityPriorityReduce, mataData)
metaData := metaDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(zoneSpread.CalculateAntiAffinityPriorityMap, zoneSpread.CalculateAntiAffinityPriorityReduce, metaData)
list, err := ttp(test.pod, nodeNameToInfo, makeLabeledNodeList(test.nodes))
if err != nil {
t.Errorf("unexpected error: %v", err)

View File

@ -41,18 +41,18 @@ func makeNode(node string, milliCPU, memory int64) *v1.Node {
}
}
func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction, mataData interface{}) algorithm.PriorityFunction {
func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction, metaData interface{}) algorithm.PriorityFunction {
return func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
for i := range nodes {
hostResult, err := mapFn(pod, mataData, nodeNameToInfo[nodes[i].Name])
hostResult, err := mapFn(pod, metaData, nodeNameToInfo[nodes[i].Name])
if err != nil {
return nil, err
}
result = append(result, hostResult)
}
if reduceFn != nil {
if err := reduceFn(pod, mataData, nodeNameToInfo, result); err != nil {
if err := reduceFn(pod, metaData, nodeNameToInfo, result); err != nil {
return nil, err
}
}

View File

@ -11,17 +11,16 @@ go_test(
srcs = [
"non_zero_test.go",
"topologies_test.go",
"util_test.go",
],
embed = [":go_default_library"],
deps = [
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/selection:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/selection:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],
)
@ -30,14 +29,12 @@ go_library(
srcs = [
"non_zero.go",
"topologies.go",
"util.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util",
deps = [
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],
)

View File

@ -26,7 +26,7 @@ import (
)
func TestGetNonzeroRequests(t *testing.T) {
tds := []struct {
tests := []struct {
name string
requests v1.ResourceList
expectedCPU int64
@ -65,9 +65,11 @@ func TestGetNonzeroRequests(t *testing.T) {
},
}
for _, td := range tds {
realCPU, realMemory := GetNonzeroRequests(&td.requests)
assert.EqualValuesf(t, td.expectedCPU, realCPU, "Failed to test: %s", td.name)
assert.EqualValuesf(t, td.expectedMemory, realMemory, "Failed to test: %s", td.name)
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
realCPU, realMemory := GetNonzeroRequests(&test.requests)
assert.EqualValuesf(t, test.expectedCPU, realCPU, "Failed to test: %s", test.name)
assert.EqualValuesf(t, test.expectedMemory, realMemory, "Failed to test: %s", test.name)
})
}
}

View File

@ -59,8 +59,10 @@ func TestGetNamespacesFromPodAffinityTerm(t *testing.T) {
}
for _, test := range tests {
realValue := GetNamespacesFromPodAffinityTerm(fakePod(), test.podAffinityTerm)
assert.EqualValuesf(t, test.expectedValue, realValue, "Failed to test: %s", test.name)
t.Run(test.name, func(t *testing.T) {
realValue := GetNamespacesFromPodAffinityTerm(fakePod(), test.podAffinityTerm)
assert.EqualValuesf(t, test.expectedValue, realValue, "Failed to test: %s", test.name)
})
}
}
@ -96,12 +98,14 @@ func TestPodMatchesTermsNamespaceAndSelector(t *testing.T) {
}
for _, test := range tests {
fakeTestPod := fakePod()
fakeTestPod.Namespace = test.podNamespaces
fakeTestPod.Labels = test.podLabels
t.Run(test.name, func(t *testing.T) {
fakeTestPod := fakePod()
fakeTestPod.Namespace = test.podNamespaces
fakeTestPod.Labels = test.podLabels
realValue := PodMatchesTermsNamespaceAndSelector(fakeTestPod, fakeNamespaces, fakeSelector)
assert.EqualValuesf(t, test.expectedResult, realValue, "Failed to test: %s", test.name)
realValue := PodMatchesTermsNamespaceAndSelector(fakeTestPod, fakeNamespaces, fakeSelector)
assert.EqualValuesf(t, test.expectedResult, realValue, "Failed to test: %s", test.name)
})
}
}
@ -248,7 +252,9 @@ func TestNodesHaveSameTopologyKey(t *testing.T) {
}
for _, test := range tests {
got := NodesHaveSameTopologyKey(test.nodeA, test.nodeB, test.topologyKey)
assert.Equalf(t, test.expected, got, "Failed to test: %s", test.name)
t.Run(test.name, func(t *testing.T) {
got := NodesHaveSameTopologyKey(test.nodeA, test.nodeB, test.topologyKey)
assert.Equalf(t, test.expected, got, "Failed to test: %s", test.name)
})
}
}

View File

@ -1,119 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"testing"
"github.com/stretchr/testify/assert"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
func TestGetControllerRef(t *testing.T) {
fakeBlockOwnerDeletion := true
fakeFalseController := false
fakeTrueController := true
fakeEmptyOwnerReference := metav1.OwnerReference{}
tds := []struct {
name string
pod v1.Pod
expectedNil bool
expectedOR metav1.OwnerReference
}{
{
"ownerreference_not_exist",
v1.Pod{},
true,
fakeEmptyOwnerReference,
},
{
"ownerreference_controller_is_nil",
v1.Pod{
ObjectMeta: metav1.ObjectMeta{
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "extensions/v1beta1",
Kind: "ReplicaSet",
Name: "or-unit-test-5b9cffccff",
UID: "a46372ea-b254-11e7-8373-fa163e25bfb5",
BlockOwnerDeletion: &fakeBlockOwnerDeletion,
},
},
},
},
true,
fakeEmptyOwnerReference,
},
{
"ownerreference_controller_is_false",
v1.Pod{
ObjectMeta: metav1.ObjectMeta{
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "extensions/v1beta1",
Kind: "ReplicaSet",
Name: "or-unit-test-5b9cffccff",
UID: "a46372ea-b254-11e7-8373-fa163e25bfb5",
Controller: &fakeFalseController,
BlockOwnerDeletion: &fakeBlockOwnerDeletion,
},
},
},
},
true,
fakeEmptyOwnerReference,
},
{
"ownerreference_controller_is_true",
v1.Pod{
ObjectMeta: metav1.ObjectMeta{
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "extensions/v1beta1",
Kind: "ReplicaSet",
Name: "or-unit-test-5b9cffccff",
UID: "a46372ea-b254-11e7-8373-fa163e25bfb5",
BlockOwnerDeletion: &fakeBlockOwnerDeletion,
Controller: &fakeTrueController,
},
},
},
},
false,
metav1.OwnerReference{
APIVersion: "extensions/v1beta1",
Kind: "ReplicaSet",
Name: "or-unit-test-5b9cffccff",
UID: "a46372ea-b254-11e7-8373-fa163e25bfb5",
BlockOwnerDeletion: &fakeBlockOwnerDeletion,
Controller: &fakeTrueController,
},
},
}
for _, td := range tds {
realOR := GetControllerRef(&td.pod)
if td.expectedNil {
assert.Nilf(t, realOR, "Failed to test: %s", td.name)
} else {
assert.Equalf(t, &td.expectedOR, realOR, "Failed to test: %s", td.name)
}
}
}

View File

@ -26,6 +26,9 @@ import (
// decisions made by Kubernetes. This is typically needed for resources not directly
// managed by Kubernetes.
type SchedulerExtender interface {
// Name returns a unique name that identifies the extender.
Name() string
// Filter based on extender-implemented predicate functions. The filtered list is
// expected to be a subset of the supplied list. failedNodesMap optionally contains
// the list of failed nodes and failure reasons.
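
Extenders now have to report a stable identifier. A stub showing only the new method; httpExtender is hypothetical, and a real implementation would still need Filter and the rest of the SchedulerExtender interface:

package algorithm

// httpExtender is a hypothetical extender carrying its URL prefix as its
// identity; only the new Name method is sketched here.
type httpExtender struct {
	urlPrefix string
}

// Name returns a unique name that identifies the extender.
func (h *httpExtender) Name() string {
	return h.urlPrefix
}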

View File

@ -1,60 +0,0 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"testing"
"k8s.io/api/core/v1"
)
// Some functions used by multiple scheduler tests.
type schedulerTester struct {
t *testing.T
scheduler ScheduleAlgorithm
nodeLister NodeLister
}
// Call if you know exactly where pod should get scheduled.
func (st *schedulerTester) expectSchedule(pod *v1.Pod, expected string) {
actual, err := st.scheduler.Schedule(pod, st.nodeLister)
if err != nil {
st.t.Errorf("Unexpected error %v\nTried to schedule: %#v", err, pod)
return
}
if actual != expected {
st.t.Errorf("Unexpected scheduling value: %v, expected %v", actual, expected)
}
}
// Call if you can't predict where pod will be scheduled.
func (st *schedulerTester) expectSuccess(pod *v1.Pod) {
_, err := st.scheduler.Schedule(pod, st.nodeLister)
if err != nil {
st.t.Errorf("Unexpected error %v\nTried to schedule: %#v", err, pod)
return
}
}
// Call if pod should *not* schedule.
func (st *schedulerTester) expectFailure(pod *v1.Pod) {
_, err := st.scheduler.Schedule(pod, st.nodeLister)
if err == nil {
st.t.Error("Unexpected non-error")
}
}

View File

@ -17,18 +17,19 @@ limitations under the License.
package algorithm
import (
apps "k8s.io/api/apps/v1beta1"
apps "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
policyv1beta1 "k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/labels"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedulerinternalcache "k8s.io/kubernetes/pkg/scheduler/internal/cache"
)
// NodeFieldSelectorKeys is a map whose keys are node field selector keys and whose
// values are functions that return the corresponding field's value from a node.
var NodeFieldSelectorKeys = map[string]func(*v1.Node) string{
NodeFieldSelectorKeyNodeName: func(n *v1.Node) string { return n.Name },
schedulerapi.NodeFieldSelectorKeyNodeName: func(n *v1.Node) string { return n.Name },
}
// FitPredicate is a function that indicates if a pod fits into an existing node.
@ -98,7 +99,7 @@ type PodLister interface {
List(labels.Selector) ([]*v1.Pod, error)
// This is similar to "List()", but the returned slice does not
// contain pods that don't pass `podFilter`.
FilteredList(podFilter schedulercache.PodFilter, selector labels.Selector) ([]*v1.Pod, error)
FilteredList(podFilter schedulerinternalcache.PodFilter, selector labels.Selector) ([]*v1.Pod, error)
}
// ServiceLister interface represents anything that can produce a list of services; the list is consumed by a scheduler.
@ -120,7 +121,13 @@ type ControllerLister interface {
// ReplicaSetLister interface represents anything that can produce a list of ReplicaSet; the list is consumed by a scheduler.
type ReplicaSetLister interface {
// Gets the replicasets for the given pod
GetPodReplicaSets(*v1.Pod) ([]*extensions.ReplicaSet, error)
GetPodReplicaSets(*v1.Pod) ([]*apps.ReplicaSet, error)
}
// PDBLister interface represents anything that can list PodDisruptionBudget objects.
type PDBLister interface {
// List() returns a list of PodDisruptionBudgets matching the selector.
List(labels.Selector) ([]*policyv1beta1.PodDisruptionBudget, error)
}
var _ ControllerLister = &EmptyControllerLister{}
@ -144,7 +151,7 @@ var _ ReplicaSetLister = &EmptyReplicaSetLister{}
type EmptyReplicaSetLister struct{}
// GetPodReplicaSets returns nil
func (f EmptyReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*extensions.ReplicaSet, err error) {
func (f EmptyReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*apps.ReplicaSet, err error) {
return nil, nil
}

View File

@ -1,85 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
api "k8s.io/kubernetes/pkg/apis/core"
)
const (
// TaintNodeNotReady will be added when node is not ready
// and feature-gate for TaintBasedEvictions flag is enabled,
// and removed when node becomes ready.
TaintNodeNotReady = "node.kubernetes.io/not-ready"
// DeprecatedTaintNodeNotReady is the deprecated version of TaintNodeNotReady.
// It is deprecated since 1.9
DeprecatedTaintNodeNotReady = "node.alpha.kubernetes.io/notReady"
// TaintNodeUnreachable will be added when node becomes unreachable
// (corresponding to NodeReady status ConditionUnknown)
// and feature-gate for TaintBasedEvictions flag is enabled,
// and removed when node becomes reachable (NodeReady status ConditionTrue).
TaintNodeUnreachable = "node.kubernetes.io/unreachable"
// DeprecatedTaintNodeUnreachable is the deprecated version of TaintNodeUnreachable.
// It is deprecated since 1.9
DeprecatedTaintNodeUnreachable = "node.alpha.kubernetes.io/unreachable"
// TaintNodeUnschedulable will be added when node becomes unschedulable
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node becomes schedulable.
TaintNodeUnschedulable = "node.kubernetes.io/unschedulable"
// TaintNodeOutOfDisk will be added when node becomes out of disk
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
TaintNodeOutOfDisk = "node.kubernetes.io/out-of-disk"
// TaintNodeMemoryPressure will be added when node has memory pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough memory.
TaintNodeMemoryPressure = "node.kubernetes.io/memory-pressure"
// TaintNodeDiskPressure will be added when node has disk pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
TaintNodeDiskPressure = "node.kubernetes.io/disk-pressure"
// TaintNodeNetworkUnavailable will be added when node's network is unavailable
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when network becomes ready.
TaintNodeNetworkUnavailable = "node.kubernetes.io/network-unavailable"
// TaintNodePIDPressure will be added when node has pid pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node no longer has pid pressure.
TaintNodePIDPressure = "node.kubernetes.io/pid-pressure"
// TaintExternalCloudProvider sets this taint on a node to mark it as unusable,
// when kubelet is started with the "external" cloud provider, until a controller
// from the cloud-controller-manager initializes this node, and then removes
// the taint
TaintExternalCloudProvider = "node.cloudprovider.kubernetes.io/uninitialized"
// TaintNodeShutdown when node is shutdown in external cloud provider
TaintNodeShutdown = "node.cloudprovider.kubernetes.io/shutdown"
// NodeFieldSelectorKeyNodeName ('metadata.name') uses this as node field selector key
// when selecting node by node's name.
NodeFieldSelectorKeyNodeName = api.ObjectNameField
)
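
Although this copy of the file is deleted by the commit, the taint keys stay in use wherever tolerations are built or matched. A sketch that tolerates the not-ready taint, using the constant's literal value and an illustrative five-minute grace period:

package main

import (
	"fmt"

	"k8s.io/api/core/v1"
)

func main() {
	seconds := int64(300)
	// The key string is the TaintNodeNotReady value defined above.
	tol := v1.Toleration{
		Key:               "node.kubernetes.io/not-ready",
		Operator:          v1.TolerationOpExists,
		Effect:            v1.TaintEffectNoExecute,
		TolerationSeconds: &seconds,
	}
	fmt.Printf("%+v\n", tol)
}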