vendor updates

This commit is contained in:
Serguei Bezverkhi
2018-03-06 17:33:18 -05:00
parent 4b3ebc171b
commit e9033989a0
5854 changed files with 248382 additions and 119809 deletions

87
vendor/k8s.io/kubernetes/pkg/scheduler/BUILD generated vendored Normal file
View File

@ -0,0 +1,87 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_test(
name = "go_default_test",
srcs = ["scheduler_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/controller/volume/persistentvolume:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/core:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/diff:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
],
)
go_library(
name = "go_default_library",
srcs = [
"scheduler.go",
"testutil.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler",
deps = [
"//pkg/features:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/core:go_default_library",
"//pkg/scheduler/metrics:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/scheduler/algorithm:all-srcs",
"//pkg/scheduler/algorithmprovider:all-srcs",
"//pkg/scheduler/api:all-srcs",
"//pkg/scheduler/core:all-srcs",
"//pkg/scheduler/factory:all-srcs",
"//pkg/scheduler/metrics:all-srcs",
"//pkg/scheduler/schedulercache:all-srcs",
"//pkg/scheduler/testing:all-srcs",
"//pkg/scheduler/util:all-srcs",
"//pkg/scheduler/volumebinder:all-srcs",
],
tags = ["automanaged"],
)

4
vendor/k8s.io/kubernetes/pkg/scheduler/OWNERS generated vendored Normal file
View File

@ -0,0 +1,4 @@
approvers:
- sig-scheduling-maintainers
reviewers:
- sig-scheduling

57
vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/BUILD generated vendored Normal file
View File

@ -0,0 +1,57 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"scheduler_interface.go",
"types.go",
"well_known_labels.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm",
deps = [
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"scheduler_interface_test.go",
"types_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/schedulercache:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/scheduler/algorithm/predicates:all-srcs",
"//pkg/scheduler/algorithm/priorities:all-srcs",
],
tags = ["automanaged"],
)

View File

@ -0,0 +1,19 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package algorithm contains a generic Scheduler interface and several
// implementations.
package algorithm // import "k8s.io/kubernetes/pkg/scheduler/algorithm"

View File

@ -0,0 +1,80 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"error.go",
"metadata.go",
"predicates.go",
"testing_helper.go",
"utils.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates",
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/apis/core/v1/helper/qos:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//pkg/volume/util:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/storage/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/rand:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/listers/storage/v1:go_default_library",
"//vendor/k8s.io/client-go/util/workqueue:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"metadata_test.go",
"predicates_test.go",
"utils_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/storage/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@ -0,0 +1,150 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/api/core/v1"
)
var (
// The predicateName tries to be consistent as the predicate name used in DefaultAlgorithmProvider defined in
// defaults.go (which tend to be stable for backward compatibility)
// NOTE: If you add a new predicate failure error for a predicate that can never
// be made to pass by removing pods, or you change an existing predicate so that
// it can never be made to pass by removing pods, you need to add the predicate
// failure error in nodesWherePreemptionMightHelp() in scheduler/core/generic_scheduler.go
// ErrDiskConflict is used for NoDiskConflict predicate error.
ErrDiskConflict = newPredicateFailureError("NoDiskConflict", "node(s) had no available disk")
// ErrVolumeZoneConflict is used for NoVolumeZoneConflict predicate error.
ErrVolumeZoneConflict = newPredicateFailureError("NoVolumeZoneConflict", "node(s) had no available volume zone")
// ErrNodeSelectorNotMatch is used for MatchNodeSelector predicate error.
ErrNodeSelectorNotMatch = newPredicateFailureError("MatchNodeSelector", "node(s) didn't match node selector")
// ErrPodAffinityNotMatch is used for MatchInterPodAffinity predicate error.
ErrPodAffinityNotMatch = newPredicateFailureError("MatchInterPodAffinity", "node(s) didn't match pod affinity/anti-affinity")
// ErrPodAffinityRulesNotMatch is used for PodAffinityRulesNotMatch predicate error.
ErrPodAffinityRulesNotMatch = newPredicateFailureError("PodAffinityRulesNotMatch", "node(s) didn't match pod affinity rules")
// ErrPodAntiAffinityRulesNotMatch is used for PodAntiAffinityRulesNotMatch predicate error.
ErrPodAntiAffinityRulesNotMatch = newPredicateFailureError("PodAntiAffinityRulesNotMatch", "node(s) didn't match pod anti-affinity rules")
// ErrExistingPodsAntiAffinityRulesNotMatch is used for ExistingPodsAntiAffinityRulesNotMatch predicate error.
ErrExistingPodsAntiAffinityRulesNotMatch = newPredicateFailureError("ExistingPodsAntiAffinityRulesNotMatch", "node(s) didn't satisfy existing pods anti-affinity rules")
// ErrTaintsTolerationsNotMatch is used for PodToleratesNodeTaints predicate error.
ErrTaintsTolerationsNotMatch = newPredicateFailureError("PodToleratesNodeTaints", "node(s) had taints that the pod didn't tolerate")
// ErrPodNotMatchHostName is used for HostName predicate error.
ErrPodNotMatchHostName = newPredicateFailureError("HostName", "node(s) didn't match the requested hostname")
// ErrPodNotFitsHostPorts is used for PodFitsHostPorts predicate error.
ErrPodNotFitsHostPorts = newPredicateFailureError("PodFitsHostPorts", "node(s) didn't have free ports for the requested pod ports")
// ErrNodeLabelPresenceViolated is used for CheckNodeLabelPresence predicate error.
ErrNodeLabelPresenceViolated = newPredicateFailureError("CheckNodeLabelPresence", "node(s) didn't have the requested labels")
// ErrServiceAffinityViolated is used for CheckServiceAffinity predicate error.
ErrServiceAffinityViolated = newPredicateFailureError("CheckServiceAffinity", "node(s) didn't match service affinity")
// ErrMaxVolumeCountExceeded is used for MaxVolumeCount predicate error.
ErrMaxVolumeCountExceeded = newPredicateFailureError("MaxVolumeCount", "node(s) exceed max volume count")
// ErrNodeUnderMemoryPressure is used for NodeUnderMemoryPressure predicate error.
ErrNodeUnderMemoryPressure = newPredicateFailureError("NodeUnderMemoryPressure", "node(s) had memory pressure")
// ErrNodeUnderDiskPressure is used for NodeUnderDiskPressure predicate error.
ErrNodeUnderDiskPressure = newPredicateFailureError("NodeUnderDiskPressure", "node(s) had disk pressure")
// ErrNodeOutOfDisk is used for NodeOutOfDisk predicate error.
ErrNodeOutOfDisk = newPredicateFailureError("NodeOutOfDisk", "node(s) were out of disk space")
// ErrNodeNotReady is used for NodeNotReady predicate error.
ErrNodeNotReady = newPredicateFailureError("NodeNotReady", "node(s) were not ready")
// ErrNodeNetworkUnavailable is used for NodeNetworkUnavailable predicate error.
ErrNodeNetworkUnavailable = newPredicateFailureError("NodeNetworkUnavailable", "node(s) had unavailable network")
// ErrNodeUnschedulable is used for NodeUnschedulable predicate error.
ErrNodeUnschedulable = newPredicateFailureError("NodeUnschedulable", "node(s) were unschedulable")
// ErrNodeUnknownCondition is used for NodeUnknownCondition predicate error.
ErrNodeUnknownCondition = newPredicateFailureError("NodeUnknownCondition", "node(s) had unknown conditions")
// ErrVolumeNodeConflict is used for VolumeNodeAffinityConflict predicate error.
ErrVolumeNodeConflict = newPredicateFailureError("VolumeNodeAffinityConflict", "node(s) had volume node affinity conflict")
// ErrVolumeBindConflict is used for VolumeBindingNoMatch predicate error.
ErrVolumeBindConflict = newPredicateFailureError("VolumeBindingNoMatch", "node(s) didn't find available persistent volumes to bind")
// ErrFakePredicate is used for test only. The fake predicates returning false also returns error
// as ErrFakePredicate.
ErrFakePredicate = newPredicateFailureError("FakePredicateError", "Nodes failed the fake predicate")
)
// InsufficientResourceError is an error type that indicates what kind of resource limit is
// hit and caused the unfitting failure.
type InsufficientResourceError struct {
// resourceName is the name of the resource that is insufficient
ResourceName v1.ResourceName
requested int64
used int64
capacity int64
}
// NewInsufficientResourceError returns an InsufficientResourceError.
func NewInsufficientResourceError(resourceName v1.ResourceName, requested, used, capacity int64) *InsufficientResourceError {
return &InsufficientResourceError{
ResourceName: resourceName,
requested: requested,
used: used,
capacity: capacity,
}
}
func (e *InsufficientResourceError) Error() string {
return fmt.Sprintf("Node didn't have enough resource: %s, requested: %d, used: %d, capacity: %d",
e.ResourceName, e.requested, e.used, e.capacity)
}
// GetReason returns the reason of the InsufficientResourceError.
func (e *InsufficientResourceError) GetReason() string {
return fmt.Sprintf("Insufficient %v", e.ResourceName)
}
// GetInsufficientAmount returns the amount of the insufficient resource of the error.
func (e *InsufficientResourceError) GetInsufficientAmount() int64 {
return e.requested - (e.capacity - e.used)
}
// PredicateFailureError describes a failure error of predicate.
type PredicateFailureError struct {
PredicateName string
PredicateDesc string
}
func newPredicateFailureError(predicateName, predicateDesc string) *PredicateFailureError {
return &PredicateFailureError{PredicateName: predicateName, PredicateDesc: predicateDesc}
}
func (e *PredicateFailureError) Error() string {
return fmt.Sprintf("Predicate %s failed", e.PredicateName)
}
// GetReason returns the reason of the PredicateFailureError.
func (e *PredicateFailureError) GetReason() string {
return e.PredicateDesc
}
// FailureReason describes a failure reason.
type FailureReason struct {
reason string
}
// NewFailureReason creates a FailureReason with message.
func NewFailureReason(msg string) *FailureReason {
return &FailureReason{reason: msg}
}
// GetReason returns the reason of the FailureReason.
func (e *FailureReason) GetReason() string {
return e.reason
}

View File

@ -0,0 +1,208 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"sync"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
"github.com/golang/glog"
)
// PredicateMetadataFactory defines a factory of predicate metadata.
type PredicateMetadataFactory struct {
podLister algorithm.PodLister
}
// Note that predicateMetadata and matchingPodAntiAffinityTerm need to be declared in the same file
// due to the way declarations are processed in predicate declaration unit tests.
type matchingPodAntiAffinityTerm struct {
term *v1.PodAffinityTerm
node *v1.Node
}
// NOTE: When new fields are added/removed or logic is changed, please make sure that
// RemovePod, AddPod, and ShallowCopy functions are updated to work with the new changes.
type predicateMetadata struct {
pod *v1.Pod
podBestEffort bool
podRequest *schedulercache.Resource
podPorts []*v1.ContainerPort
//key is a pod full name with the anti-affinity rules.
matchingAntiAffinityTerms map[string][]matchingPodAntiAffinityTerm
serviceAffinityInUse bool
serviceAffinityMatchingPodList []*v1.Pod
serviceAffinityMatchingPodServices []*v1.Service
// ignoredExtendedResources is a set of extended resource names that will
// be ignored in the PodFitsResources predicate.
//
// They can be scheduler extender managed resources, the consumption of
// which should be accounted only by the extenders. This set is synthesized
// from scheduler extender configuration and does not change per pod.
ignoredExtendedResources sets.String
}
// Ensure that predicateMetadata implements algorithm.PredicateMetadata.
var _ algorithm.PredicateMetadata = &predicateMetadata{}
// PredicateMetadataProducer function produces predicate metadata.
type PredicateMetadataProducer func(pm *predicateMetadata)
var predicateMetaProducerRegisterLock sync.Mutex
var predicateMetadataProducers = make(map[string]PredicateMetadataProducer)
// RegisterPredicateMetadataProducer registers a PredicateMetadataProducer.
func RegisterPredicateMetadataProducer(predicateName string, precomp PredicateMetadataProducer) {
predicateMetaProducerRegisterLock.Lock()
defer predicateMetaProducerRegisterLock.Unlock()
predicateMetadataProducers[predicateName] = precomp
}
// RegisterPredicateMetadataProducerWithExtendedResourceOptions registers a
// PredicateMetadataProducer that creates predicate metadata with the provided
// options for extended resources.
//
// See the comments in "predicateMetadata" for the explanation of the options.
func RegisterPredicateMetadataProducerWithExtendedResourceOptions(ignoredExtendedResources sets.String) {
RegisterPredicateMetadataProducer("PredicateWithExtendedResourceOptions", func(pm *predicateMetadata) {
pm.ignoredExtendedResources = ignoredExtendedResources
})
}
// NewPredicateMetadataFactory creates a PredicateMetadataFactory.
func NewPredicateMetadataFactory(podLister algorithm.PodLister) algorithm.PredicateMetadataProducer {
factory := &PredicateMetadataFactory{
podLister,
}
return factory.GetMetadata
}
// GetMetadata returns the predicateMetadata used which will be used by various predicates.
func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInfoMap map[string]*schedulercache.NodeInfo) algorithm.PredicateMetadata {
// If we cannot compute metadata, just return nil
if pod == nil {
return nil
}
matchingTerms, err := getMatchingAntiAffinityTerms(pod, nodeNameToInfoMap)
if err != nil {
return nil
}
predicateMetadata := &predicateMetadata{
pod: pod,
podBestEffort: isPodBestEffort(pod),
podRequest: GetResourceRequest(pod),
podPorts: schedutil.GetContainerPorts(pod),
matchingAntiAffinityTerms: matchingTerms,
}
for predicateName, precomputeFunc := range predicateMetadataProducers {
glog.V(10).Infof("Precompute: %v", predicateName)
precomputeFunc(predicateMetadata)
}
return predicateMetadata
}
// RemovePod changes predicateMetadata assuming that the given `deletedPod` is
// deleted from the system.
func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error {
deletedPodFullName := schedutil.GetPodFullName(deletedPod)
if deletedPodFullName == schedutil.GetPodFullName(meta.pod) {
return fmt.Errorf("deletedPod and meta.pod must not be the same")
}
// Delete any anti-affinity rule from the deletedPod.
delete(meta.matchingAntiAffinityTerms, deletedPodFullName)
// All pods in the serviceAffinityMatchingPodList are in the same namespace.
// So, if the namespace of the first one is not the same as the namespace of the
// deletedPod, we don't need to check the list, as deletedPod isn't in the list.
if meta.serviceAffinityInUse &&
len(meta.serviceAffinityMatchingPodList) > 0 &&
deletedPod.Namespace == meta.serviceAffinityMatchingPodList[0].Namespace {
for i, pod := range meta.serviceAffinityMatchingPodList {
if schedutil.GetPodFullName(pod) == deletedPodFullName {
meta.serviceAffinityMatchingPodList = append(
meta.serviceAffinityMatchingPodList[:i],
meta.serviceAffinityMatchingPodList[i+1:]...)
break
}
}
}
return nil
}
// AddPod changes predicateMetadata assuming that `newPod` is added to the
// system.
func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache.NodeInfo) error {
addedPodFullName := schedutil.GetPodFullName(addedPod)
if addedPodFullName == schedutil.GetPodFullName(meta.pod) {
return fmt.Errorf("addedPod and meta.pod must not be the same")
}
if nodeInfo.Node() == nil {
return fmt.Errorf("invalid node in nodeInfo")
}
// Add matching anti-affinity terms of the addedPod to the map.
podMatchingTerms, err := getMatchingAntiAffinityTermsOfExistingPod(meta.pod, addedPod, nodeInfo.Node())
if err != nil {
return err
}
if len(podMatchingTerms) > 0 {
existingTerms, found := meta.matchingAntiAffinityTerms[addedPodFullName]
if found {
meta.matchingAntiAffinityTerms[addedPodFullName] = append(existingTerms,
podMatchingTerms...)
} else {
meta.matchingAntiAffinityTerms[addedPodFullName] = podMatchingTerms
}
}
// If addedPod is in the same namespace as the meta.pod, update the list
// of matching pods if applicable.
if meta.serviceAffinityInUse && addedPod.Namespace == meta.pod.Namespace {
selector := CreateSelectorFromLabels(meta.pod.Labels)
if selector.Matches(labels.Set(addedPod.Labels)) {
meta.serviceAffinityMatchingPodList = append(meta.serviceAffinityMatchingPodList,
addedPod)
}
}
return nil
}
// ShallowCopy copies a metadata struct into a new struct and creates a copy of
// its maps and slices, but it does not copy the contents of pointer values.
func (meta *predicateMetadata) ShallowCopy() algorithm.PredicateMetadata {
newPredMeta := &predicateMetadata{
pod: meta.pod,
podBestEffort: meta.podBestEffort,
podRequest: meta.podRequest,
serviceAffinityInUse: meta.serviceAffinityInUse,
ignoredExtendedResources: meta.ignoredExtendedResources,
}
newPredMeta.podPorts = append([]*v1.ContainerPort(nil), meta.podPorts...)
newPredMeta.matchingAntiAffinityTerms = map[string][]matchingPodAntiAffinityTerm{}
for k, v := range meta.matchingAntiAffinityTerms {
newPredMeta.matchingAntiAffinityTerms[k] = append([]matchingPodAntiAffinityTerm(nil), v...)
}
newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil),
meta.serviceAffinityMatchingPodServices...)
newPredMeta.serviceAffinityMatchingPodList = append([]*v1.Pod(nil),
meta.serviceAffinityMatchingPodList...)
return (algorithm.PredicateMetadata)(newPredMeta)
}

View File

@ -0,0 +1,408 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"reflect"
"sort"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
// sortableAntiAffinityTerms lets us to sort anti-affinity terms.
type sortableAntiAffinityTerms []matchingPodAntiAffinityTerm
// Less establishes some ordering between two matchingPodAntiAffinityTerms for
// sorting.
func (s sortableAntiAffinityTerms) Less(i, j int) bool {
t1, t2 := s[i], s[j]
if t1.node.Name != t2.node.Name {
return t1.node.Name < t2.node.Name
}
if len(t1.term.Namespaces) != len(t2.term.Namespaces) {
return len(t1.term.Namespaces) < len(t2.term.Namespaces)
}
if t1.term.TopologyKey != t2.term.TopologyKey {
return t1.term.TopologyKey < t2.term.TopologyKey
}
if len(t1.term.LabelSelector.MatchLabels) != len(t2.term.LabelSelector.MatchLabels) {
return len(t1.term.LabelSelector.MatchLabels) < len(t2.term.LabelSelector.MatchLabels)
}
return false
}
func (s sortableAntiAffinityTerms) Len() int { return len(s) }
func (s sortableAntiAffinityTerms) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
var _ = sort.Interface(sortableAntiAffinityTerms{})
func sortAntiAffinityTerms(terms map[string][]matchingPodAntiAffinityTerm) {
for k, v := range terms {
sortableTerms := sortableAntiAffinityTerms(v)
sort.Sort(sortableTerms)
terms[k] = sortableTerms
}
}
// sortablePods lets us to sort pods.
type sortablePods []*v1.Pod
func (s sortablePods) Less(i, j int) bool {
return s[i].Namespace < s[j].Namespace ||
(s[i].Namespace == s[j].Namespace && s[i].Name < s[j].Name)
}
func (s sortablePods) Len() int { return len(s) }
func (s sortablePods) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
var _ = sort.Interface(&sortablePods{})
// sortableServices allows us to sort services.
type sortableServices []*v1.Service
func (s sortableServices) Less(i, j int) bool {
return s[i].Namespace < s[j].Namespace ||
(s[i].Namespace == s[j].Namespace && s[i].Name < s[j].Name)
}
func (s sortableServices) Len() int { return len(s) }
func (s sortableServices) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
var _ = sort.Interface(&sortableServices{})
// predicateMetadataEquivalent returns true if the two metadata are equivalent.
// Note: this function does not compare podRequest.
func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
if !reflect.DeepEqual(meta1.pod, meta2.pod) {
return fmt.Errorf("pods are not the same")
}
if meta1.podBestEffort != meta2.podBestEffort {
return fmt.Errorf("podBestEfforts are not equal")
}
if meta1.serviceAffinityInUse != meta1.serviceAffinityInUse {
return fmt.Errorf("serviceAffinityInUses are not equal")
}
if len(meta1.podPorts) != len(meta2.podPorts) {
return fmt.Errorf("podPorts are not equal")
}
for !reflect.DeepEqual(meta1.podPorts, meta2.podPorts) {
return fmt.Errorf("podPorts are not equal")
}
sortAntiAffinityTerms(meta1.matchingAntiAffinityTerms)
sortAntiAffinityTerms(meta2.matchingAntiAffinityTerms)
if !reflect.DeepEqual(meta1.matchingAntiAffinityTerms, meta2.matchingAntiAffinityTerms) {
return fmt.Errorf("matchingAntiAffinityTerms are not euqal")
}
if meta1.serviceAffinityInUse {
sortablePods1 := sortablePods(meta1.serviceAffinityMatchingPodList)
sort.Sort(sortablePods1)
sortablePods2 := sortablePods(meta2.serviceAffinityMatchingPodList)
sort.Sort(sortablePods2)
if !reflect.DeepEqual(sortablePods1, sortablePods2) {
return fmt.Errorf("serviceAffinityMatchingPodLists are not euqal")
}
sortableServices1 := sortableServices(meta1.serviceAffinityMatchingPodServices)
sort.Sort(sortableServices1)
sortableServices2 := sortableServices(meta2.serviceAffinityMatchingPodServices)
sort.Sort(sortableServices2)
if !reflect.DeepEqual(sortableServices1, sortableServices2) {
return fmt.Errorf("serviceAffinityMatchingPodServices are not euqal")
}
}
return nil
}
func TestPredicateMetadata_AddRemovePod(t *testing.T) {
var label1 = map[string]string{
"region": "r1",
"zone": "z11",
}
var label2 = map[string]string{
"region": "r1",
"zone": "z12",
}
var label3 = map[string]string{
"region": "r2",
"zone": "z21",
}
selector1 := map[string]string{"foo": "bar"}
antiAffinityFooBar := &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar"},
},
},
},
TopologyKey: "region",
},
},
}
antiAffinityComplex := &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar", "buzz"},
},
},
},
TopologyKey: "region",
},
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"bar", "security", "test"},
},
},
},
TopologyKey: "zone",
},
},
}
tests := []struct {
description string
pendingPod *v1.Pod
addedPod *v1.Pod
existingPods []*v1.Pod
nodes []*v1.Node
services []*v1.Service
}{
{
description: "no anti-affinity or service affinity exist",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{NodeName: "nodeC"},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeB"},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
description: "metadata anti-affinity terms are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
},
},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{
NodeName: "nodeB",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
},
},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
description: "metadata service-affinity data are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{NodeName: "nodeC"},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeB"},
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: selector1}}},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
description: "metadata anti-affinity terms and service affinity data are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
},
},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{
NodeName: "nodeA",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityComplex,
},
},
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: selector1}}},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
}
for _, test := range tests {
allPodLister := schedulertesting.FakePodLister(append(test.existingPods, test.addedPod))
// getMeta creates predicate meta data given the list of pods.
getMeta := func(lister schedulertesting.FakePodLister) (*predicateMetadata, map[string]*schedulercache.NodeInfo) {
nodeInfoMap := schedulercache.CreateNodeNameToInfoMap(lister, test.nodes)
// nodeList is a list of non-pointer nodes to feed to FakeNodeListInfo.
nodeList := []v1.Node{}
for _, n := range test.nodes {
nodeList = append(nodeList, *n)
}
_, precompute := NewServiceAffinityPredicate(lister, schedulertesting.FakeServiceLister(test.services), FakeNodeListInfo(nodeList), nil)
RegisterPredicateMetadataProducer("ServiceAffinityMetaProducer", precompute)
pmf := PredicateMetadataFactory{lister}
meta := pmf.GetMetadata(test.pendingPod, nodeInfoMap)
return meta.(*predicateMetadata), nodeInfoMap
}
// allPodsMeta is meta data produced when all pods, including test.addedPod
// are given to the metadata producer.
allPodsMeta, _ := getMeta(allPodLister)
// existingPodsMeta1 is meta data produced for test.existingPods (without test.addedPod).
existingPodsMeta1, nodeInfoMap := getMeta(schedulertesting.FakePodLister(test.existingPods))
// Add test.addedPod to existingPodsMeta1 and make sure meta is equal to allPodsMeta
nodeInfo := nodeInfoMap[test.addedPod.Spec.NodeName]
if err := existingPodsMeta1.AddPod(test.addedPod, nodeInfo); err != nil {
t.Errorf("test [%v]: error adding pod to meta: %v", test.description, err)
}
if err := predicateMetadataEquivalent(allPodsMeta, existingPodsMeta1); err != nil {
t.Errorf("test [%v]: meta data are not equivalent: %v", test.description, err)
}
// Remove the added pod and from existingPodsMeta1 an make sure it is equal
// to meta generated for existing pods.
existingPodsMeta2, _ := getMeta(schedulertesting.FakePodLister(test.existingPods))
if err := existingPodsMeta1.RemovePod(test.addedPod); err != nil {
t.Errorf("test [%v]: error removing pod from meta: %v", test.description, err)
}
if err := predicateMetadataEquivalent(existingPodsMeta1, existingPodsMeta2); err != nil {
t.Errorf("test [%v]: meta data are not equivalent: %v", test.description, err)
}
}
}
// TestPredicateMetadata_ShallowCopy tests the ShallowCopy function. It is based
// on the idea that shallow-copy should produce an object that is deep-equal to the original
// object.
func TestPredicateMetadata_ShallowCopy(t *testing.T) {
source := predicateMetadata{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
Namespace: "testns",
},
},
podBestEffort: true,
podRequest: &schedulercache.Resource{
MilliCPU: 1000,
Memory: 300,
AllowedPodNumber: 4,
},
podPorts: []*v1.ContainerPort{
{
Name: "name",
HostPort: 10,
ContainerPort: 20,
Protocol: "TCP",
HostIP: "1.2.3.4",
},
},
matchingAntiAffinityTerms: map[string][]matchingPodAntiAffinityTerm{
"term1": {
{
term: &v1.PodAffinityTerm{TopologyKey: "node"},
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
},
},
},
},
serviceAffinityInUse: true,
serviceAffinityMatchingPodList: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "pod2"}},
},
serviceAffinityMatchingPodServices: []*v1.Service{
{ObjectMeta: metav1.ObjectMeta{Name: "service1"}},
},
}
if !reflect.DeepEqual(source.ShallowCopy().(*predicateMetadata), &source) {
t.Errorf("Copy is not equal to source!")
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,85 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
)
// FakePersistentVolumeClaimInfo declares a []v1.PersistentVolumeClaim type for testing.
type FakePersistentVolumeClaimInfo []v1.PersistentVolumeClaim
// GetPersistentVolumeClaimInfo gets PVC matching the namespace and PVC ID.
func (pvcs FakePersistentVolumeClaimInfo) GetPersistentVolumeClaimInfo(namespace string, pvcID string) (*v1.PersistentVolumeClaim, error) {
for _, pvc := range pvcs {
if pvc.Name == pvcID && pvc.Namespace == namespace {
return &pvc, nil
}
}
return nil, fmt.Errorf("Unable to find persistent volume claim: %s/%s", namespace, pvcID)
}
// FakeNodeInfo declares a v1.Node type for testing.
type FakeNodeInfo v1.Node
// GetNodeInfo return a fake node info object.
func (n FakeNodeInfo) GetNodeInfo(nodeName string) (*v1.Node, error) {
node := v1.Node(n)
return &node, nil
}
// FakeNodeListInfo declares a []v1.Node type for testing.
type FakeNodeListInfo []v1.Node
// GetNodeInfo returns a fake node object in the fake nodes.
func (nodes FakeNodeListInfo) GetNodeInfo(nodeName string) (*v1.Node, error) {
for _, node := range nodes {
if node.Name == nodeName {
return &node, nil
}
}
return nil, fmt.Errorf("Unable to find node: %s", nodeName)
}
// FakePersistentVolumeInfo declares a []v1.PersistentVolume type for testing.
type FakePersistentVolumeInfo []v1.PersistentVolume
// GetPersistentVolumeInfo returns a fake PV object in the fake PVs by PV ID.
func (pvs FakePersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*v1.PersistentVolume, error) {
for _, pv := range pvs {
if pv.Name == pvID {
return &pv, nil
}
}
return nil, fmt.Errorf("Unable to find persistent volume: %s", pvID)
}
// FakeStorageClassInfo declares a []storagev1.StorageClass type for testing.
type FakeStorageClassInfo []storagev1.StorageClass
// GetStorageClassInfo returns a fake storage class object in the fake storage classes by name.
func (classes FakeStorageClassInfo) GetStorageClassInfo(name string) (*storagev1.StorageClass, error) {
for _, sc := range classes {
if sc.Name == name {
return &sc, nil
}
}
return nil, fmt.Errorf("Unable to find storage class: %s", name)
}

View File

@ -0,0 +1,146 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"github.com/golang/glog"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
// FindLabelsInSet gets as many key/value pairs as possible out of a label set.
func FindLabelsInSet(labelsToKeep []string, selector labels.Set) map[string]string {
aL := make(map[string]string)
for _, l := range labelsToKeep {
if selector.Has(l) {
aL[l] = selector.Get(l)
}
}
return aL
}
// AddUnsetLabelsToMap backfills missing values with values we find in a map.
func AddUnsetLabelsToMap(aL map[string]string, labelsToAdd []string, labelSet labels.Set) {
for _, l := range labelsToAdd {
// if the label is already there, dont overwrite it.
if _, exists := aL[l]; exists {
continue
}
// otherwise, backfill this label.
if labelSet.Has(l) {
aL[l] = labelSet.Get(l)
}
}
}
// FilterPodsByNamespace filters pods outside a namespace from the given list.
func FilterPodsByNamespace(pods []*v1.Pod, ns string) []*v1.Pod {
filtered := []*v1.Pod{}
for _, nsPod := range pods {
if nsPod.Namespace == ns {
filtered = append(filtered, nsPod)
}
}
return filtered
}
// CreateSelectorFromLabels is used to define a selector that corresponds to the keys in a map.
func CreateSelectorFromLabels(aL map[string]string) labels.Selector {
if aL == nil || len(aL) == 0 {
return labels.Everything()
}
return labels.Set(aL).AsSelector()
}
// EquivalencePodGenerator is a generator of equivalence class for pod with consideration of PVC info.
type EquivalencePodGenerator struct {
pvcInfo PersistentVolumeClaimInfo
}
// NewEquivalencePodGenerator returns a getEquivalencePod method with consideration of PVC info.
func NewEquivalencePodGenerator(pvcInfo PersistentVolumeClaimInfo) algorithm.GetEquivalencePodFunc {
g := &EquivalencePodGenerator{
pvcInfo: pvcInfo,
}
return g.getEquivalencePod
}
// GetEquivalencePod returns a EquivalencePod which contains a group of pod attributes which can be reused.
func (e *EquivalencePodGenerator) getEquivalencePod(pod *v1.Pod) interface{} {
// For now we only consider pods:
// 1. OwnerReferences is Controller
// 2. with same OwnerReferences
// 3. with same PVC claim
// to be equivalent
for _, ref := range pod.OwnerReferences {
if ref.Controller != nil && *ref.Controller {
pvcSet, err := e.getPVCSet(pod)
if err == nil {
// A pod can only belongs to one controller, so let's return.
return &EquivalencePod{
ControllerRef: ref,
PVCSet: pvcSet,
}
}
// If error encountered, log warning and return nil (i.e. no equivalent pod found)
glog.Warningf("[EquivalencePodGenerator] for pod: %v failed due to: %v", pod.GetName(), err)
return nil
}
}
return nil
}
// getPVCSet returns a set of PVC UIDs of given pod.
func (e *EquivalencePodGenerator) getPVCSet(pod *v1.Pod) (sets.String, error) {
result := sets.NewString()
for _, volume := range pod.Spec.Volumes {
if volume.PersistentVolumeClaim == nil {
continue
}
pvcName := volume.PersistentVolumeClaim.ClaimName
pvc, err := e.pvcInfo.GetPersistentVolumeClaimInfo(pod.GetNamespace(), pvcName)
if err != nil {
return nil, err
}
result.Insert(string(pvc.UID))
}
return result, nil
}
// EquivalencePod is a group of pod attributes which can be reused as equivalence to schedule other pods.
type EquivalencePod struct {
ControllerRef metav1.OwnerReference
PVCSet sets.String
}
// portsConflict check whether existingPorts and wantPorts conflict with each other
// return true if we have a conflict
func portsConflict(existingPorts schedutil.HostPortInfo, wantPorts []*v1.ContainerPort) bool {
for _, cp := range wantPorts {
if existingPorts.CheckConflict(cp.HostIP, string(cp.Protocol), cp.HostPort) {
return true
}
}
return false
}

View File

@ -0,0 +1,70 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
)
// ExampleUtils is a https://blog.golang.org/examples styled unit test.
func ExampleFindLabelsInSet() {
labelSubset := labels.Set{}
labelSubset["label1"] = "value1"
labelSubset["label2"] = "value2"
// Lets make believe that these pods are on the cluster.
// Utility functions will inspect their labels, filter them, and so on.
nsPods := []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod1",
Namespace: "ns1",
Labels: map[string]string{
"label1": "wontSeeThis",
"label2": "wontSeeThis",
"label3": "will_see_this",
},
},
}, // first pod which will be used via the utilities
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod2",
Namespace: "ns1",
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod3ThatWeWontSee",
},
},
}
fmt.Println(FindLabelsInSet([]string{"label1", "label2", "label3"}, nsPods[0].ObjectMeta.Labels)["label3"])
AddUnsetLabelsToMap(labelSubset, []string{"label1", "label2", "label3"}, nsPods[0].ObjectMeta.Labels)
fmt.Println(labelSubset)
for _, pod := range FilterPodsByNamespace(nsPods, "ns1") {
fmt.Print(pod.Name, ",")
}
// Output:
// will_see_this
// label1=value1,label2=value2,label3=will_see_this
// pod1,pod2,
}

View File

@ -0,0 +1,92 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"balanced_resource_allocation.go",
"image_locality.go",
"interpod_affinity.go",
"least_requested.go",
"metadata.go",
"most_requested.go",
"node_affinity.go",
"node_label.go",
"node_prefer_avoid_pods.go",
"reduce.go",
"resource_allocation.go",
"resource_limits.go",
"selector_spreading.go",
"taint_toleration.go",
"test_util.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities",
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/util/node:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/client-go/util/workqueue:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"balanced_resource_allocation_test.go",
"image_locality_test.go",
"interpod_affinity_test.go",
"least_requested_test.go",
"metadata_test.go",
"most_requested_test.go",
"node_affinity_test.go",
"node_label_test.go",
"node_prefer_avoid_pods_test.go",
"resource_limits_test.go",
"selector_spreading_test.go",
"taint_toleration_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/kubelet/apis:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/scheduler/algorithm/priorities/util:all-srcs",
],
tags = ["automanaged"],
)

View File

@ -0,0 +1,61 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"math"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
var (
balancedResourcePriority = &ResourceAllocationPriority{"BalancedResourceAllocation", balancedResourceScorer}
// BalancedResourceAllocationMap favors nodes with balanced resource usage rate.
// BalancedResourceAllocationMap should **NOT** be used alone, and **MUST** be used together
// with LeastRequestedPriority. It calculates the difference between the cpu and memory fraction
// of capacity, and prioritizes the host based on how close the two metrics are to each other.
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced
// Resource Utilization"
BalancedResourceAllocationMap = balancedResourcePriority.PriorityMap
)
func balancedResourceScorer(requested, allocable *schedulercache.Resource) int64 {
cpuFraction := fractionOfCapacity(requested.MilliCPU, allocable.MilliCPU)
memoryFraction := fractionOfCapacity(requested.Memory, allocable.Memory)
if cpuFraction >= 1 || memoryFraction >= 1 {
// if requested >= capacity, the corresponding host should never be preferred.
return 0
}
// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
// respectively. Multilying the absolute value of the difference by 10 scales the value to
// 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
// 10 leads to the score which also scales from 0 to 10 while 10 representing well balanced.
diff := math.Abs(cpuFraction - memoryFraction)
return int64((1 - diff) * float64(schedulerapi.MaxPriority))
}
func fractionOfCapacity(requested, capacity int64) float64 {
if capacity == 0 {
return 1
}
return float64(requested) / float64(capacity)
}

View File

@ -0,0 +1,264 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
func TestBalancedResourceAllocation(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
machine1Spec := v1.PodSpec{
NodeName: "machine1",
}
machine2Spec := v1.PodSpec{
NodeName: "machine2",
}
noResources := v1.PodSpec{
Containers: []v1.Container{},
}
cpuOnly := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
},
}
cpuOnly2 := cpuOnly
cpuOnly2.NodeName = "machine2"
cpuAndMemory := v1.PodSpec{
NodeName: "machine2",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
/*
Node1 scores (remaining resources) on 0-10 scale
CPU Fraction: 0 / 4000 = 0%
Memory Fraction: 0 / 10000 = 0%
Node1 Score: 10 - (0-0)*10 = 10
Node2 scores (remaining resources) on 0-10 scale
CPU Fraction: 0 / 4000 = 0 %
Memory Fraction: 0 / 10000 = 0%
Node2 Score: 10 - (0-0)*10 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled, nothing requested",
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 3000 / 4000= 75%
Memory Fraction: 5000 / 10000 = 50%
Node1 Score: 10 - (0.75-0.5)*10 = 7
Node2 scores on 0-10 scale
CPU Fraction: 3000 / 6000= 50%
Memory Fraction: 5000/10000 = 50%
Node2 Score: 10 - (0.5-0.5)*10 = 10
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 0 / 4000= 0%
Memory Fraction: 0 / 10000 = 0%
Node1 Score: 10 - (0-0)*10 = 10
Node2 scores on 0-10 scale
CPU Fraction: 0 / 4000= 0%
Memory Fraction: 0 / 10000 = 0%
Node2 Score: 10 - (0-0)*10 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "no resources requested, pods scheduled",
pods: []*v1.Pod{
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 0 / 20000 = 0%
Node1 Score: 10 - (0.6-0)*10 = 4
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Node2 Score: 10 - (0.6-0.25)*10 = 6
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 6}},
test: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: cpuOnly2, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Node1 Score: 10 - (0.6-0.25)*10 = 6
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 10000 / 20000 = 50%
Node2 Score: 10 - (0.6-0.5)*10 = 9
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 9}},
test: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Node1 Score: 10 - (0.6-0.25)*10 = 6
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 10000 / 50000 = 20%
Node2 Score: 10 - (0.6-0.2)*10 = 6
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 6}},
test: "resources requested, pods scheduled with resources, differently sized machines",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
Memory Fraction: 0 / 10000 = 0
Node1 Score: 0
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
Memory Fraction 5000 / 10000 = 50%
Node2 Score: 0
*/
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "requested resources exceed node capacity",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "zero node resources, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(BalancedResourceAllocationMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View File

@ -0,0 +1,88 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/api/core/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
// This is a reasonable size range of all container images. 90%ile of images on dockerhub drops into this range.
const (
mb int64 = 1024 * 1024
minImgSize int64 = 23 * mb
maxImgSize int64 = 1000 * mb
)
// ImageLocalityPriorityMap is a priority function that favors nodes that already have requested pod container's images.
// It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10
// based on the total size of those images.
// - If none of the images are present, this node will be given the lowest priority.
// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority.
func ImageLocalityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
sumSize := totalImageSize(node, pod.Spec.Containers)
return schedulerapi.HostPriority{
Host: node.Name,
Score: calculateScoreFromSize(sumSize),
}, nil
}
// calculateScoreFromSize calculates the priority of a node. sumSize is sum size of requested images on this node.
// 1. Split image size range into 10 buckets.
// 2. Decide the priority of a given sumSize based on which bucket it belongs to.
func calculateScoreFromSize(sumSize int64) int {
switch {
case sumSize == 0 || sumSize < minImgSize:
// 0 means none of the images required by this pod are present on this
// node or the total size of the images present is too small to be taken into further consideration.
return 0
case sumSize >= maxImgSize:
// If existing images' total size is larger than max, just make it highest priority.
return schedulerapi.MaxPriority
}
return int((int64(schedulerapi.MaxPriority) * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1)
}
// totalImageSize returns the total image size of all the containers that are already on the node.
func totalImageSize(node *v1.Node, containers []v1.Container) int64 {
imageSizes := make(map[string]int64)
for _, image := range node.Status.Images {
for _, name := range image.Names {
imageSizes[name] = image.SizeBytes
}
}
var total int64
for _, container := range containers {
if size, ok := imageSizes[container.Image]; ok {
total += size
}
}
return total
}

View File

@ -0,0 +1,183 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
func TestImageLocalityPriority(t *testing.T) {
test40250 := v1.PodSpec{
Containers: []v1.Container{
{
Image: "gcr.io/40",
},
{
Image: "gcr.io/250",
},
},
}
test40140 := v1.PodSpec{
Containers: []v1.Container{
{
Image: "gcr.io/40",
},
{
Image: "gcr.io/140",
},
},
}
testMinMax := v1.PodSpec{
Containers: []v1.Container{
{
Image: "gcr.io/10",
},
{
Image: "gcr.io/2000",
},
},
}
node401402000 := v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/40",
"gcr.io/40:v1",
"gcr.io/40:v1",
},
SizeBytes: int64(40 * mb),
},
{
Names: []string{
"gcr.io/140",
"gcr.io/140:v1",
},
SizeBytes: int64(140 * mb),
},
{
Names: []string{
"gcr.io/2000",
},
SizeBytes: int64(2000 * mb),
},
},
}
node25010 := v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/250",
},
SizeBytes: int64(250 * mb),
},
{
Names: []string{
"gcr.io/10",
"gcr.io/10:v1",
},
SizeBytes: int64(10 * mb),
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
// Pod: gcr.io/40 gcr.io/250
// Node1
// Image: gcr.io/40 40MB
// Score: (40M-23M)/97.7M + 1 = 1
// Node2
// Image: gcr.io/250 250MB
// Score: (250M-23M)/97.7M + 1 = 3
pod: &v1.Pod{Spec: test40250},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 3}},
test: "two images spread on two nodes, prefer the larger image one",
},
{
// Pod: gcr.io/40 gcr.io/140
// Node1
// Image: gcr.io/40 40MB, gcr.io/140 140MB
// Score: (40M+140M-23M)/97.7M + 1 = 2
// Node2
// Image: not present
// Score: 0
pod: &v1.Pod{Spec: test40140},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: 0}},
test: "two images on one node, prefer this node",
},
{
// Pod: gcr.io/2000 gcr.io/10
// Node1
// Image: gcr.io/2000 2000MB
// Score: 2000 > max score = 10
// Node2
// Image: gcr.io/10 10MB
// Score: 10 < min score = 0
pod: &v1.Pod{Spec: testMinMax},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "if exceed limit, use limit",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(ImageLocalityPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}
func makeImageNode(node string, status v1.NodeStatus) *v1.Node {
return &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: node},
Status: status,
}
}

View File

@ -0,0 +1,240 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"sync"
"k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/util/workqueue"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// InterPodAffinity contains information to calculate inter pod affinity.
type InterPodAffinity struct {
info predicates.NodeInfo
nodeLister algorithm.NodeLister
podLister algorithm.PodLister
hardPodAffinityWeight int32
}
// NewInterPodAffinityPriority creates an InterPodAffinity.
func NewInterPodAffinityPriority(
info predicates.NodeInfo,
nodeLister algorithm.NodeLister,
podLister algorithm.PodLister,
hardPodAffinityWeight int32) algorithm.PriorityFunction {
interPodAffinity := &InterPodAffinity{
info: info,
nodeLister: nodeLister,
podLister: podLister,
hardPodAffinityWeight: hardPodAffinityWeight,
}
return interPodAffinity.CalculateInterPodAffinityPriority
}
type podAffinityPriorityMap struct {
sync.Mutex
// nodes contain all nodes that should be considered
nodes []*v1.Node
// counts store the mapping from node name to so-far computed score of
// the node.
counts map[string]float64
// The first error that we faced.
firstError error
}
func newPodAffinityPriorityMap(nodes []*v1.Node) *podAffinityPriorityMap {
return &podAffinityPriorityMap{
nodes: nodes,
counts: make(map[string]float64, len(nodes)),
}
}
func (p *podAffinityPriorityMap) setError(err error) {
p.Lock()
defer p.Unlock()
if p.firstError == nil {
p.firstError = err
}
}
func (p *podAffinityPriorityMap) processTerm(term *v1.PodAffinityTerm, podDefiningAffinityTerm, podToCheck *v1.Pod, fixedNode *v1.Node, weight float64) {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(podDefiningAffinityTerm, term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
p.setError(err)
return
}
match := priorityutil.PodMatchesTermsNamespaceAndSelector(podToCheck, namespaces, selector)
if match {
func() {
p.Lock()
defer p.Unlock()
for _, node := range p.nodes {
if priorityutil.NodesHaveSameTopologyKey(node, fixedNode, term.TopologyKey) {
p.counts[node.Name] += weight
}
}
}()
}
}
func (p *podAffinityPriorityMap) processTerms(terms []v1.WeightedPodAffinityTerm, podDefiningAffinityTerm, podToCheck *v1.Pod, fixedNode *v1.Node, multiplier int) {
for i := range terms {
term := &terms[i]
p.processTerm(&term.PodAffinityTerm, podDefiningAffinityTerm, podToCheck, fixedNode, float64(term.Weight*int32(multiplier)))
}
}
// CalculateInterPodAffinityPriority compute a sum by iterating through the elements of weightedPodAffinityTerm and adding
// "weight" to the sum if the corresponding PodAffinityTerm is satisfied for
// that node; the node(s) with the highest sum are the most preferred.
// Symmetry need to be considered for preferredDuringSchedulingIgnoredDuringExecution from podAffinity & podAntiAffinity,
// symmetry need to be considered for hard requirements from podAffinity
func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
affinity := pod.Spec.Affinity
hasAffinityConstraints := affinity != nil && affinity.PodAffinity != nil
hasAntiAffinityConstraints := affinity != nil && affinity.PodAntiAffinity != nil
allNodeNames := make([]string, 0, len(nodeNameToInfo))
for name := range nodeNameToInfo {
allNodeNames = append(allNodeNames, name)
}
// convert the topology key based weights to the node name based weights
var maxCount float64
var minCount float64
// priorityMap stores the mapping from node name to so-far computed score of
// the node.
pm := newPodAffinityPriorityMap(nodes)
processPod := func(existingPod *v1.Pod) error {
existingPodNode, err := ipa.info.GetNodeInfo(existingPod.Spec.NodeName)
if err != nil {
if apierrors.IsNotFound(err) {
glog.Errorf("Node not found, %v", existingPod.Spec.NodeName)
return nil
}
return err
}
existingPodAffinity := existingPod.Spec.Affinity
existingHasAffinityConstraints := existingPodAffinity != nil && existingPodAffinity.PodAffinity != nil
existingHasAntiAffinityConstraints := existingPodAffinity != nil && existingPodAffinity.PodAntiAffinity != nil
if hasAffinityConstraints {
// For every soft pod affinity term of <pod>, if <existingPod> matches the term,
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPods>`s node by the term`s weight.
terms := affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, pod, existingPod, existingPodNode, 1)
}
if hasAntiAffinityConstraints {
// For every soft pod anti-affinity term of <pod>, if <existingPod> matches the term,
// decrement <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>`s node by the term`s weight.
terms := affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, pod, existingPod, existingPodNode, -1)
}
if existingHasAffinityConstraints {
// For every hard pod affinity term of <existingPod>, if <pod> matches the term,
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the constant <ipa.hardPodAffinityWeight>
if ipa.hardPodAffinityWeight > 0 {
terms := existingPodAffinity.PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
//if len(existingPodAffinity.PodAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
// terms = append(terms, existingPodAffinity.PodAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
//}
for _, term := range terms {
pm.processTerm(&term, existingPod, pod, existingPodNode, float64(ipa.hardPodAffinityWeight))
}
}
// For every soft pod affinity term of <existingPod>, if <pod> matches the term,
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the term's weight.
terms := existingPodAffinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, existingPod, pod, existingPodNode, 1)
}
if existingHasAntiAffinityConstraints {
// For every soft pod anti-affinity term of <existingPod>, if <pod> matches the term,
// decrement <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the term's weight.
terms := existingPodAffinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, existingPod, pod, existingPodNode, -1)
}
return nil
}
processNode := func(i int) {
nodeInfo := nodeNameToInfo[allNodeNames[i]]
if nodeInfo.Node() != nil {
if hasAffinityConstraints || hasAntiAffinityConstraints {
// We need to process all the nodes.
for _, existingPod := range nodeInfo.Pods() {
if err := processPod(existingPod); err != nil {
pm.setError(err)
}
}
} else {
// The pod doesn't have any constraints - we need to check only existing
// ones that have some.
for _, existingPod := range nodeInfo.PodsWithAffinity() {
if err := processPod(existingPod); err != nil {
pm.setError(err)
}
}
}
}
}
workqueue.Parallelize(16, len(allNodeNames), processNode)
if pm.firstError != nil {
return nil, pm.firstError
}
for _, node := range nodes {
if pm.counts[node.Name] > maxCount {
maxCount = pm.counts[node.Name]
}
if pm.counts[node.Name] < minCount {
minCount = pm.counts[node.Name]
}
}
// calculate final priority score for each node
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
for _, node := range nodes {
fScore := float64(0)
if (maxCount - minCount) > 0 {
fScore = float64(schedulerapi.MaxPriority) * ((pm.counts[node.Name] - minCount) / (maxCount - minCount))
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
if glog.V(10) {
glog.Infof("%v -> %v: InterPodAffinityPriority, Score: (%d)", pod.Name, node.Name, int(fScore))
}
}
return result, nil
}

View File

@ -0,0 +1,615 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"reflect"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
type FakeNodeListInfo []*v1.Node
func (nodes FakeNodeListInfo) GetNodeInfo(nodeName string) (*v1.Node, error) {
for _, node := range nodes {
if node.Name == nodeName {
return node, nil
}
}
return nil, fmt.Errorf("Unable to find node: %s", nodeName)
}
func TestInterPodAffinityPriority(t *testing.T) {
labelRgChina := map[string]string{
"region": "China",
}
labelRgIndia := map[string]string{
"region": "India",
}
labelAzAz1 := map[string]string{
"az": "az1",
}
labelAzAz2 := map[string]string{
"az": "az2",
}
labelRgChinaAzAz1 := map[string]string{
"region": "China",
"az": "az1",
}
podLabelSecurityS1 := map[string]string{
"security": "S1",
}
podLabelSecurityS2 := map[string]string{
"security": "S2",
}
// considered only preferredDuringSchedulingIgnoredDuringExecution in pod affinity
stayWithS1InRegion := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 5,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1"},
},
},
},
TopologyKey: "region",
},
},
},
},
}
stayWithS2InRegion := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 6,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S2"},
},
},
},
TopologyKey: "region",
},
},
},
},
}
affinity3 := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 8,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"S1"},
}, {
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S2"},
},
},
},
TopologyKey: "region",
},
}, {
Weight: 2,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpExists,
}, {
Key: "wrongkey",
Operator: metav1.LabelSelectorOpDoesNotExist,
},
},
},
TopologyKey: "region",
},
},
},
},
}
hardAffinity := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1", "value2"},
},
},
},
TopologyKey: "region",
}, {
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpExists,
}, {
Key: "wrongkey",
Operator: metav1.LabelSelectorOpDoesNotExist,
},
},
},
TopologyKey: "region",
},
},
},
}
awayFromS1InAz := &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 5,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1"},
},
},
},
TopologyKey: "az",
},
},
},
},
}
// to stay away from security S2 in any az.
awayFromS2InAz := &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 5,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S2"},
},
},
},
TopologyKey: "az",
},
},
},
},
}
// to stay with security S1 in same region, stay away from security S2 in any az.
stayWithS1InRegionAwayFromS2InAz := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 8,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1"},
},
},
},
TopologyKey: "region",
},
},
},
},
PodAntiAffinity: &v1.PodAntiAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 5,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S2"},
},
},
},
TopologyKey: "az",
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "all machines are same priority as Affinity is nil",
},
// the node(machine1) that have the label {"region": "China"} (match the topology key) and that have existing pods that match the labelSelector get high score
// the node(machine3) that don't have the label {"region": "whatever the value is"} (mismatch the topology key) but that have existing pods that match the labelSelector get low score
// the node(machine2) that have the label {"region": "China"} (match the topology key) but that have existing pods that mismatch the labelSelector get low score
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegion}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "Affinity: pod that matches topology key & pods in nodes will get high score comparing to others" +
"which doesn't match either pods in nodes or in topology key",
},
// the node1(machine1) that have the label {"region": "China"} (match the topology key) and that have existing pods that match the labelSelector get high score
// the node2(machine2) that have the label {"region": "China"}, match the topology key and have the same label value with node1, get the same high score with node1
// the node3(machine3) that have the label {"region": "India"}, match the topology key but have a different label value, don't have existing pods that match the labelSelector,
// get a low score.
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegion}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChinaAzAz1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "All the nodes that have the same topology key & label value with one of them has an existing pod that match the affinity rules, have the same score",
},
// there are 2 regions, say regionChina(machine1,machine3,machine4) and regionIndia(machine2,machine5), both regions have nodes that match the preference.
// But there are more nodes(actually more existing pods) in regionChina that match the preference than regionIndia.
// Then, nodes in regionChina get higher score than nodes in regionIndia, and all the nodes in regionChina should get a same score(high score),
// while all the nodes in regionIndia should get another same score(low score).
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS2InRegion}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine4"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine5"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine4", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine5", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 5}, {Host: "machine3", Score: schedulerapi.MaxPriority}, {Host: "machine4", Score: schedulerapi.MaxPriority}, {Host: "machine5", Score: 5}},
test: "Affinity: nodes in one region has more matching pods comparing to other reqion, so the region which has more macthes will get high score",
},
// Test with the different operators and values for pod affinity scheduling preference, including some match failures.
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: affinity3}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Affinity: different Label operators and values for pod affinity scheduling preference, including some match failures ",
},
// Test the symmetry cases for affinity, the difference between affinity and symmetry is not the pod wants to run together with some existing pods,
// but the existing pods have the inter pod affinity preference while the pod to schedule satisfy the preference.
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1", Affinity: stayWithS1InRegion}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2", Affinity: stayWithS2InRegion}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Affinity symmetry: considred only the preferredDuringSchedulingIgnoredDuringExecution in pod affinity symmetry",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1", Affinity: hardAffinity}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2", Affinity: hardAffinity}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Affinity symmetry: considred RequiredDuringSchedulingIgnoredDuringExecution in pod affinity symmetry",
},
// The pod to schedule prefer to stay away from some existing pods at node level using the pod anti affinity.
// the nodes that have the label {"node": "bar"} (match the topology key) and that have existing pods that match the labelSelector get low score
// the nodes that don't have the label {"node": "whatever the value is"} (mismatch the topology key) but that have existing pods that match the labelSelector get high score
// the nodes that have the label {"node": "bar"} (match the topology key) but that have existing pods that mismatch the labelSelector get high score
// there are 2 nodes, say node1 and node2, both nodes have pods that match the labelSelector and have topology-key in node.Labels.
// But there are more pods on node1 that match the preference than node2. Then, node1 get a lower score than node2.
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelAzAz1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChina}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity: pod that doesnot match existing pods in node will get high score ",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelAzAz1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChina}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity: pod that does not matches topology key & matches the pods in nodes will get higher score comparing to others ",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelAzAz1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity: one node has more matching pods comparing to other node, so the node which has more unmacthes will get high score",
},
// Test the symmetry cases for anti affinity
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1", Affinity: awayFromS2InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelAzAz1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelAzAz2}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity symmetry: the existing pods in node which has anti affinity match will get high score",
},
// Test both affinity and anti-affinity
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegionAwayFromS2InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "Affinity and Anti Affinity: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity",
},
// Combined cases considering both affinity and anti-affinity, the pod to schedule and existing pods have the same labels (they are in the same RC/service),
// the pod prefer to run together with its brother pods in the same region, but wants to stay away from them at node level,
// so that all the pods of a RC/service can stay in a same region but trying to separate with each other
// machine-1,machine-3,machine-4 are in ChinaRegion others machin-2,machine-5 are in IndiaRegion
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegionAwayFromS2InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine3"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine4"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine5"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChinaAzAz1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine4", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine5", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 4}, {Host: "machine3", Score: schedulerapi.MaxPriority}, {Host: "machine4", Score: schedulerapi.MaxPriority}, {Host: "machine5", Score: 4}},
test: "Affinity and Anti Affinity: considering both affinity and anti-affinity, the pod to schedule and existing pods have the same labels",
},
// Consider Affinity, Anti Affinity and symmetry together.
// for Affinity, the weights are: 8, 0, 0, 0
// for Anti Affinity, the weights are: 0, -5, 0, 0
// for Affinity symmetry, the weights are: 0, 0, 8, 0
// for Anti Affinity symmetry, the weights are: 0, 0, 0, -5
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: stayWithS1InRegionAwayFromS2InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS2}},
{Spec: v1.PodSpec{NodeName: "machine3", Affinity: stayWithS1InRegionAwayFromS2InAz}},
{Spec: v1.PodSpec{NodeName: "machine4", Affinity: awayFromS1InAz}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelAzAz1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine4", Labels: labelAzAz2}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: schedulerapi.MaxPriority}, {Host: "machine4", Score: 0}},
test: "Affinity and Anti Affinity and symmetry: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity & symmetry",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
interPodAffinity := InterPodAffinity{
info: FakeNodeListInfo(test.nodes),
nodeLister: schedulertesting.FakeNodeLister(test.nodes),
podLister: schedulertesting.FakePodLister(test.pods),
hardPodAffinityWeight: v1.DefaultHardPodAffinitySymmetricWeight,
}
list, err := interPodAffinity.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: \nexpected \n\t%#v, \ngot \n\t%#v\n", test.test, test.expectedList, list)
}
}
}
func TestHardPodAffinitySymmetricWeight(t *testing.T) {
podLabelServiceS1 := map[string]string{
"service": "S1",
}
labelRgChina := map[string]string{
"region": "China",
}
labelRgIndia := map[string]string{
"region": "India",
}
labelAzAz1 := map[string]string{
"az": "az1",
}
hardPodAffinity := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1"},
},
},
},
TopologyKey: "region",
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
hardPodAffinityWeight int32
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelServiceS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1", Affinity: hardPodAffinity}},
{Spec: v1.PodSpec{NodeName: "machine2", Affinity: hardPodAffinity}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
hardPodAffinityWeight: v1.DefaultHardPodAffinitySymmetricWeight,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Hard Pod Affinity symmetry: hard pod affinity symmetry weights 1 by default, then nodes that match the hard pod affinity symmetry rules, get a high score",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelServiceS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1", Affinity: hardPodAffinity}},
{Spec: v1.PodSpec{NodeName: "machine2", Affinity: hardPodAffinity}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
hardPodAffinityWeight: 0,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "Hard Pod Affinity symmetry: hard pod affinity symmetry is closed(weights 0), then nodes that match the hard pod affinity symmetry rules, get same score with those not match",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
ipa := InterPodAffinity{
info: FakeNodeListInfo(test.nodes),
nodeLister: schedulertesting.FakeNodeLister(test.nodes),
podLister: schedulertesting.FakePodLister(test.pods),
hardPodAffinityWeight: test.hardPodAffinityWeight,
}
list, err := ipa.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: \nexpected \n\t%#v, \ngot \n\t%#v\n", test.test, test.expectedList, list)
}
}
}

View File

@ -0,0 +1,53 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
var (
leastResourcePriority = &ResourceAllocationPriority{"LeastResourceAllocation", leastResourceScorer}
// LeastRequestedPriorityMap is a priority function that favors nodes with fewer requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and
// prioritizes based on the minimum of the average of the fraction of requested to capacity.
//
// Details:
// cpu((capacity-sum(requested))*10/capacity) + memory((capacity-sum(requested))*10/capacity)/2
LeastRequestedPriorityMap = leastResourcePriority.PriorityMap
)
func leastResourceScorer(requested, allocable *schedulercache.Resource) int64 {
return (leastRequestedScore(requested.MilliCPU, allocable.MilliCPU) +
leastRequestedScore(requested.Memory, allocable.Memory)) / 2
}
// The unused capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more unused resources the higher the score is.
func leastRequestedScore(requested, capacity int64) int64 {
if capacity == 0 {
return 0
}
if requested > capacity {
return 0
}
return ((capacity - requested) * int64(schedulerapi.MaxPriority)) / capacity
}

View File

@ -0,0 +1,264 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
func TestLeastRequested(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
machine1Spec := v1.PodSpec{
NodeName: "machine1",
}
machine2Spec := v1.PodSpec{
NodeName: "machine2",
}
noResources := v1.PodSpec{
Containers: []v1.Container{},
}
cpuOnly := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
},
}
cpuOnly2 := cpuOnly
cpuOnly2.NodeName = "machine2"
cpuAndMemory := v1.PodSpec{
NodeName: "machine2",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
/*
Node1 scores (remaining resources) on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node1 Score: (10 + 10) / 2 = 10
Node2 scores (remaining resources) on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node2 Score: (10 + 10) / 2 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled, nothing requested",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((4000 - 3000) *10) / 4000 = 2.5
Memory Score: ((10000 - 5000) *10) / 10000 = 5
Node1 Score: (2.5 + 5) / 2 = 3
Node2 scores on 0-10 scale
CPU Score: ((6000 - 3000) *10) / 6000 = 5
Memory Score: ((10000 - 5000) *10) / 10000 = 5
Node2 Score: (5 + 5) / 2 = 5
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 5}},
test: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node1 Score: (10 + 10) / 2 = 10
Node2 scores on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node2 Score: (10 + 10) / 2 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "no resources requested, pods scheduled",
pods: []*v1.Pod{
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 0) *10) / 20000 = 10
Node1 Score: (4 + 10) / 2 = 7
Node2 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
Node2 Score: (4 + 7.5) / 2 = 5
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 5}},
test: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: cpuOnly2, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
Node1 Score: (4 + 7.5) / 2 = 5
Node2 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 10000) *10) / 20000 = 5
Node2 Score: (4 + 5) / 2 = 4
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 4}},
test: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
Node1 Score: (4 + 7.5) / 2 = 5
Node2 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((50000 - 10000) *10) / 50000 = 8
Node2 Score: (4 + 8) / 2 = 6
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 6}},
test: "resources requested, pods scheduled with resources, differently sized machines",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((4000 - 6000) *10) / 4000 = 0
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node1 Score: (0 + 10) / 2 = 5
Node2 scores on 0-10 scale
CPU Score: ((4000 - 6000) *10) / 4000 = 0
Memory Score: ((10000 - 5000) *10) / 10000 = 5
Node2 Score: (0 + 5) / 2 = 2
*/
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 2}},
test: "requested resources exceed node capacity",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "zero node resources, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(LeastRequestedPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View File

@ -0,0 +1,114 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
// PriorityMetadataFactory is a factory to produce PriorityMetadata.
type PriorityMetadataFactory struct {
serviceLister algorithm.ServiceLister
controllerLister algorithm.ControllerLister
replicaSetLister algorithm.ReplicaSetLister
statefulSetLister algorithm.StatefulSetLister
}
// NewPriorityMetadataFactory creates a PriorityMetadataFactory.
func NewPriorityMetadataFactory(serviceLister algorithm.ServiceLister, controllerLister algorithm.ControllerLister, replicaSetLister algorithm.ReplicaSetLister, statefulSetLister algorithm.StatefulSetLister) algorithm.PriorityMetadataProducer {
factory := &PriorityMetadataFactory{
serviceLister: serviceLister,
controllerLister: controllerLister,
replicaSetLister: replicaSetLister,
statefulSetLister: statefulSetLister,
}
return factory.PriorityMetadata
}
// priorityMetadata is a type that is passed as metadata for priority functions
type priorityMetadata struct {
nonZeroRequest *schedulercache.Resource
podTolerations []v1.Toleration
affinity *v1.Affinity
podSelectors []labels.Selector
controllerRef *metav1.OwnerReference
podFirstServiceSelector labels.Selector
}
// PriorityMetadata is a PriorityMetadataProducer. Node info can be nil.
func (pmf *PriorityMetadataFactory) PriorityMetadata(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{} {
// If we cannot compute metadata, just return nil
if pod == nil {
return nil
}
return &priorityMetadata{
nonZeroRequest: getNonZeroRequests(pod),
podTolerations: getAllTolerationPreferNoSchedule(pod.Spec.Tolerations),
affinity: pod.Spec.Affinity,
podSelectors: getSelectors(pod, pmf.serviceLister, pmf.controllerLister, pmf.replicaSetLister, pmf.statefulSetLister),
controllerRef: priorityutil.GetControllerRef(pod),
podFirstServiceSelector: getFirstServiceSelector(pod, pmf.serviceLister),
}
}
// getFirstServiceSelector returns one selector of services the given pod.
func getFirstServiceSelector(pod *v1.Pod, sl algorithm.ServiceLister) (firstServiceSelector labels.Selector) {
if services, err := sl.GetPodServices(pod); err == nil && len(services) > 0 {
return labels.SelectorFromSet(services[0].Spec.Selector)
}
return nil
}
// getSelectors returns selectors of services, RCs and RSs matching the given pod.
func getSelectors(pod *v1.Pod, sl algorithm.ServiceLister, cl algorithm.ControllerLister, rsl algorithm.ReplicaSetLister, ssl algorithm.StatefulSetLister) []labels.Selector {
var selectors []labels.Selector
if services, err := sl.GetPodServices(pod); err == nil {
for _, service := range services {
selectors = append(selectors, labels.SelectorFromSet(service.Spec.Selector))
}
}
if rcs, err := cl.GetPodControllers(pod); err == nil {
for _, rc := range rcs {
selectors = append(selectors, labels.SelectorFromSet(rc.Spec.Selector))
}
}
if rss, err := rsl.GetPodReplicaSets(pod); err == nil {
for _, rs := range rss {
if selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector); err == nil {
selectors = append(selectors, selector)
}
}
}
if sss, err := ssl.GetPodStatefulSets(pod); err == nil {
for _, ss := range sss {
if selector, err := metav1.LabelSelectorAsSelector(ss.Spec.Selector); err == nil {
selectors = append(selectors, selector)
}
}
}
return selectors
}

View File

@ -0,0 +1,167 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
apps "k8s.io/api/apps/v1beta1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
func TestPriorityMetadata(t *testing.T) {
nonZeroReqs := &schedulercache.Resource{}
nonZeroReqs.MilliCPU = priorityutil.DefaultMilliCPURequest
nonZeroReqs.Memory = priorityutil.DefaultMemoryRequest
specifiedReqs := &schedulercache.Resource{}
specifiedReqs.MilliCPU = 200
specifiedReqs.Memory = 2000
tolerations := []v1.Toleration{{
Key: "foo",
Operator: v1.TolerationOpEqual,
Value: "bar",
Effect: v1.TaintEffectPreferNoSchedule,
}}
podAffinity := &v1.Affinity{
PodAffinity: &v1.PodAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{
{
Weight: 5,
PodAffinityTerm: v1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "security",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"S1"},
},
},
},
TopologyKey: "region",
},
},
},
},
}
podWithTolerationsAndAffinity := &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "container",
Image: "image",
ImagePullPolicy: "Always",
},
},
Affinity: podAffinity,
Tolerations: tolerations,
},
}
podWithTolerationsAndRequests := &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "container",
Image: "image",
ImagePullPolicy: "Always",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
},
Tolerations: tolerations,
},
}
podWithAffinityAndRequests := &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "container",
Image: "image",
ImagePullPolicy: "Always",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
},
Affinity: podAffinity,
},
}
tests := []struct {
pod *v1.Pod
test string
expected interface{}
}{
{
pod: nil,
expected: nil,
test: "pod is nil , priorityMetadata is nil",
},
{
pod: podWithTolerationsAndAffinity,
expected: &priorityMetadata{
nonZeroRequest: nonZeroReqs,
podTolerations: tolerations,
affinity: podAffinity,
},
test: "Produce a priorityMetadata with default requests",
},
{
pod: podWithTolerationsAndRequests,
expected: &priorityMetadata{
nonZeroRequest: specifiedReqs,
podTolerations: tolerations,
affinity: nil,
},
test: "Produce a priorityMetadata with specified requests",
},
{
pod: podWithAffinityAndRequests,
expected: &priorityMetadata{
nonZeroRequest: specifiedReqs,
podTolerations: nil,
affinity: podAffinity,
},
test: "Produce a priorityMetadata with specified requests",
},
}
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister([]*v1.Service{}),
schedulertesting.FakeControllerLister([]*v1.ReplicationController{}),
schedulertesting.FakeReplicaSetLister([]*extensions.ReplicaSet{}),
schedulertesting.FakeStatefulSetLister([]*apps.StatefulSet{}))
for _, test := range tests {
ptData := mataDataProducer(test.pod, nil)
if !reflect.DeepEqual(test.expected, ptData) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expected, ptData)
}
}
}

View File

@ -0,0 +1,55 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
var (
mostResourcePriority = &ResourceAllocationPriority{"MostResourceAllocation", mostResourceScorer}
// MostRequestedPriorityMap is a priority function that favors nodes with most requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the maximum of the average of the fraction of requested to capacity.
// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
MostRequestedPriorityMap = mostResourcePriority.PriorityMap
)
func mostResourceScorer(requested, allocable *schedulercache.Resource) int64 {
return (mostRequestedScore(requested.MilliCPU, allocable.MilliCPU) +
mostRequestedScore(requested.Memory, allocable.Memory)) / 2
}
// The used capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more resources are used the higher the score is. This function
// is almost a reversed version of least_requested_priority.calculatUnusedScore
// (10 - calculateUnusedScore). The main difference is in rounding. It was added to
// keep the final formula clean and not to modify the widely used (by users
// in their default scheduling policies) calculateUSedScore.
func mostRequestedScore(requested, capacity int64) int64 {
if capacity == 0 {
return 0
}
if requested > capacity {
return 0
}
return (requested * schedulerapi.MaxPriority) / capacity
}

View File

@ -0,0 +1,221 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
func TestMostRequested(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
noResources := v1.PodSpec{
Containers: []v1.Container{},
}
cpuOnly := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
},
}
cpuOnly2 := cpuOnly
cpuOnly2.NodeName = "machine2"
cpuAndMemory := v1.PodSpec{
NodeName: "machine2",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("3000"),
},
},
},
},
}
bigCPUAndMemory := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("4000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("3000m"),
v1.ResourceMemory: resource.MustParse("5000"),
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
/*
Node1 scores (used resources) on 0-10 scale
CPU Score: (0 * 10 / 4000 = 0
Memory Score: (0 * 10) / 10000 = 0
Node1 Score: (0 + 0) / 2 = 0
Node2 scores (used resources) on 0-10 scale
CPU Score: (0 * 10 / 4000 = 0
Memory Score: (0 * 10 / 10000 = 0
Node2 Score: (0 + 0) / 2 = 0
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "nothing scheduled, nothing requested",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: (3000 * 10 / 4000 = 7.5
Memory Score: (5000 * 10) / 10000 = 5
Node1 Score: (7.5 + 5) / 2 = 6
Node2 scores on 0-10 scale
CPU Score: (3000 * 10 / 6000 = 5
Memory Score: (5000 * 10 / 10000 = 5
Node2 Score: (5 + 5) / 2 = 5
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 5}},
test: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (0 * 10) / 20000 = 10
Node1 Score: (6 + 0) / 2 = 3
Node2 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (5000 * 10) / 20000 = 2.5
Node2 Score: (6 + 2.5) / 2 = 4
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 4}},
test: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: cpuOnly2, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (5000 * 10) / 20000 = 2.5
Node1 Score: (6 + 2.5) / 2 = 4
Node2 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (10000 * 10) / 20000 = 5
Node2 Score: (6 + 5) / 2 = 5
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 5}},
test: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: 5000 > 4000 return 0
Memory Score: (9000 * 10) / 10000 = 9
Node1 Score: (0 + 9) / 2 = 4
Node2 scores on 0-10 scale
CPU Score: (5000 * 10) / 10000 = 5
Memory Score: 9000 > 8000 return 0
Node2 Score: (5 + 0) / 2 = 2
*/
pod: &v1.Pod{Spec: bigCPUAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 10000, 8000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 2}},
test: "resources requested with more than the node, pods scheduled with resources",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(MostRequestedPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View File

@ -0,0 +1,78 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
// CalculateNodeAffinityPriorityMap prioritizes nodes according to node affinity scheduling preferences
// indicated in PreferredDuringSchedulingIgnoredDuringExecution. Each time a node match a preferredSchedulingTerm,
// it will a get an add of preferredSchedulingTerm.Weight. Thus, the more preferredSchedulingTerms
// the node satisfies and the more the preferredSchedulingTerm that is satisfied weights, the higher
// score the node gets.
func CalculateNodeAffinityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
var affinity *v1.Affinity
if priorityMeta, ok := meta.(*priorityMetadata); ok {
affinity = priorityMeta.affinity
} else {
// We couldn't parse metadata - fallback to the podspec.
affinity = pod.Spec.Affinity
}
var count int32
// A nil element of PreferredDuringSchedulingIgnoredDuringExecution matches no objects.
// An element of PreferredDuringSchedulingIgnoredDuringExecution that refers to an
// empty PreferredSchedulingTerm matches all objects.
if affinity != nil && affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil {
// Match PreferredDuringSchedulingIgnoredDuringExecution term by term.
for i := range affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution {
preferredSchedulingTerm := &affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution[i]
if preferredSchedulingTerm.Weight == 0 {
continue
}
// TODO: Avoid computing it for all nodes if this becomes a performance problem.
nodeSelector, err := v1helper.NodeSelectorRequirementsAsSelector(preferredSchedulingTerm.Preference.MatchExpressions)
if err != nil {
return schedulerapi.HostPriority{}, err
}
if nodeSelector.Matches(labels.Set(node.Labels)) {
count += preferredSchedulingTerm.Weight
}
}
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: int(count),
}, nil
}
// CalculateNodeAffinityPriorityReduce is a reduce function for node affinity priority calculation.
var CalculateNodeAffinityPriorityReduce = NormalizeReduce(schedulerapi.MaxPriority, false)

View File

@ -0,0 +1,179 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
func TestNodeAffinityPriority(t *testing.T) {
label1 := map[string]string{"foo": "bar"}
label2 := map[string]string{"key": "value"}
label3 := map[string]string{"az": "az1"}
label4 := map[string]string{"abc": "az11", "def": "az22"}
label5 := map[string]string{"foo": "bar", "key": "value", "az": "az1"}
affinity1 := &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.PreferredSchedulingTerm{{
Weight: 2,
Preference: v1.NodeSelectorTerm{
MatchExpressions: []v1.NodeSelectorRequirement{{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"bar"},
}},
},
}},
},
}
affinity2 := &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []v1.PreferredSchedulingTerm{
{
Weight: 2,
Preference: v1.NodeSelectorTerm{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"bar"},
},
},
},
},
{
Weight: 4,
Preference: v1.NodeSelectorTerm{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "key",
Operator: v1.NodeSelectorOpIn,
Values: []string{"value"},
},
},
},
},
{
Weight: 5,
Preference: v1.NodeSelectorTerm{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"bar"},
},
{
Key: "key",
Operator: v1.NodeSelectorOpIn,
Values: []string{"value"},
},
{
Key: "az",
Operator: v1.NodeSelectorOpIn,
Values: []string{"az1"},
},
},
},
},
},
},
}
tests := []struct {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{},
},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "all machines are same priority as NodeAffinity is nil",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: affinity1,
},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label4}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "no machine macthes preferred scheduling requirements in NodeAffinity of pod so all machines' priority is zero",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: affinity1,
},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "only machine1 matches the preferred scheduling requirements of pod",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: affinity2,
},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine5", Labels: label5}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine5", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 3}},
test: "all machines matches the preferred scheduling requirements of pod but with different priorities ",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
nap := priorityFunction(CalculateNodeAffinityPriorityMap, CalculateNodeAffinityPriorityReduce, nil)
list, err := nap(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: \nexpected %#v, \ngot %#v", test.test, test.expectedList, list)
}
}
}

View File

@ -0,0 +1,62 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
// NodeLabelPrioritizer contains information to calculate node label priority.
type NodeLabelPrioritizer struct {
label string
presence bool
}
// NewNodeLabelPriority creates a NodeLabelPrioritizer.
func NewNodeLabelPriority(label string, presence bool) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
labelPrioritizer := &NodeLabelPrioritizer{
label: label,
presence: presence,
}
return labelPrioritizer.CalculateNodeLabelPriorityMap, nil
}
// CalculateNodeLabelPriorityMap checks whether a particular label exists on a node or not, regardless of its value.
// If presence is true, prioritizes nodes that have the specified label, regardless of value.
// If presence is false, prioritizes nodes that do not have the specified label.
func (n *NodeLabelPrioritizer) CalculateNodeLabelPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
exists := labels.Set(node.Labels).Has(n.label)
score := 0
if (exists && n.presence) || (!exists && !n.presence) {
score = schedulerapi.MaxPriority
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: score,
}, nil
}

View File

@ -0,0 +1,126 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
func TestNewNodeLabelPriority(t *testing.T) {
label1 := map[string]string{"foo": "bar"}
label2 := map[string]string{"bar": "foo"}
label3 := map[string]string{"bar": "baz"}
tests := []struct {
nodes []*v1.Node
label string
presence bool
expectedList schedulerapi.HostPriorityList
test string
}{
{
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "baz",
presence: true,
test: "no match found, presence true",
},
{
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
label: "baz",
presence: false,
test: "no match found, presence false",
},
{
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "foo",
presence: true,
test: "one match found, presence true",
},
{
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
label: "foo",
presence: false,
test: "one match found, presence false",
},
{
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
label: "bar",
presence: true,
test: "two matches found, presence true",
},
{
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "bar",
presence: false,
test: "two matches found, presence false",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
labelPrioritizer := &NodeLabelPrioritizer{
label: test.label,
presence: test.presence,
}
list, err := priorityFunction(labelPrioritizer.CalculateNodeLabelPriorityMap, nil, nil)(nil, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View File

@ -0,0 +1,68 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
// CalculateNodePreferAvoidPodsPriorityMap priorities nodes according to the node annotation
// "scheduler.alpha.kubernetes.io/preferAvoidPods".
func CalculateNodePreferAvoidPodsPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
var controllerRef *metav1.OwnerReference
if priorityMeta, ok := meta.(*priorityMetadata); ok {
controllerRef = priorityMeta.controllerRef
} else {
// We couldn't parse metadata - fallback to the podspec.
controllerRef = priorityutil.GetControllerRef(pod)
}
if controllerRef != nil {
// Ignore pods that are owned by other controller than ReplicationController
// or ReplicaSet.
if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" {
controllerRef = nil
}
}
if controllerRef == nil {
return schedulerapi.HostPriority{Host: node.Name, Score: schedulerapi.MaxPriority}, nil
}
avoids, err := v1helper.GetAvoidPodsFromNodeAnnotations(node.Annotations)
if err != nil {
// If we cannot get annotation, assume it's schedulable there.
return schedulerapi.HostPriority{Host: node.Name, Score: schedulerapi.MaxPriority}, nil
}
for i := range avoids.PreferAvoidPods {
avoid := &avoids.PreferAvoidPods[i]
if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID {
return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil
}
}
return schedulerapi.HostPriority{Host: node.Name, Score: schedulerapi.MaxPriority}, nil
}

View File

@ -0,0 +1,156 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
func TestNodePreferAvoidPriority(t *testing.T) {
annotations1 := map[string]string{
v1.PreferAvoidPodsAnnotationKey: `
{
"preferAvoidPods": [
{
"podSignature": {
"podController": {
"apiVersion": "v1",
"kind": "ReplicationController",
"name": "foo",
"uid": "abcdef123456",
"controller": true
}
},
"reason": "some reason",
"message": "some message"
}
]
}`,
}
annotations2 := map[string]string{
v1.PreferAvoidPodsAnnotationKey: `
{
"preferAvoidPods": [
{
"podSignature": {
"podController": {
"apiVersion": "v1",
"kind": "ReplicaSet",
"name": "foo",
"uid": "qwert12345",
"controller": true
}
},
"reason": "some reason",
"message": "some message"
}
]
}`,
}
testNodes := []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{Name: "machine1", Annotations: annotations1},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "machine2", Annotations: annotations2},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "machine3"},
},
}
trueVar := true
tests := []struct {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
OwnerReferences: []metav1.OwnerReference{
{Kind: "ReplicationController", Name: "foo", UID: "abcdef123456", Controller: &trueVar},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "pod managed by ReplicationController should avoid a node, this node get lowest priority score",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
OwnerReferences: []metav1.OwnerReference{
{Kind: "RandomController", Name: "foo", UID: "abcdef123456", Controller: &trueVar},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "ownership by random controller should be ignored",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
OwnerReferences: []metav1.OwnerReference{
{Kind: "ReplicationController", Name: "foo", UID: "abcdef123456"},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "owner without Controller field set should be ignored",
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
OwnerReferences: []metav1.OwnerReference{
{Kind: "ReplicaSet", Name: "foo", UID: "qwert12345", Controller: &trueVar},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "pod managed by ReplicaSet should avoid a node, this node get lowest priority score",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View File

@ -0,0 +1,64 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
// NormalizeReduce generates a PriorityReduceFunction that can normalize the result
// scores to [0, maxPriority]. If reverse is set to true, it reverses the scores by
// subtracting it from maxPriority.
func NormalizeReduce(maxPriority int, reverse bool) algorithm.PriorityReduceFunction {
return func(
_ *v1.Pod,
_ interface{},
_ map[string]*schedulercache.NodeInfo,
result schedulerapi.HostPriorityList) error {
var maxCount int
for i := range result {
if result[i].Score > maxCount {
maxCount = result[i].Score
}
}
if maxCount == 0 {
if reverse {
for i := range result {
result[i].Score = maxPriority
}
}
return nil
}
for i := range result {
score := result[i].Score
score = maxPriority * score / maxCount
if reverse {
score = maxPriority - score
}
result[i].Score = score
}
return nil
}
}

View File

@ -0,0 +1,85 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"github.com/golang/glog"
"k8s.io/api/core/v1"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
// ResourceAllocationPriority contains information to calculate resource allocation priority.
type ResourceAllocationPriority struct {
Name string
scorer func(requested, allocable *schedulercache.Resource) int64
}
// PriorityMap priorities nodes according to the resource allocations on the node.
// It will use `scorer` function to calculate the score.
func (r *ResourceAllocationPriority) PriorityMap(
pod *v1.Pod,
meta interface{},
nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
allocatable := nodeInfo.AllocatableResource()
var requested schedulercache.Resource
if priorityMeta, ok := meta.(*priorityMetadata); ok {
requested = *priorityMeta.nonZeroRequest
} else {
// We couldn't parse metadata - fallback to computing it.
requested = *getNonZeroRequests(pod)
}
requested.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
requested.Memory += nodeInfo.NonZeroRequest().Memory
score := r.scorer(&requested, &allocatable)
if glog.V(10) {
glog.Infof(
"%v -> %v: %v, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
pod.Name, node.Name, r.Name,
allocatable.MilliCPU, allocatable.Memory,
requested.MilliCPU+allocatable.MilliCPU, requested.Memory+allocatable.Memory,
score,
)
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: int(score),
}, nil
}
func getNonZeroRequests(pod *v1.Pod) *schedulercache.Resource {
result := &schedulercache.Resource{}
for i := range pod.Spec.Containers {
container := &pod.Spec.Containers[i]
cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
result.MilliCPU += cpu
result.Memory += memory
}
return result
}

View File

@ -0,0 +1,128 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/api/core/v1"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// ResourceLimitsPriorityMap is a priority function that increases score of input node by 1 if the node satisfies
// input pod's resource limits. In detail, this priority function works as follows: If a node does not publish its
// allocatable resources (cpu and memory both), the node score is not affected. If a pod does not specify
// its cpu and memory limits both, the node score is not affected. If one or both of cpu and memory limits
// of the pod are satisfied, the node is assigned a score of 1.
// Rationale of choosing the lowest score of 1 is that this is mainly selected to break ties between nodes that have
// same scores assigned by one of least and most requested priority functions.
func ResourceLimitsPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
allocatableResources := nodeInfo.AllocatableResource()
// compute pod limits
podLimits := getResourceLimits(pod)
cpuScore := computeScore(podLimits.MilliCPU, allocatableResources.MilliCPU)
memScore := computeScore(podLimits.Memory, allocatableResources.Memory)
score := int(0)
if cpuScore == 1 || memScore == 1 {
score = 1
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.Infof(
"%v -> %v: Resource Limits Priority, allocatable %d millicores %d memory bytes, pod limits %d millicores %d memory bytes, score %d",
pod.Name, node.Name,
allocatableResources.MilliCPU, allocatableResources.Memory,
podLimits.MilliCPU, podLimits.Memory,
score,
)
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: score,
}, nil
}
// computeScore return 1 if limit value is less than or equal to allocable
// value, otherwise it returns 0.
func computeScore(limit, allocatable int64) int64 {
if limit != 0 && allocatable != 0 && limit <= allocatable {
return 1
}
return 0
}
// getResourceLimits computes resource limits for input pod.
// The reason to create this new function is to be consistent with other
// priority functions because most or perhaps all priority functions work
// with schedulercache.Resource.
// TODO: cache it as part of metadata passed to priority functions.
func getResourceLimits(pod *v1.Pod) *schedulercache.Resource {
result := &schedulercache.Resource{}
for _, container := range pod.Spec.Containers {
result.Add(container.Resources.Limits)
}
// take max_resource(sum_pod, any_init_container)
for _, container := range pod.Spec.InitContainers {
for rName, rQuantity := range container.Resources.Limits {
switch rName {
case v1.ResourceMemory:
if mem := rQuantity.Value(); mem > result.Memory {
result.Memory = mem
}
case v1.ResourceCPU:
if cpu := rQuantity.MilliValue(); cpu > result.MilliCPU {
result.MilliCPU = cpu
}
// keeping these resources though score computation in other priority functions and in this
// are only computed based on cpu and memory only.
case v1.ResourceEphemeralStorage:
if ephemeralStorage := rQuantity.Value(); ephemeralStorage > result.EphemeralStorage {
result.EphemeralStorage = ephemeralStorage
}
case v1.ResourceNvidiaGPU:
if gpu := rQuantity.Value(); gpu > result.NvidiaGPU {
result.NvidiaGPU = gpu
}
default:
if v1helper.IsScalarResourceName(rName) {
value := rQuantity.Value()
if value > result.ScalarResources[rName] {
result.SetScalar(rName, value)
}
}
}
}
}
return result
}

View File

@ -0,0 +1,151 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
//metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
func TestResourceLimistPriority(t *testing.T) {
noResources := v1.PodSpec{
Containers: []v1.Container{},
}
cpuOnly := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
},
}
memOnly := v1.PodSpec{
NodeName: "machine2",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0"),
v1.ResourceMemory: resource.MustParse("3000"),
},
},
},
},
}
cpuAndMemory := v1.PodSpec{
NodeName: "machine2",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
// input pod
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 0), makeNode("machine3", 0, 10000), makeNode("machine4", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}, {Host: "machine4", Score: 0}},
test: "pod does not specify its resource limits",
},
{
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 3000, 10000), makeNode("machine2", 2000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 0}},
test: "pod only specifies cpu limits",
},
{
pod: &v1.Pod{Spec: memOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 4000), makeNode("machine2", 5000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 1}},
test: "pod only specifies mem limits",
},
{
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 4000), makeNode("machine2", 5000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 1}},
test: "pod specifies both cpu and mem limits",
},
{
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}},
test: "node does not advertise its allocatables",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(ResourceLimitsPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View File

@ -0,0 +1,283 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
utilnode "k8s.io/kubernetes/pkg/util/node"
"github.com/golang/glog"
)
// When zone information is present, give 2/3 of the weighting to zone spreading, 1/3 to node spreading
// TODO: Any way to justify this weighting?
const zoneWeighting float64 = 2.0 / 3.0
// SelectorSpread contains information to calculate selector spread priority.
type SelectorSpread struct {
serviceLister algorithm.ServiceLister
controllerLister algorithm.ControllerLister
replicaSetLister algorithm.ReplicaSetLister
statefulSetLister algorithm.StatefulSetLister
}
// NewSelectorSpreadPriority creates a SelectorSpread.
func NewSelectorSpreadPriority(
serviceLister algorithm.ServiceLister,
controllerLister algorithm.ControllerLister,
replicaSetLister algorithm.ReplicaSetLister,
statefulSetLister algorithm.StatefulSetLister) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
selectorSpread := &SelectorSpread{
serviceLister: serviceLister,
controllerLister: controllerLister,
replicaSetLister: replicaSetLister,
statefulSetLister: statefulSetLister,
}
return selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce
}
// CalculateSpreadPriorityMap spreads pods across hosts, considering pods
// belonging to the same service,RC,RS or StatefulSet.
// When a pod is scheduled, it looks for services, RCs,RSs and StatefulSets that match the pod,
// then finds existing pods that match those selectors.
// It favors nodes that have fewer existing matching pods.
// i.e. it pushes the scheduler towards a node where there's the smallest number of
// pods which match the same service, RC,RSs or StatefulSets selectors as the pod being scheduled.
func (s *SelectorSpread) CalculateSpreadPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
var selectors []labels.Selector
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
priorityMeta, ok := meta.(*priorityMetadata)
if ok {
selectors = priorityMeta.podSelectors
} else {
selectors = getSelectors(pod, s.serviceLister, s.controllerLister, s.replicaSetLister, s.statefulSetLister)
}
if len(selectors) == 0 {
return schedulerapi.HostPriority{
Host: node.Name,
Score: int(0),
}, nil
}
count := int(0)
for _, nodePod := range nodeInfo.Pods() {
if pod.Namespace != nodePod.Namespace {
continue
}
// When we are replacing a failed pod, we often see the previous
// deleted version while scheduling the replacement.
// Ignore the previous deleted version for spreading purposes
// (it can still be considered for resource restrictions etc.)
if nodePod.DeletionTimestamp != nil {
glog.V(4).Infof("skipping pending-deleted pod: %s/%s", nodePod.Namespace, nodePod.Name)
continue
}
matches := false
for _, selector := range selectors {
if selector.Matches(labels.Set(nodePod.ObjectMeta.Labels)) {
matches = true
break
}
}
if matches {
count++
}
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: int(count),
}, nil
}
// CalculateSpreadPriorityReduce calculates the source of each node
// based on the number of existing matching pods on the node
// where zone information is included on the nodes, it favors nodes
// in zones with fewer existing matching pods.
func (s *SelectorSpread) CalculateSpreadPriorityReduce(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error {
countsByZone := make(map[string]int, 10)
maxCountByZone := int(0)
maxCountByNodeName := int(0)
for i := range result {
if result[i].Score > maxCountByNodeName {
maxCountByNodeName = result[i].Score
}
zoneID := utilnode.GetZoneKey(nodeNameToInfo[result[i].Host].Node())
if zoneID == "" {
continue
}
countsByZone[zoneID] += result[i].Score
}
for zoneID := range countsByZone {
if countsByZone[zoneID] > maxCountByZone {
maxCountByZone = countsByZone[zoneID]
}
}
haveZones := len(countsByZone) != 0
maxCountByNodeNameFloat64 := float64(maxCountByNodeName)
maxCountByZoneFloat64 := float64(maxCountByZone)
MaxPriorityFloat64 := float64(schedulerapi.MaxPriority)
for i := range result {
// initializing to the default/max node score of maxPriority
fScore := MaxPriorityFloat64
if maxCountByNodeName > 0 {
fScore = MaxPriorityFloat64 * (float64(maxCountByNodeName-result[i].Score) / maxCountByNodeNameFloat64)
}
// If there is zone information present, incorporate it
if haveZones {
zoneID := utilnode.GetZoneKey(nodeNameToInfo[result[i].Host].Node())
if zoneID != "" {
zoneScore := MaxPriorityFloat64
if maxCountByZone > 0 {
zoneScore = MaxPriorityFloat64 * (float64(maxCountByZone-countsByZone[zoneID]) / maxCountByZoneFloat64)
}
fScore = (fScore * (1.0 - zoneWeighting)) + (zoneWeighting * zoneScore)
}
}
result[i].Score = int(fScore)
if glog.V(10) {
glog.Infof(
"%v -> %v: SelectorSpreadPriority, Score: (%d)", pod.Name, result[i].Host, int(fScore),
)
}
}
return nil
}
// ServiceAntiAffinity contains information to calculate service anti-affinity priority.
type ServiceAntiAffinity struct {
podLister algorithm.PodLister
serviceLister algorithm.ServiceLister
label string
}
// NewServiceAntiAffinityPriority creates a ServiceAntiAffinity.
func NewServiceAntiAffinityPriority(podLister algorithm.PodLister, serviceLister algorithm.ServiceLister, label string) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
antiAffinity := &ServiceAntiAffinity{
podLister: podLister,
serviceLister: serviceLister,
label: label,
}
return antiAffinity.CalculateAntiAffinityPriorityMap, antiAffinity.CalculateAntiAffinityPriorityReduce
}
// Classifies nodes into ones with labels and without labels.
func (s *ServiceAntiAffinity) getNodeClassificationByLabels(nodes []*v1.Node) (map[string]string, []string) {
labeledNodes := map[string]string{}
nonLabeledNodes := []string{}
for _, node := range nodes {
if labels.Set(node.Labels).Has(s.label) {
label := labels.Set(node.Labels).Get(s.label)
labeledNodes[node.Name] = label
} else {
nonLabeledNodes = append(nonLabeledNodes, node.Name)
}
}
return labeledNodes, nonLabeledNodes
}
// filteredPod get pods based on namespace and selector
func filteredPod(namespace string, selector labels.Selector, nodeInfo *schedulercache.NodeInfo) (pods []*v1.Pod) {
if nodeInfo.Pods() == nil || len(nodeInfo.Pods()) == 0 || selector == nil {
return []*v1.Pod{}
}
for _, pod := range nodeInfo.Pods() {
if namespace == pod.Namespace && selector.Matches(labels.Set(pod.Labels)) {
pods = append(pods, pod)
}
}
return
}
// CalculateAntiAffinityPriorityMap spreads pods by minimizing the number of pods belonging to the same service
// on given machine
func (s *ServiceAntiAffinity) CalculateAntiAffinityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
var firstServiceSelector labels.Selector
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
priorityMeta, ok := meta.(*priorityMetadata)
if ok {
firstServiceSelector = priorityMeta.podFirstServiceSelector
} else {
firstServiceSelector = getFirstServiceSelector(pod, s.serviceLister)
}
//pods matched namespace,selector on current node
matchedPodsOfNode := filteredPod(pod.Namespace, firstServiceSelector, nodeInfo)
return schedulerapi.HostPriority{
Host: node.Name,
Score: int(len(matchedPodsOfNode)),
}, nil
}
// CalculateAntiAffinityPriorityReduce computes each node score with the same value for a particular label.
// The label to be considered is provided to the struct (ServiceAntiAffinity).
func (s *ServiceAntiAffinity) CalculateAntiAffinityPriorityReduce(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error {
var numServicePods int
var label string
podCounts := map[string]int{}
labelNodesStatus := map[string]string{}
maxPriorityFloat64 := float64(schedulerapi.MaxPriority)
for _, hostPriority := range result {
numServicePods += hostPriority.Score
if !labels.Set(nodeNameToInfo[hostPriority.Host].Node().Labels).Has(s.label) {
continue
}
label = labels.Set(nodeNameToInfo[hostPriority.Host].Node().Labels).Get(s.label)
labelNodesStatus[hostPriority.Host] = label
podCounts[label] += hostPriority.Score
}
//score int - scale of 0-maxPriority
// 0 being the lowest priority and maxPriority being the highest
for i, hostPriority := range result {
label, ok := labelNodesStatus[hostPriority.Host]
if !ok {
result[i].Host = hostPriority.Host
result[i].Score = int(0)
continue
}
// initializing to the default/max node score of maxPriority
fScore := maxPriorityFloat64
if numServicePods > 0 {
fScore = maxPriorityFloat64 * (float64(numServicePods-podCounts[label]) / float64(numServicePods))
}
result[i].Host = hostPriority.Host
result[i].Score = int(fScore)
}
return nil
}

View File

@ -0,0 +1,828 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
apps "k8s.io/api/apps/v1beta1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
func controllerRef(kind, name, uid string) []metav1.OwnerReference {
// TODO: When ControllerRef will be implemented uncomment code below.
return nil
//trueVar := true
//return []metav1.OwnerReference{
// {Kind: kind, Name: name, UID: types.UID(uid), Controller: &trueVar},
//}
}
func TestSelectorSpreadPriority(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
zone1Spec := v1.PodSpec{
NodeName: "machine1",
}
zone2Spec := v1.PodSpec{
NodeName: "machine2",
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []string
rcs []*v1.ReplicationController
rss []*extensions.ReplicaSet
services []*v1.Service
sss []*apps.StatefulSet
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: new(v1.Pod),
nodes: []string{"machine1", "machine2"},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{{Spec: zone1Spec}},
nodes: []string{"machine1", "machine2"},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "no services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}}},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"key": "value"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "different services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "two pods, one service pod",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: "ns1"}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "five pods, one service pod in no namespace",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: "ns1"}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}, ObjectMeta: metav1.ObjectMeta{Namespace: metav1.NamespaceDefault}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "four pods, one service pod in default namespace",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: "ns1"}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: "ns2"}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: "ns1"}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}, ObjectMeta: metav1.ObjectMeta{Namespace: "ns1"}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "five pods, one service pod in specific namespace",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "three pods, two service pods on different machines",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 0}},
test: "four pods, three service pods",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
// "baz=blah" matches both labels1 and labels2, and "foo=bar" matches only labels 1. This means that we assume that we want to
// do spreading between all pods. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches with service and replication controller",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches with service and replica set",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches with service and replica set",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar", "bar": "foo"}, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"bar": "foo"}}}},
// Taken together Service and Replication Controller should match all Pods, hence result should be equal to one above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "disjoined service and replication controller should be treated equally",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar", "bar": "foo"}, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"bar": "foo"}}}},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "disjoined service and replica set should be treated equally",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar", "bar": "foo"}, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"bar": "foo"}}}},
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "disjoined service and replica set should be treated equally",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}},
// Both Nodes have one pod from the given RC, hence both get 0 score.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "Replication controller with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "Replica set with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use StatefulSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "StatefulSet with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"baz": "blah"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "Another replication controller with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"baz": "blah"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "Another replication set with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
},
nodes: []string{"machine1", "machine2"},
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"baz": "blah"}}}}},
// We use StatefulSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "Another stateful set with partial pod label matches",
},
}
for i, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeNodeList(test.nodes))
selectorSpread := SelectorSpread{
serviceLister: schedulertesting.FakeServiceLister(test.services),
controllerLister: schedulertesting.FakeControllerLister(test.rcs),
replicaSetLister: schedulertesting.FakeReplicaSetLister(test.rss),
statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss),
}
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(test.rcs),
schedulertesting.FakeReplicaSetLister(test.rss),
schedulertesting.FakeStatefulSetLister(test.sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeNodeList(test.nodes))
if err != nil {
t.Errorf("unexpected error: %v index : %d\n", err, i)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}
func buildPod(nodeName string, labels map[string]string, ownerRefs []metav1.OwnerReference) *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Labels: labels, OwnerReferences: ownerRefs},
Spec: v1.PodSpec{NodeName: nodeName},
}
}
func TestZoneSelectorSpreadPriority(t *testing.T) {
labels1 := map[string]string{
"label1": "l1",
"baz": "blah",
}
labels2 := map[string]string{
"label2": "l2",
"baz": "blah",
}
const nodeMachine1Zone1 = "machine1.zone1"
const nodeMachine1Zone2 = "machine1.zone2"
const nodeMachine2Zone2 = "machine2.zone2"
const nodeMachine1Zone3 = "machine1.zone3"
const nodeMachine2Zone3 = "machine2.zone3"
const nodeMachine3Zone3 = "machine3.zone3"
buildNodeLabels := func(failureDomain string) map[string]string {
labels := map[string]string{
kubeletapis.LabelZoneFailureDomain: failureDomain,
}
return labels
}
labeledNodes := map[string]map[string]string{
nodeMachine1Zone1: buildNodeLabels("zone1"),
nodeMachine1Zone2: buildNodeLabels("zone2"),
nodeMachine2Zone2: buildNodeLabels("zone2"),
nodeMachine1Zone3: buildNodeLabels("zone3"),
nodeMachine2Zone3: buildNodeLabels("zone3"),
nodeMachine3Zone3: buildNodeLabels("zone3"),
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []string
rcs []*v1.ReplicationController
rss []*extensions.ReplicaSet
services []*v1.Service
sss []*apps.StatefulSet
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: new(v1.Pod),
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone2, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone2, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "nothing scheduled",
},
{
pod: buildPod("", labels1, nil),
pods: []*v1.Pod{buildPod(nodeMachine1Zone1, nil, nil)},
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone2, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone2, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "no services",
},
{
pod: buildPod("", labels1, nil),
pods: []*v1.Pod{buildPod(nodeMachine1Zone1, labels2, nil)},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"key": "value"}}}},
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone2, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone2, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "different services",
},
{
pod: buildPod("", labels1, nil),
pods: []*v1.Pod{
buildPod(nodeMachine1Zone1, labels2, nil),
buildPod(nodeMachine1Zone2, labels2, nil),
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone2, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone2, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "two pods, 0 matching",
},
{
pod: buildPod("", labels1, nil),
pods: []*v1.Pod{
buildPod(nodeMachine1Zone1, labels2, nil),
buildPod(nodeMachine1Zone2, labels1, nil),
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone2, Score: 0}, // Already have pod on machine
{Host: nodeMachine2Zone2, Score: 3}, // Already have pod in zone
{Host: nodeMachine1Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "two pods, 1 matching (in z2)",
},
{
pod: buildPod("", labels1, nil),
pods: []*v1.Pod{
buildPod(nodeMachine1Zone1, labels2, nil),
buildPod(nodeMachine1Zone2, labels1, nil),
buildPod(nodeMachine2Zone2, labels1, nil),
buildPod(nodeMachine1Zone3, labels2, nil),
buildPod(nodeMachine2Zone3, labels1, nil),
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: schedulerapi.MaxPriority},
{Host: nodeMachine1Zone2, Score: 0}, // Pod on node
{Host: nodeMachine2Zone2, Score: 0}, // Pod on node
{Host: nodeMachine1Zone3, Score: 6}, // Pod in zone
{Host: nodeMachine2Zone3, Score: 3}, // Pod on node
{Host: nodeMachine3Zone3, Score: 6}, // Pod in zone
},
test: "five pods, 3 matching (z2=2, z3=1)",
},
{
pod: buildPod("", labels1, nil),
pods: []*v1.Pod{
buildPod(nodeMachine1Zone1, labels1, nil),
buildPod(nodeMachine1Zone2, labels1, nil),
buildPod(nodeMachine2Zone2, labels2, nil),
buildPod(nodeMachine1Zone3, labels1, nil),
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: 0}, // Pod on node
{Host: nodeMachine1Zone2, Score: 0}, // Pod on node
{Host: nodeMachine2Zone2, Score: 3}, // Pod in zone
{Host: nodeMachine1Zone3, Score: 0}, // Pod on node
{Host: nodeMachine2Zone3, Score: 3}, // Pod in zone
{Host: nodeMachine3Zone3, Score: 3}, // Pod in zone
},
test: "four pods, 3 matching (z1=1, z2=1, z3=1)",
},
{
pod: buildPod("", labels1, nil),
pods: []*v1.Pod{
buildPod(nodeMachine1Zone1, labels1, nil),
buildPod(nodeMachine1Zone2, labels1, nil),
buildPod(nodeMachine1Zone3, labels1, nil),
buildPod(nodeMachine2Zone2, labels2, nil),
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{
{Host: nodeMachine1Zone1, Score: 0}, // Pod on node
{Host: nodeMachine1Zone2, Score: 0}, // Pod on node
{Host: nodeMachine2Zone2, Score: 3}, // Pod in zone
{Host: nodeMachine1Zone3, Score: 0}, // Pod on node
{Host: nodeMachine2Zone3, Score: 3}, // Pod in zone
{Host: nodeMachine3Zone3, Score: 3}, // Pod in zone
},
test: "four pods, 3 matching (z1=1, z2=1, z3=1)",
},
{
pod: buildPod("", labels1, controllerRef("ReplicationController", "name", "abc123")),
pods: []*v1.Pod{
buildPod(nodeMachine1Zone3, labels1, controllerRef("ReplicationController", "name", "abc123")),
buildPod(nodeMachine1Zone2, labels1, controllerRef("ReplicationController", "name", "abc123")),
buildPod(nodeMachine1Zone3, labels1, controllerRef("ReplicationController", "name", "abc123")),
},
rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{
// Note that because we put two pods on the same node (nodeMachine1Zone3),
// the values here are questionable for zone2, in particular for nodeMachine1Zone2.
// However they kind of make sense; zone1 is still most-highly favored.
// zone3 is in general least favored, and m1.z3 particularly low priority.
// We would probably prefer to see a bigger gap between putting a second
// pod on m1.z2 and putting a pod on m2.z2, but the ordering is correct.
// This is also consistent with what we have already.
{Host: nodeMachine1Zone1, Score: schedulerapi.MaxPriority}, // No pods in zone
{Host: nodeMachine1Zone2, Score: 5}, // Pod on node
{Host: nodeMachine2Zone2, Score: 6}, // Pod in zone
{Host: nodeMachine1Zone3, Score: 0}, // Two pods on node
{Host: nodeMachine2Zone3, Score: 3}, // Pod in zone
{Host: nodeMachine3Zone3, Score: 3}, // Pod in zone
},
test: "Replication controller spreading (z1=0, z2=1, z3=2)",
},
}
for i, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeLabeledNodeList(labeledNodes))
selectorSpread := SelectorSpread{
serviceLister: schedulertesting.FakeServiceLister(test.services),
controllerLister: schedulertesting.FakeControllerLister(test.rcs),
replicaSetLister: schedulertesting.FakeReplicaSetLister(test.rss),
statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss),
}
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(test.rcs),
schedulertesting.FakeReplicaSetLister(test.rss),
schedulertesting.FakeStatefulSetLister(test.sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeLabeledNodeList(labeledNodes))
if err != nil {
t.Errorf("unexpected error: %v index : %d", err, i)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}
func TestZoneSpreadPriority(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
zone1 := map[string]string{
"zone": "zone1",
}
zone2 := map[string]string{
"zone": "zone2",
}
nozone := map[string]string{
"name": "value",
}
zone0Spec := v1.PodSpec{
NodeName: "machine01",
}
zone1Spec := v1.PodSpec{
NodeName: "machine11",
}
zone2Spec := v1.PodSpec{
NodeName: "machine21",
}
labeledNodes := map[string]map[string]string{
"machine01": nozone, "machine02": nozone,
"machine11": zone1, "machine12": zone1,
"machine21": zone2, "machine22": zone2,
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes map[string]map[string]string
services []*v1.Service
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: new(v1.Pod),
nodes: labeledNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "nothing scheduled",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{{Spec: zone1Spec}},
nodes: labeledNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "no services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}}},
nodes: labeledNodes,
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"key": "value"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "different services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone0Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: labeledNodes,
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: 0}, {Host: "machine22", Score: 0},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "three pods, one service pod",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: labeledNodes,
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 5}, {Host: "machine12", Score: 5},
{Host: "machine21", Score: 5}, {Host: "machine22", Score: 5},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "three pods, two service pods on different machines",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: "ns1"}},
},
nodes: labeledNodes,
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}, ObjectMeta: metav1.ObjectMeta{Namespace: metav1.NamespaceDefault}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 0}, {Host: "machine12", Score: 0},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "three service label match pods in different namespaces",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: labeledNodes,
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 6}, {Host: "machine12", Score: 6},
{Host: "machine21", Score: 3}, {Host: "machine22", Score: 3},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "four pods, three service pods",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: labeledNodes,
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 3}, {Host: "machine12", Score: 3},
{Host: "machine21", Score: 6}, {Host: "machine22", Score: 6},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "service with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{
{Spec: zone0Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
nodes: labeledNodes,
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 7}, {Host: "machine12", Score: 7},
{Host: "machine21", Score: 5}, {Host: "machine22", Score: 5},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "service pod on non-zoned node",
},
}
// these local variables just make sure controllerLister\replicaSetLister\statefulSetLister not nil
// when construct mataDataProducer
sss := []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}}
rcs := []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}}
rss := []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}}
for i, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeLabeledNodeList(test.nodes))
zoneSpread := ServiceAntiAffinity{podLister: schedulertesting.FakePodLister(test.pods), serviceLister: schedulertesting.FakeServiceLister(test.services), label: "zone"}
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(rcs),
schedulertesting.FakeReplicaSetLister(rss),
schedulertesting.FakeStatefulSetLister(sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(zoneSpread.CalculateAntiAffinityPriorityMap, zoneSpread.CalculateAntiAffinityPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeLabeledNodeList(test.nodes))
if err != nil {
t.Errorf("unexpected error: %v index : %d", err, i)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("test index %d (%s): expected %#v, got %#v", i, test.test, test.expectedList, list)
}
}
}
func TestGetNodeClassificationByLabels(t *testing.T) {
const machine01 = "machine01"
const machine02 = "machine02"
const zoneA = "zoneA"
zone1 := map[string]string{
"zone": zoneA,
}
labeledNodes := map[string]map[string]string{
machine01: zone1,
}
expectedNonLabeledNodes := []string{machine02}
serviceAffinity := ServiceAntiAffinity{label: "zone"}
newLabeledNodes, noNonLabeledNodes := serviceAffinity.getNodeClassificationByLabels(makeLabeledNodeList(labeledNodes))
noLabeledNodes, newnonLabeledNodes := serviceAffinity.getNodeClassificationByLabels(makeNodeList(expectedNonLabeledNodes))
label, _ := newLabeledNodes[machine01]
if label != zoneA && len(noNonLabeledNodes) != 0 {
t.Errorf("Expected only labeled node with label zoneA and no noNonLabeledNodes")
}
if len(noLabeledNodes) != 0 && newnonLabeledNodes[0] != machine02 {
t.Errorf("Expected only non labelled nodes")
}
}
func makeLabeledNodeList(nodeMap map[string]map[string]string) []*v1.Node {
nodes := make([]*v1.Node, 0, len(nodeMap))
for nodeName, labels := range nodeMap {
nodes = append(nodes, &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName, Labels: labels}})
}
return nodes
}
func makeNodeList(nodeNames []string) []*v1.Node {
nodes := make([]*v1.Node, 0, len(nodeNames))
for _, nodeName := range nodeNames {
nodes = append(nodes, &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}})
}
return nodes
}

View File

@ -0,0 +1,76 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/api/core/v1"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
// CountIntolerableTaintsPreferNoSchedule gives the count of intolerable taints of a pod with effect PreferNoSchedule
func countIntolerableTaintsPreferNoSchedule(taints []v1.Taint, tolerations []v1.Toleration) (intolerableTaints int) {
for _, taint := range taints {
// check only on taints that have effect PreferNoSchedule
if taint.Effect != v1.TaintEffectPreferNoSchedule {
continue
}
if !v1helper.TolerationsTolerateTaint(tolerations, &taint) {
intolerableTaints++
}
}
return
}
// getAllTolerationEffectPreferNoSchedule gets the list of all Tolerations with Effect PreferNoSchedule or with no effect.
func getAllTolerationPreferNoSchedule(tolerations []v1.Toleration) (tolerationList []v1.Toleration) {
for _, toleration := range tolerations {
// Empty effect means all effects which includes PreferNoSchedule, so we need to collect it as well.
if len(toleration.Effect) == 0 || toleration.Effect == v1.TaintEffectPreferNoSchedule {
tolerationList = append(tolerationList, toleration)
}
}
return
}
// ComputeTaintTolerationPriorityMap prepares the priority list for all the nodes based on the number of intolerable taints on the node
func ComputeTaintTolerationPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
// To hold all the tolerations with Effect PreferNoSchedule
var tolerationsPreferNoSchedule []v1.Toleration
if priorityMeta, ok := meta.(*priorityMetadata); ok {
tolerationsPreferNoSchedule = priorityMeta.podTolerations
} else {
tolerationsPreferNoSchedule = getAllTolerationPreferNoSchedule(pod.Spec.Tolerations)
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: countIntolerableTaintsPreferNoSchedule(node.Spec.Taints, tolerationsPreferNoSchedule),
}, nil
}
// ComputeTaintTolerationPriorityReduce calculates the source of each node based on the number of intolerable taints on the node
var ComputeTaintTolerationPriorityReduce = NormalizeReduce(schedulerapi.MaxPriority, true)

View File

@ -0,0 +1,241 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
func nodeWithTaints(nodeName string, taints []v1.Taint) *v1.Node {
return &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: nodeName,
},
Spec: v1.NodeSpec{
Taints: taints,
},
}
}
func podWithTolerations(tolerations []v1.Toleration) *v1.Pod {
return &v1.Pod{
Spec: v1.PodSpec{
Tolerations: tolerations,
},
}
}
// This function will create a set of nodes and pods and test the priority
// Nodes with zero,one,two,three,four and hundred taints are created
// Pods with zero,one,two,three,four and hundred tolerations are created
func TestTaintAndToleration(t *testing.T) {
tests := []struct {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
// basic test case
{
test: "node with taints tolerated by the pod, gets a higher score than those node with intolerable taints",
pod: podWithTolerations([]v1.Toleration{{
Key: "foo",
Operator: v1.TolerationOpEqual,
Value: "bar",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{{
Key: "foo",
Value: "bar",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
nodeWithTaints("nodeB", []v1.Taint{{
Key: "foo",
Value: "blah",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: schedulerapi.MaxPriority},
{Host: "nodeB", Score: 0},
},
},
// the count of taints that are tolerated by pod, does not matter.
{
test: "the nodes that all of their taints are tolerated by the pod, get the same score, no matter how many tolerable taints a node has",
pod: podWithTolerations([]v1.Toleration{
{
Key: "cpu-type",
Operator: v1.TolerationOpEqual,
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Operator: v1.TolerationOpEqual,
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{}),
nodeWithTaints("nodeB", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
nodeWithTaints("nodeC", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: schedulerapi.MaxPriority},
{Host: "nodeB", Score: schedulerapi.MaxPriority},
{Host: "nodeC", Score: schedulerapi.MaxPriority},
},
},
// the count of taints on a node that are not tolerated by pod, matters.
{
test: "the more intolerable taints a node has, the lower score it gets.",
pod: podWithTolerations([]v1.Toleration{{
Key: "foo",
Operator: v1.TolerationOpEqual,
Value: "bar",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{}),
nodeWithTaints("nodeB", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
nodeWithTaints("nodeC", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: schedulerapi.MaxPriority},
{Host: "nodeB", Score: 5},
{Host: "nodeC", Score: 0},
},
},
// taints-tolerations priority only takes care about the taints and tolerations that have effect PreferNoSchedule
{
test: "only taints and tolerations that have effect PreferNoSchedule are checked by taints-tolerations priority function",
pod: podWithTolerations([]v1.Toleration{
{
Key: "cpu-type",
Operator: v1.TolerationOpEqual,
Value: "arm64",
Effect: v1.TaintEffectNoSchedule,
}, {
Key: "disk-type",
Operator: v1.TolerationOpEqual,
Value: "ssd",
Effect: v1.TaintEffectNoSchedule,
},
}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{}),
nodeWithTaints("nodeB", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectNoSchedule,
},
}),
nodeWithTaints("nodeC", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: schedulerapi.MaxPriority},
{Host: "nodeB", Score: schedulerapi.MaxPriority},
{Host: "nodeC", Score: 0},
},
},
{
test: "Default behaviour No taints and tolerations, lands on node with no taints",
//pod without tolerations
pod: podWithTolerations([]v1.Toleration{}),
nodes: []*v1.Node{
//Node without taints
nodeWithTaints("nodeA", []v1.Taint{}),
nodeWithTaints("nodeB", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: schedulerapi.MaxPriority},
{Host: "nodeB", Score: 0},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
ttp := priorityFunction(ComputeTaintTolerationPriorityMap, ComputeTaintTolerationPriorityReduce, nil)
list, err := ttp(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("%s, unexpected error: %v", test.test, err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s,\nexpected:\n\t%+v,\ngot:\n\t%+v", test.test, test.expectedList, list)
}
}
}

View File

@ -0,0 +1,61 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
func makeNode(node string, milliCPU, memory int64) *v1.Node {
return &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: node},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
},
},
}
}
func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction, mataData interface{}) algorithm.PriorityFunction {
return func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
for i := range nodes {
hostResult, err := mapFn(pod, mataData, nodeNameToInfo[nodes[i].Name])
if err != nil {
return nil, err
}
result = append(result, hostResult)
}
if reduceFn != nil {
if err := reduceFn(pod, mataData, nodeNameToInfo, result); err != nil {
return nil, err
}
}
return result, nil
}
}

View File

@ -0,0 +1,55 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_test(
name = "go_default_test",
srcs = [
"non_zero_test.go",
"topologies_test.go",
"util_test.go",
],
embed = [":go_default_library"],
deps = [
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/selection:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],
)
go_library(
name = "go_default_library",
srcs = [
"non_zero.go",
"topologies.go",
"util.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util",
deps = [
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@ -0,0 +1,53 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import "k8s.io/api/core/v1"
// For each of these resources, a pod that doesn't request the resource explicitly
// will be treated as having requested the amount indicated below, for the purpose
// of computing priority only. This ensures that when scheduling zero-request pods, such
// pods will not all be scheduled to the machine with the smallest in-use request,
// and that when scheduling regular pods, such pods will not see zero-request pods as
// consuming no resources whatsoever. We chose these values to be similar to the
// resources that we give to cluster addon pods (#10653). But they are pretty arbitrary.
// As described in #11713, we use request instead of limit to deal with resource requirements.
// DefaultMilliCPURequest defines default milli cpu request number.
const DefaultMilliCPURequest int64 = 100 // 0.1 core
// DefaultMemoryRequest defines default memory request size.
const DefaultMemoryRequest int64 = 200 * 1024 * 1024 // 200 MB
// GetNonzeroRequests returns the default resource request if none is found or what is provided on the request
// TODO: Consider setting default as a fixed fraction of machine capacity (take "capacity v1.ResourceList"
// as an additional argument here) rather than using constants
func GetNonzeroRequests(requests *v1.ResourceList) (int64, int64) {
var outMilliCPU, outMemory int64
// Override if un-set, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceCPU]; !found {
outMilliCPU = DefaultMilliCPURequest
} else {
outMilliCPU = requests.Cpu().MilliValue()
}
// Override if un-set, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceMemory]; !found {
outMemory = DefaultMemoryRequest
} else {
outMemory = requests.Memory().Value()
}
return outMilliCPU, outMemory
}

View File

@ -0,0 +1,73 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"testing"
"github.com/stretchr/testify/assert"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
)
func TestGetNonzeroRequests(t *testing.T) {
tds := []struct {
name string
requests v1.ResourceList
expectedCPU int64
expectedMemory int64
}{
{
"cpu_and_memory_not_found",
v1.ResourceList{},
DefaultMilliCPURequest,
DefaultMemoryRequest,
},
{
"only_cpu_exist",
v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
},
200,
DefaultMemoryRequest,
},
{
"only_memory_exist",
v1.ResourceList{
v1.ResourceMemory: resource.MustParse("400Mi"),
},
DefaultMilliCPURequest,
400 * 1024 * 1024,
},
{
"cpu_memory_exist",
v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("400Mi"),
},
200,
400 * 1024 * 1024,
},
}
for _, td := range tds {
realCPU, realMemory := GetNonzeroRequests(&td.requests)
assert.EqualValuesf(t, td.expectedCPU, realCPU, "Failed to test: %s", td.name)
assert.EqualValuesf(t, td.expectedMemory, realMemory, "Failed to test: %s", td.name)
}
}

View File

@ -0,0 +1,81 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
)
// GetNamespacesFromPodAffinityTerm returns a set of names
// according to the namespaces indicated in podAffinityTerm.
// If namespaces is empty it considers the given pod's namespace.
func GetNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.String {
names := sets.String{}
if len(podAffinityTerm.Namespaces) == 0 {
names.Insert(pod.Namespace)
} else {
names.Insert(podAffinityTerm.Namespaces...)
}
return names
}
// PodMatchesTermsNamespaceAndSelector returns true if the given <pod>
// matches the namespace and selector defined by <affinityPod>`s <term>.
func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.String, selector labels.Selector) bool {
if !namespaces.Has(pod.Namespace) {
return false
}
if !selector.Matches(labels.Set(pod.Labels)) {
return false
}
return true
}
// NodesHaveSameTopologyKey checks if nodeA and nodeB have same label value with given topologyKey as label key.
// Returns false if topologyKey is empty.
func NodesHaveSameTopologyKey(nodeA, nodeB *v1.Node, topologyKey string) bool {
if len(topologyKey) == 0 {
return false
}
if nodeA.Labels == nil || nodeB.Labels == nil {
return false
}
nodeALabel, okA := nodeA.Labels[topologyKey]
nodeBLabel, okB := nodeB.Labels[topologyKey]
// If found label in both nodes, check the label
if okB && okA {
return nodeALabel == nodeBLabel
}
return false
}
// Topologies contains topologies information of nodes.
type Topologies struct {
DefaultKeys []string
}
// NodesHaveSameTopologyKey checks if nodeA and nodeB have same label value with given topologyKey as label key.
func (tps *Topologies) NodesHaveSameTopologyKey(nodeA, nodeB *v1.Node, topologyKey string) bool {
return NodesHaveSameTopologyKey(nodeA, nodeB, topologyKey)
}

View File

@ -0,0 +1,254 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"testing"
"github.com/stretchr/testify/assert"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/selection"
"k8s.io/apimachinery/pkg/util/sets"
)
func fakePod() *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "topologies_pod",
Namespace: metav1.NamespaceDefault,
UID: "551f5a43-9f2f-11e7-a589-fa163e148d75",
},
}
}
func TestGetNamespacesFromPodAffinityTerm(t *testing.T) {
tests := []struct {
name string
podAffinityTerm *v1.PodAffinityTerm
expectedValue sets.String
}{
{
"podAffinityTerm_namespace_empty",
&v1.PodAffinityTerm{},
sets.String{metav1.NamespaceDefault: sets.Empty{}},
},
{
"podAffinityTerm_namespace_not_empty",
&v1.PodAffinityTerm{
Namespaces: []string{metav1.NamespacePublic, metav1.NamespaceSystem},
},
sets.String{metav1.NamespacePublic: sets.Empty{}, metav1.NamespaceSystem: sets.Empty{}},
},
}
for _, test := range tests {
realValue := GetNamespacesFromPodAffinityTerm(fakePod(), test.podAffinityTerm)
assert.EqualValuesf(t, test.expectedValue, realValue, "Failed to test: %s", test.name)
}
}
func TestPodMatchesTermsNamespaceAndSelector(t *testing.T) {
fakeNamespaces := sets.String{metav1.NamespacePublic: sets.Empty{}, metav1.NamespaceSystem: sets.Empty{}}
fakeRequirement, _ := labels.NewRequirement("service", selection.In, []string{"topologies_service1", "topologies_service2"})
fakeSelector := labels.NewSelector().Add(*fakeRequirement)
tests := []struct {
name string
podNamespaces string
podLabels map[string]string
expectedResult bool
}{
{
"namespace_not_in",
metav1.NamespaceDefault,
map[string]string{"service": "topologies_service1"},
false,
},
{
"label_not_match",
metav1.NamespacePublic,
map[string]string{"service": "topologies_service3"},
false,
},
{
"normal_case",
metav1.NamespacePublic,
map[string]string{"service": "topologies_service1"},
true,
},
}
for _, test := range tests {
fakeTestPod := fakePod()
fakeTestPod.Namespace = test.podNamespaces
fakeTestPod.Labels = test.podLabels
realValue := PodMatchesTermsNamespaceAndSelector(fakeTestPod, fakeNamespaces, fakeSelector)
assert.EqualValuesf(t, test.expectedResult, realValue, "Faild to test: %s", test.name)
}
}
func TestNodesHaveSameTopologyKey(t *testing.T) {
tests := []struct {
name string
nodeA, nodeB *v1.Node
topologyKey string
expected bool
}{
{
name: "nodeA{'a':'a'} vs. empty label in nodeB",
nodeA: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "a",
},
},
},
nodeB: &v1.Node{},
expected: false,
topologyKey: "a",
},
{
name: "nodeA{'a':'a'} vs. nodeB{'a':'a'}",
nodeA: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "a",
},
},
},
nodeB: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "a",
},
},
},
expected: true,
topologyKey: "a",
},
{
name: "nodeA{'a':''} vs. empty label in nodeB",
nodeA: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "",
},
},
},
nodeB: &v1.Node{},
expected: false,
topologyKey: "a",
},
{
name: "nodeA{'a':''} vs. nodeB{'a':''}",
nodeA: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "",
},
},
},
nodeB: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "",
},
},
},
expected: true,
topologyKey: "a",
},
{
name: "nodeA{'a':'a'} vs. nodeB{'a':'a'} by key{'b'}",
nodeA: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "a",
},
},
},
nodeB: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "a",
},
},
},
expected: false,
topologyKey: "b",
},
{
name: "topologyKey empty",
nodeA: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "",
},
},
},
nodeB: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "",
},
},
},
expected: false,
topologyKey: "",
},
{
name: "nodeA lable nil vs. nodeB{'a':''} by key('a')",
nodeA: &v1.Node{
ObjectMeta: metav1.ObjectMeta{},
},
nodeB: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "",
},
},
},
expected: false,
topologyKey: "a",
},
{
name: "nodeA{'a':''} vs. nodeB label is nil by key('a')",
nodeA: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"a": "",
},
},
},
nodeB: &v1.Node{
ObjectMeta: metav1.ObjectMeta{},
},
expected: false,
topologyKey: "a",
},
}
for _, test := range tests {
got := NodesHaveSameTopologyKey(test.nodeA, test.nodeB, test.topologyKey)
assert.Equalf(t, test.expected, got, "Failed to test: %s", test.name)
}
}

View File

@ -0,0 +1,36 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// GetControllerRef gets pod's owner controller reference from a pod object.
func GetControllerRef(pod *v1.Pod) *metav1.OwnerReference {
if len(pod.OwnerReferences) == 0 {
return nil
}
for i := range pod.OwnerReferences {
ref := &pod.OwnerReferences[i]
if ref.Controller != nil && *ref.Controller {
return ref
}
}
return nil
}

View File

@ -0,0 +1,119 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"testing"
"github.com/stretchr/testify/assert"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
func TestGetControllerRef(t *testing.T) {
fakeBlockOwnerDeletion := true
fakeFalseController := false
fakeTrueController := true
fakeEmptyOwnerReference := metav1.OwnerReference{}
tds := []struct {
name string
pod v1.Pod
expectedNil bool
expectedOR metav1.OwnerReference
}{
{
"ownerreference_not_exist",
v1.Pod{},
true,
fakeEmptyOwnerReference,
},
{
"ownerreference_controller_is_nil",
v1.Pod{
ObjectMeta: metav1.ObjectMeta{
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "extensions/v1beta1",
Kind: "ReplicaSet",
Name: "or-unit-test-5b9cffccff",
UID: "a46372ea-b254-11e7-8373-fa163e25bfb5",
BlockOwnerDeletion: &fakeBlockOwnerDeletion,
},
},
},
},
true,
fakeEmptyOwnerReference,
},
{
"ownerreference_controller_is_false",
v1.Pod{
ObjectMeta: metav1.ObjectMeta{
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "extensions/v1beta1",
Kind: "ReplicaSet",
Name: "or-unit-test-5b9cffccff",
UID: "a46372ea-b254-11e7-8373-fa163e25bfb5",
Controller: &fakeFalseController,
BlockOwnerDeletion: &fakeBlockOwnerDeletion,
},
},
},
},
true,
fakeEmptyOwnerReference,
},
{
"ownerreference_controller_is_true",
v1.Pod{
ObjectMeta: metav1.ObjectMeta{
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "extensions/v1beta1",
Kind: "ReplicaSet",
Name: "or-unit-test-5b9cffccff",
UID: "a46372ea-b254-11e7-8373-fa163e25bfb5",
BlockOwnerDeletion: &fakeBlockOwnerDeletion,
Controller: &fakeTrueController,
},
},
},
},
false,
metav1.OwnerReference{
APIVersion: "extensions/v1beta1",
Kind: "ReplicaSet",
Name: "or-unit-test-5b9cffccff",
UID: "a46372ea-b254-11e7-8373-fa163e25bfb5",
BlockOwnerDeletion: &fakeBlockOwnerDeletion,
Controller: &fakeTrueController,
},
},
}
for _, td := range tds {
realOR := GetControllerRef(&td.pod)
if td.expectedNil {
assert.Nilf(t, realOR, "Failed to test: %s", td.name)
} else {
assert.Equalf(t, &td.expectedOR, realOR, "Failed to test: %s", td.name)
}
}
}

View File

@ -0,0 +1,65 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"k8s.io/api/core/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
// SchedulerExtender is an interface for external processes to influence scheduling
// decisions made by Kubernetes. This is typically needed for resources not directly
// managed by Kubernetes.
type SchedulerExtender interface {
// Filter based on extender-implemented predicate functions. The filtered list is
// expected to be a subset of the supplied list. failedNodesMap optionally contains
// the list of failed nodes and failure reasons.
Filter(pod *v1.Pod, nodes []*v1.Node, nodeNameToInfo map[string]*schedulercache.NodeInfo) (filteredNodes []*v1.Node, failedNodesMap schedulerapi.FailedNodesMap, err error)
// Prioritize based on extender-implemented priority functions. The returned scores & weight
// are used to compute the weighted score for an extender. The weighted scores are added to
// the scores computed by Kubernetes scheduler. The total scores are used to do the host selection.
Prioritize(pod *v1.Pod, nodes []*v1.Node) (hostPriorities *schedulerapi.HostPriorityList, weight int, err error)
// Bind delegates the action of binding a pod to a node to the extender.
Bind(binding *v1.Binding) error
// IsBinder returns whether this extender is configured for the Bind method.
IsBinder() bool
// IsInterested returns true if at least one extended resource requested by
// this pod is managed by this extender.
IsInterested(pod *v1.Pod) bool
}
// ScheduleAlgorithm is an interface implemented by things that know how to schedule pods
// onto machines.
type ScheduleAlgorithm interface {
Schedule(*v1.Pod, NodeLister) (selectedMachine string, err error)
// Preempt receives scheduling errors for a pod and tries to create room for
// the pod by preempting lower priority pods if possible.
// It returns the node where preemption happened, a list of preempted pods, a
// list of pods whose nominated node name should be removed, and error if any.
Preempt(*v1.Pod, NodeLister, error) (selectedNode *v1.Node, preemptedPods []*v1.Pod, cleanupNominatedPods []*v1.Pod, err error)
// Predicates() returns a pointer to a map of predicate functions. This is
// exposed for testing.
Predicates() map[string]FitPredicate
// Prioritizers returns a slice of priority config. This is exposed for
// testing.
Prioritizers() []PriorityConfig
}

View File

@ -0,0 +1,60 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"testing"
"k8s.io/api/core/v1"
)
// Some functions used by multiple scheduler tests.
type schedulerTester struct {
t *testing.T
scheduler ScheduleAlgorithm
nodeLister NodeLister
}
// Call if you know exactly where pod should get scheduled.
func (st *schedulerTester) expectSchedule(pod *v1.Pod, expected string) {
actual, err := st.scheduler.Schedule(pod, st.nodeLister)
if err != nil {
st.t.Errorf("Unexpected error %v\nTried to schedule: %#v", err, pod)
return
}
if actual != expected {
st.t.Errorf("Unexpected scheduling value: %v, expected %v", actual, expected)
}
}
// Call if you can't predict where pod will be scheduled.
func (st *schedulerTester) expectSuccess(pod *v1.Pod) {
_, err := st.scheduler.Schedule(pod, st.nodeLister)
if err != nil {
st.t.Errorf("Unexpected error %v\nTried to schedule: %#v", err, pod)
return
}
}
// Call if pod should *not* schedule.
func (st *schedulerTester) expectFailure(pod *v1.Pod) {
_, err := st.scheduler.Schedule(pod, st.nodeLister)
if err == nil {
st.t.Error("Unexpected non-error")
}
}

View File

@ -0,0 +1,169 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
apps "k8s.io/api/apps/v1beta1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
"k8s.io/apimachinery/pkg/labels"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
// FitPredicate is a function that indicates if a pod fits into an existing node.
// The failure information is given by the error.
type FitPredicate func(pod *v1.Pod, meta PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []PredicateFailureReason, error)
// PriorityMapFunction is a function that computes per-node results for a given node.
// TODO: Figure out the exact API of this method.
// TODO: Change interface{} to a specific type.
type PriorityMapFunction func(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error)
// PriorityReduceFunction is a function that aggregated per-node results and computes
// final scores for all nodes.
// TODO: Figure out the exact API of this method.
// TODO: Change interface{} to a specific type.
type PriorityReduceFunction func(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error
// PredicateMetadataProducer is a function that computes predicate metadata for a given pod.
type PredicateMetadataProducer func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) PredicateMetadata
// PriorityMetadataProducer is a function that computes metadata for a given pod. This
// is now used for only for priority functions. For predicates please use PredicateMetadataProducer.
type PriorityMetadataProducer func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{}
// PriorityFunction is a function that computes scores for all nodes.
// DEPRECATED
// Use Map-Reduce pattern for priority functions.
type PriorityFunction func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error)
// PriorityConfig is a config used for a priority function.
type PriorityConfig struct {
Name string
Map PriorityMapFunction
Reduce PriorityReduceFunction
// TODO: Remove it after migrating all functions to
// Map-Reduce pattern.
Function PriorityFunction
Weight int
}
// EmptyPredicateMetadataProducer returns a no-op MetadataProducer type.
func EmptyPredicateMetadataProducer(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) PredicateMetadata {
return nil
}
// EmptyPriorityMetadataProducer returns a no-op PriorityMetadataProducer type.
func EmptyPriorityMetadataProducer(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{} {
return nil
}
// PredicateFailureReason interface represents the failure reason of a predicate.
type PredicateFailureReason interface {
GetReason() string
}
// GetEquivalencePodFunc is a function that gets a EquivalencePod from a pod.
type GetEquivalencePodFunc func(pod *v1.Pod) interface{}
// NodeLister interface represents anything that can list nodes for a scheduler.
type NodeLister interface {
// We explicitly return []*v1.Node, instead of v1.NodeList, to avoid
// performing expensive copies that are unneeded.
List() ([]*v1.Node, error)
}
// PodLister interface represents anything that can list pods for a scheduler.
type PodLister interface {
// We explicitly return []*v1.Pod, instead of v1.PodList, to avoid
// performing expensive copies that are unneeded.
List(labels.Selector) ([]*v1.Pod, error)
// This is similar to "List()", but the returned slice does not
// contain pods that don't pass `podFilter`.
FilteredList(podFilter schedulercache.PodFilter, selector labels.Selector) ([]*v1.Pod, error)
}
// ServiceLister interface represents anything that can produce a list of services; the list is consumed by a scheduler.
type ServiceLister interface {
// Lists all the services
List(labels.Selector) ([]*v1.Service, error)
// Gets the services for the given pod
GetPodServices(*v1.Pod) ([]*v1.Service, error)
}
// ControllerLister interface represents anything that can produce a list of ReplicationController; the list is consumed by a scheduler.
type ControllerLister interface {
// Lists all the replication controllers
List(labels.Selector) ([]*v1.ReplicationController, error)
// Gets the services for the given pod
GetPodControllers(*v1.Pod) ([]*v1.ReplicationController, error)
}
// ReplicaSetLister interface represents anything that can produce a list of ReplicaSet; the list is consumed by a scheduler.
type ReplicaSetLister interface {
// Gets the replicasets for the given pod
GetPodReplicaSets(*v1.Pod) ([]*extensions.ReplicaSet, error)
}
var _ ControllerLister = &EmptyControllerLister{}
// EmptyControllerLister implements ControllerLister on []v1.ReplicationController returning empty data
type EmptyControllerLister struct{}
// List returns nil
func (f EmptyControllerLister) List(labels.Selector) ([]*v1.ReplicationController, error) {
return nil, nil
}
// GetPodControllers returns nil
func (f EmptyControllerLister) GetPodControllers(pod *v1.Pod) (controllers []*v1.ReplicationController, err error) {
return nil, nil
}
var _ ReplicaSetLister = &EmptyReplicaSetLister{}
// EmptyReplicaSetLister implements ReplicaSetLister on []extensions.ReplicaSet returning empty data
type EmptyReplicaSetLister struct{}
// GetPodReplicaSets returns nil
func (f EmptyReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*extensions.ReplicaSet, err error) {
return nil, nil
}
// StatefulSetLister interface represents anything that can produce a list of StatefulSet; the list is consumed by a scheduler.
type StatefulSetLister interface {
// Gets the StatefulSet for the given pod.
GetPodStatefulSets(*v1.Pod) ([]*apps.StatefulSet, error)
}
var _ StatefulSetLister = &EmptyStatefulSetLister{}
// EmptyStatefulSetLister implements StatefulSetLister on []apps.StatefulSet returning empty data.
type EmptyStatefulSetLister struct{}
// GetPodStatefulSets of EmptyStatefulSetLister returns nil.
func (f EmptyStatefulSetLister) GetPodStatefulSets(pod *v1.Pod) (sss []*apps.StatefulSet, err error) {
return nil, nil
}
// PredicateMetadata interface represents anything that can access a predicate metadata.
type PredicateMetadata interface {
ShallowCopy() PredicateMetadata
AddPod(addedPod *v1.Pod, nodeInfo *schedulercache.NodeInfo) error
RemovePod(deletedPod *v1.Pod) error
}

View File

@ -0,0 +1,65 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
// EmptyPriorityMetadataProducer should returns a no-op PriorityMetadataProducer type.
func TestEmptyPriorityMetadataProducer(t *testing.T) {
fakePod := new(v1.Pod)
fakeLabelSelector := labels.SelectorFromSet(labels.Set{"foo": "bar"})
nodeNameToInfo := map[string]*schedulercache.NodeInfo{
"2": schedulercache.NewNodeInfo(fakePod),
"1": schedulercache.NewNodeInfo(),
}
// Test EmptyPriorityMetadataProducer
metadata := EmptyPriorityMetadataProducer(fakePod, nodeNameToInfo)
if metadata != nil {
t.Errorf("failed to produce empty metadata: got %v, expected nil", metadata)
}
// Test EmptyControllerLister should return nill
controllerLister := EmptyControllerLister{}
nilController, nilError := controllerLister.List(fakeLabelSelector)
if nilController != nil || nilError != nil {
t.Errorf("failed to produce empty controller lister: got %v, expected nil", nilController)
}
// Test GetPodControllers on empty controller lister should return nill
nilController, nilError = controllerLister.GetPodControllers(fakePod)
if nilController != nil || nilError != nil {
t.Errorf("failed to produce empty controller lister: got %v, expected nil", nilController)
}
// Test GetPodReplicaSets on empty replica sets should return nill
replicaSetLister := EmptyReplicaSetLister{}
nilRss, nilErrRss := replicaSetLister.GetPodReplicaSets(fakePod)
if nilRss != nil || nilErrRss != nil {
t.Errorf("failed to produce empty replicaSetLister: got %v, expected nil", nilRss)
}
// Test GetPodStatefulSets on empty replica sets should return nill
statefulSetLister := EmptyStatefulSetLister{}
nilSSL, nilErrSSL := statefulSetLister.GetPodStatefulSets(fakePod)
if nilSSL != nil || nilErrSSL != nil {
t.Errorf("failed to produce empty statefulSetLister: got %v, expected nil", nilSSL)
}
}

View File

@ -0,0 +1,69 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
const (
// TaintNodeNotReady will be added when node is not ready
// and feature-gate for TaintBasedEvictions flag is enabled,
// and removed when node becomes ready.
TaintNodeNotReady = "node.kubernetes.io/not-ready"
// DeprecatedTaintNodeNotReady is the deprecated version of TaintNodeNotReady.
// It is deprecated since 1.9
DeprecatedTaintNodeNotReady = "node.alpha.kubernetes.io/notReady"
// TaintNodeUnreachable will be added when node becomes unreachable
// (corresponding to NodeReady status ConditionUnknown)
// and feature-gate for TaintBasedEvictions flag is enabled,
// and removed when node becomes reachable (NodeReady status ConditionTrue).
TaintNodeUnreachable = "node.kubernetes.io/unreachable"
// DeprecatedTaintNodeUnreachable is the deprecated version of TaintNodeUnreachable.
// It is deprecated since 1.9
DeprecatedTaintNodeUnreachable = "node.alpha.kubernetes.io/unreachable"
// TaintNodeOutOfDisk will be added when node becomes out of disk
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
TaintNodeOutOfDisk = "node.kubernetes.io/out-of-disk"
// TaintNodeMemoryPressure will be added when node has memory pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough memory.
TaintNodeMemoryPressure = "node.kubernetes.io/memory-pressure"
// TaintNodeDiskPressure will be added when node has disk pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
TaintNodeDiskPressure = "node.kubernetes.io/disk-pressure"
// TaintNodeNetworkUnavailable will be added when node's network is unavailable
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when network becomes ready.
TaintNodeNetworkUnavailable = "node.kubernetes.io/network-unavailable"
// TaintNodePIDPressure will be added when node has pid pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
TaintNodePIDPressure = "node.kubernetes.io/pid-pressure"
// TaintExternalCloudProvider sets this taint on a node to mark it as unusable,
// when kubelet is started with the "external" cloud provider, until a controller
// from the cloud-controller-manager intitializes this node, and then removes
// the taint
TaintExternalCloudProvider = "node.cloudprovider.kubernetes.io/uninitialized"
)

View File

@ -0,0 +1,40 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = ["plugins.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithmprovider",
deps = ["//pkg/scheduler/algorithmprovider/defaults:go_default_library"],
)
go_test(
name = "go_default_test",
srcs = ["plugins_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/factory:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/scheduler/algorithmprovider/defaults:all-srcs",
],
tags = ["automanaged"],
)

View File

@ -0,0 +1,61 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = ["defaults.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithmprovider/defaults",
deps = [
"//pkg/features:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/algorithm/priorities:go_default_library",
"//pkg/scheduler/core:go_default_library",
"//pkg/scheduler/factory:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"compatibility_test.go",
"defaults_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/apis/core/install:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/api/latest:go_default_library",
"//pkg/scheduler/factory:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/client-go/informers:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//vendor/k8s.io/client-go/util/testing:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@ -0,0 +1,594 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package defaults
import (
"fmt"
"net/http/httptest"
"reflect"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
restclient "k8s.io/client-go/rest"
utiltesting "k8s.io/client-go/util/testing"
"k8s.io/kubernetes/pkg/api/legacyscheme"
_ "k8s.io/kubernetes/pkg/apis/core/install"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
latestschedulerapi "k8s.io/kubernetes/pkg/scheduler/api/latest"
"k8s.io/kubernetes/pkg/scheduler/factory"
)
const enableEquivalenceCache = true
func TestCompatibility_v1_Scheduler(t *testing.T) {
// Add serialized versions of scheduler config that exercise available options to ensure compatibility between releases
schedulerFiles := map[string]struct {
JSON string
ExpectedPolicy schedulerapi.Policy
}{
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.0": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsPorts"},
{"name": "NoDiskConflict"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "LeastRequestedPriority", "weight": 1},
{"name": "ServiceSpreadingPriority", "weight": 2},
{"name": "TestServiceAntiAffinity", "weight": 3, "argument": {"serviceAntiAffinity": {"label": "zone"}}},
{"name": "TestLabelPreference", "weight": 4, "argument": {"labelPreference": {"label": "bar", "presence":true}}}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsPorts"},
{Name: "NoDiskConflict"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "LeastRequestedPriority", Weight: 1},
{Name: "ServiceSpreadingPriority", Weight: 2},
{Name: "TestServiceAntiAffinity", Weight: 3, Argument: &schedulerapi.PriorityArgument{ServiceAntiAffinity: &schedulerapi.ServiceAntiAffinity{Label: "zone"}}},
{Name: "TestLabelPreference", Weight: 4, Argument: &schedulerapi.PriorityArgument{LabelPreference: &schedulerapi.LabelPreference{Label: "bar", Presence: true}}},
},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.1": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsHostPorts"},
{"name": "PodFitsResources"},
{"name": "NoDiskConflict"},
{"name": "HostName"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "TestServiceAntiAffinity", "weight": 3, "argument": {"serviceAntiAffinity": {"label": "zone"}}},
{"name": "TestLabelPreference", "weight": 4, "argument": {"labelPreference": {"label": "bar", "presence":true}}}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsHostPorts"},
{Name: "PodFitsResources"},
{Name: "NoDiskConflict"},
{Name: "HostName"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "TestServiceAntiAffinity", Weight: 3, Argument: &schedulerapi.PriorityArgument{ServiceAntiAffinity: &schedulerapi.ServiceAntiAffinity{Label: "zone"}}},
{Name: "TestLabelPreference", Weight: 4, Argument: &schedulerapi.PriorityArgument{LabelPreference: &schedulerapi.LabelPreference{Label: "bar", Presence: true}}},
},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.2": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsHostPorts"},
{"name": "HostName"},
{"name": "NoDiskConflict"},
{"name": "NoVolumeZoneConflict"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MaxAzureDiskVolumeCount"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "NodeAffinityPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "TestServiceAntiAffinity", "weight": 3, "argument": {"serviceAntiAffinity": {"label": "zone"}}},
{"name": "TestLabelPreference", "weight": 4, "argument": {"labelPreference": {"label": "bar", "presence":true}}}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsHostPorts"},
{Name: "HostName"},
{Name: "NoDiskConflict"},
{Name: "NoVolumeZoneConflict"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MaxAzureDiskVolumeCount"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "NodeAffinityPriority", Weight: 2},
{Name: "ImageLocalityPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "TestServiceAntiAffinity", Weight: 3, Argument: &schedulerapi.PriorityArgument{ServiceAntiAffinity: &schedulerapi.ServiceAntiAffinity{Label: "zone"}}},
{Name: "TestLabelPreference", Weight: 4, Argument: &schedulerapi.PriorityArgument{LabelPreference: &schedulerapi.LabelPreference{Label: "bar", Presence: true}}},
},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.3": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsHostPorts"},
{"name": "HostName"},
{"name": "NoDiskConflict"},
{"name": "NoVolumeZoneConflict"},
{"name": "PodToleratesNodeTaints"},
{"name": "CheckNodeMemoryPressure"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MaxAzureDiskVolumeCount"},
{"name": "MatchInterPodAffinity"},
{"name": "GeneralPredicates"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "NodeAffinityPriority", "weight": 2},
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsHostPorts"},
{Name: "HostName"},
{Name: "NoDiskConflict"},
{Name: "NoVolumeZoneConflict"},
{Name: "PodToleratesNodeTaints"},
{Name: "CheckNodeMemoryPressure"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MaxAzureDiskVolumeCount"},
{Name: "MatchInterPodAffinity"},
{Name: "GeneralPredicates"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "ImageLocalityPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "NodeAffinityPriority", Weight: 2},
{Name: "TaintTolerationPriority", Weight: 2},
{Name: "InterPodAffinityPriority", Weight: 2},
},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.4": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsHostPorts"},
{"name": "HostName"},
{"name": "NoDiskConflict"},
{"name": "NoVolumeZoneConflict"},
{"name": "PodToleratesNodeTaints"},
{"name": "CheckNodeMemoryPressure"},
{"name": "CheckNodeDiskPressure"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MaxAzureDiskVolumeCount"},
{"name": "MatchInterPodAffinity"},
{"name": "GeneralPredicates"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "NodePreferAvoidPodsPriority", "weight": 2},
{"name": "NodeAffinityPriority", "weight": 2},
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2},
{"name": "MostRequestedPriority", "weight": 2}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsHostPorts"},
{Name: "HostName"},
{Name: "NoDiskConflict"},
{Name: "NoVolumeZoneConflict"},
{Name: "PodToleratesNodeTaints"},
{Name: "CheckNodeMemoryPressure"},
{Name: "CheckNodeDiskPressure"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MaxAzureDiskVolumeCount"},
{Name: "MatchInterPodAffinity"},
{Name: "GeneralPredicates"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "ImageLocalityPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "NodePreferAvoidPodsPriority", Weight: 2},
{Name: "NodeAffinityPriority", Weight: 2},
{Name: "TaintTolerationPriority", Weight: 2},
{Name: "InterPodAffinityPriority", Weight: 2},
{Name: "MostRequestedPriority", Weight: 2},
},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.7": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsHostPorts"},
{"name": "HostName"},
{"name": "NoDiskConflict"},
{"name": "NoVolumeZoneConflict"},
{"name": "PodToleratesNodeTaints"},
{"name": "CheckNodeMemoryPressure"},
{"name": "CheckNodeDiskPressure"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MaxAzureDiskVolumeCount"},
{"name": "MatchInterPodAffinity"},
{"name": "GeneralPredicates"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "NodePreferAvoidPodsPriority", "weight": 2},
{"name": "NodeAffinityPriority", "weight": 2},
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2},
{"name": "MostRequestedPriority", "weight": 2}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsHostPorts"},
{Name: "HostName"},
{Name: "NoDiskConflict"},
{Name: "NoVolumeZoneConflict"},
{Name: "PodToleratesNodeTaints"},
{Name: "CheckNodeMemoryPressure"},
{Name: "CheckNodeDiskPressure"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MaxAzureDiskVolumeCount"},
{Name: "MatchInterPodAffinity"},
{Name: "GeneralPredicates"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "ImageLocalityPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "NodePreferAvoidPodsPriority", Weight: 2},
{Name: "NodeAffinityPriority", Weight: 2},
{Name: "TaintTolerationPriority", Weight: 2},
{Name: "InterPodAffinityPriority", Weight: 2},
{Name: "MostRequestedPriority", Weight: 2},
},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.8": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsHostPorts"},
{"name": "HostName"},
{"name": "NoDiskConflict"},
{"name": "NoVolumeZoneConflict"},
{"name": "PodToleratesNodeTaints"},
{"name": "CheckNodeMemoryPressure"},
{"name": "CheckNodeDiskPressure"},
{"name": "CheckNodeCondition"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MaxAzureDiskVolumeCount"},
{"name": "MatchInterPodAffinity"},
{"name": "GeneralPredicates"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "NodePreferAvoidPodsPriority", "weight": 2},
{"name": "NodeAffinityPriority", "weight": 2},
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2},
{"name": "MostRequestedPriority", "weight": 2}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsHostPorts"},
{Name: "HostName"},
{Name: "NoDiskConflict"},
{Name: "NoVolumeZoneConflict"},
{Name: "PodToleratesNodeTaints"},
{Name: "CheckNodeMemoryPressure"},
{Name: "CheckNodeDiskPressure"},
{Name: "CheckNodeCondition"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MaxAzureDiskVolumeCount"},
{Name: "MatchInterPodAffinity"},
{Name: "GeneralPredicates"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "ImageLocalityPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "NodePreferAvoidPodsPriority", Weight: 2},
{Name: "NodeAffinityPriority", Weight: 2},
{Name: "TaintTolerationPriority", Weight: 2},
{Name: "InterPodAffinityPriority", Weight: 2},
{Name: "MostRequestedPriority", Weight: 2},
},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.9": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsHostPorts"},
{"name": "HostName"},
{"name": "NoDiskConflict"},
{"name": "NoVolumeZoneConflict"},
{"name": "PodToleratesNodeTaints"},
{"name": "CheckNodeMemoryPressure"},
{"name": "CheckNodeDiskPressure"},
{"name": "CheckNodeCondition"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MaxAzureDiskVolumeCount"},
{"name": "MatchInterPodAffinity"},
{"name": "GeneralPredicates"},
{"name": "CheckVolumeBinding"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "NodePreferAvoidPodsPriority", "weight": 2},
{"name": "NodeAffinityPriority", "weight": 2},
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2},
{"name": "MostRequestedPriority", "weight": 2}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsHostPorts"},
{Name: "HostName"},
{Name: "NoDiskConflict"},
{Name: "NoVolumeZoneConflict"},
{Name: "PodToleratesNodeTaints"},
{Name: "CheckNodeMemoryPressure"},
{Name: "CheckNodeDiskPressure"},
{Name: "CheckNodeCondition"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MaxAzureDiskVolumeCount"},
{Name: "MatchInterPodAffinity"},
{Name: "GeneralPredicates"},
{Name: "CheckVolumeBinding"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "ImageLocalityPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "NodePreferAvoidPodsPriority", Weight: 2},
{Name: "NodeAffinityPriority", Weight: 2},
{Name: "TaintTolerationPriority", Weight: 2},
{Name: "InterPodAffinityPriority", Weight: 2},
{Name: "MostRequestedPriority", Weight: 2},
},
},
},
}
registeredPredicates := sets.NewString(factory.ListRegisteredFitPredicates()...)
registeredPriorities := sets.NewString(factory.ListRegisteredPriorityFunctions()...)
seenPredicates := sets.NewString()
seenPriorities := sets.NewString()
for v, tc := range schedulerFiles {
fmt.Printf("%s: Testing scheduler config\n", v)
policy := schedulerapi.Policy{}
if err := runtime.DecodeInto(latestschedulerapi.Codec, []byte(tc.JSON), &policy); err != nil {
t.Errorf("%s: Error decoding: %v", v, err)
continue
}
for _, predicate := range policy.Predicates {
seenPredicates.Insert(predicate.Name)
}
for _, priority := range policy.Priorities {
seenPriorities.Insert(priority.Name)
}
if !reflect.DeepEqual(policy, tc.ExpectedPolicy) {
t.Errorf("%s: Expected:\n\t%#v\nGot:\n\t%#v", v, tc.ExpectedPolicy, policy)
}
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
informerFactory := informers.NewSharedInformerFactory(client, 0)
if _, err := factory.NewConfigFactory(
"some-scheduler-name",
client,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
informerFactory.Extensions().V1beta1().ReplicaSets(),
informerFactory.Apps().V1beta1().StatefulSets(),
informerFactory.Core().V1().Services(),
informerFactory.Policy().V1beta1().PodDisruptionBudgets(),
informerFactory.Storage().V1().StorageClasses(),
v1.DefaultHardPodAffinitySymmetricWeight,
enableEquivalenceCache,
).CreateFromConfig(policy); err != nil {
t.Errorf("%s: Error constructing: %v", v, err)
continue
}
}
if !seenPredicates.HasAll(registeredPredicates.List()...) {
t.Errorf("Registered predicates are missing from compatibility test (add to test stanza for version currently in development): %#v", registeredPredicates.Difference(seenPredicates).List())
}
if !seenPriorities.HasAll(registeredPriorities.List()...) {
t.Errorf("Registered priorities are missing from compatibility test (add to test stanza for version currently in development): %#v", registeredPriorities.Difference(seenPriorities).List())
}
}

View File

@ -0,0 +1,264 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package defaults
import (
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
"k8s.io/kubernetes/pkg/scheduler/algorithm/priorities"
"k8s.io/kubernetes/pkg/scheduler/core"
"k8s.io/kubernetes/pkg/scheduler/factory"
"github.com/golang/glog"
)
const (
// ClusterAutoscalerProvider defines the default autoscaler provider
ClusterAutoscalerProvider = "ClusterAutoscalerProvider"
)
func init() {
// Register functions that extract metadata used by predicates and priorities computations.
factory.RegisterPredicateMetadataProducerFactory(
func(args factory.PluginFactoryArgs) algorithm.PredicateMetadataProducer {
return predicates.NewPredicateMetadataFactory(args.PodLister)
})
factory.RegisterPriorityMetadataProducerFactory(
func(args factory.PluginFactoryArgs) algorithm.PriorityMetadataProducer {
return priorities.NewPriorityMetadataFactory(args.ServiceLister, args.ControllerLister, args.ReplicaSetLister, args.StatefulSetLister)
})
registerAlgorithmProvider(defaultPredicates(), defaultPriorities())
// IMPORTANT NOTES for predicate developers:
// We are using cached predicate result for pods belonging to the same equivalence class.
// So when implementing a new predicate, you are expected to check whether the result
// of your predicate function can be affected by related API object change (ADD/DELETE/UPDATE).
// If yes, you are expected to invalidate the cached predicate result for related API object change.
// For example:
// https://github.com/kubernetes/kubernetes/blob/36a218e/plugin/pkg/scheduler/factory/factory.go#L422
// Registers predicates and priorities that are not enabled by default, but user can pick when creating his
// own set of priorities/predicates.
// PodFitsPorts has been replaced by PodFitsHostPorts for better user understanding.
// For backwards compatibility with 1.0, PodFitsPorts is registered as well.
factory.RegisterFitPredicate("PodFitsPorts", predicates.PodFitsHostPorts)
// Fit is defined based on the absence of port conflicts.
// This predicate is actually a default predicate, because it is invoked from
// predicates.GeneralPredicates()
factory.RegisterFitPredicate(predicates.PodFitsHostPortsPred, predicates.PodFitsHostPorts)
// Fit is determined by resource availability.
// This predicate is actually a default predicate, because it is invoked from
// predicates.GeneralPredicates()
factory.RegisterFitPredicate(predicates.PodFitsResourcesPred, predicates.PodFitsResources)
// Fit is determined by the presence of the Host parameter and a string match
// This predicate is actually a default predicate, because it is invoked from
// predicates.GeneralPredicates()
factory.RegisterFitPredicate(predicates.HostNamePred, predicates.PodFitsHost)
// Fit is determined by node selector query.
factory.RegisterFitPredicate(predicates.MatchNodeSelectorPred, predicates.PodMatchNodeSelector)
// Use equivalence class to speed up heavy predicates phase.
factory.RegisterGetEquivalencePodFunction(
func(args factory.PluginFactoryArgs) algorithm.GetEquivalencePodFunc {
return predicates.NewEquivalencePodGenerator(args.PVCInfo)
},
)
// ServiceSpreadingPriority is a priority config factory that spreads pods by minimizing
// the number of pods (belonging to the same service) on the same node.
// Register the factory so that it's available, but do not include it as part of the default priorities
// Largely replaced by "SelectorSpreadPriority", but registered for backward compatibility with 1.0
factory.RegisterPriorityConfigFactory(
"ServiceSpreadingPriority",
factory.PriorityConfigFactory{
MapReduceFunction: func(args factory.PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
return priorities.NewSelectorSpreadPriority(args.ServiceLister, algorithm.EmptyControllerLister{}, algorithm.EmptyReplicaSetLister{}, algorithm.EmptyStatefulSetLister{})
},
Weight: 1,
},
)
// EqualPriority is a prioritizer function that gives an equal weight of one to all nodes
// Register the priority function so that its available
// but do not include it as part of the default priorities
factory.RegisterPriorityFunction2("EqualPriority", core.EqualPriorityMap, nil, 1)
// ImageLocalityPriority prioritizes nodes based on locality of images requested by a pod. Nodes with larger size
// of already-installed packages required by the pod will be preferred over nodes with no already-installed
// packages required by the pod or a small total size of already-installed packages required by the pod.
factory.RegisterPriorityFunction2("ImageLocalityPriority", priorities.ImageLocalityPriorityMap, nil, 1)
// Optional, cluster-autoscaler friendly priority function - give used nodes higher priority.
factory.RegisterPriorityFunction2("MostRequestedPriority", priorities.MostRequestedPriorityMap, nil, 1)
}
func defaultPredicates() sets.String {
return sets.NewString(
// Fit is determined by volume zone requirements.
factory.RegisterFitPredicateFactory(
predicates.NoVolumeZoneConflictPred,
func(args factory.PluginFactoryArgs) algorithm.FitPredicate {
return predicates.NewVolumeZonePredicate(args.PVInfo, args.PVCInfo, args.StorageClassInfo)
},
),
// Fit is determined by whether or not there would be too many AWS EBS volumes attached to the node
factory.RegisterFitPredicateFactory(
predicates.MaxEBSVolumeCountPred,
func(args factory.PluginFactoryArgs) algorithm.FitPredicate {
return predicates.NewMaxPDVolumeCountPredicate(predicates.EBSVolumeFilterType, args.PVInfo, args.PVCInfo)
},
),
// Fit is determined by whether or not there would be too many GCE PD volumes attached to the node
factory.RegisterFitPredicateFactory(
predicates.MaxGCEPDVolumeCountPred,
func(args factory.PluginFactoryArgs) algorithm.FitPredicate {
return predicates.NewMaxPDVolumeCountPredicate(predicates.GCEPDVolumeFilterType, args.PVInfo, args.PVCInfo)
},
),
// Fit is determined by whether or not there would be too many Azure Disk volumes attached to the node
factory.RegisterFitPredicateFactory(
predicates.MaxAzureDiskVolumeCountPred,
func(args factory.PluginFactoryArgs) algorithm.FitPredicate {
return predicates.NewMaxPDVolumeCountPredicate(predicates.AzureDiskVolumeFilterType, args.PVInfo, args.PVCInfo)
},
),
// Fit is determined by inter-pod affinity.
factory.RegisterFitPredicateFactory(
predicates.MatchInterPodAffinityPred,
func(args factory.PluginFactoryArgs) algorithm.FitPredicate {
return predicates.NewPodAffinityPredicate(args.NodeInfo, args.PodLister)
},
),
// Fit is determined by non-conflicting disk volumes.
factory.RegisterFitPredicate(predicates.NoDiskConflictPred, predicates.NoDiskConflict),
// GeneralPredicates are the predicates that are enforced by all Kubernetes components
// (e.g. kubelet and all schedulers)
factory.RegisterFitPredicate(predicates.GeneralPred, predicates.GeneralPredicates),
// Fit is determined by node memory pressure condition.
factory.RegisterFitPredicate(predicates.CheckNodeMemoryPressurePred, predicates.CheckNodeMemoryPressurePredicate),
// Fit is determined by node disk pressure condition.
factory.RegisterFitPredicate(predicates.CheckNodeDiskPressurePred, predicates.CheckNodeDiskPressurePredicate),
// Fit is determined by node conditions: not ready, network unavailable or out of disk.
factory.RegisterMandatoryFitPredicate(predicates.CheckNodeConditionPred, predicates.CheckNodeConditionPredicate),
// Fit is determined based on whether a pod can tolerate all of the node's taints
factory.RegisterFitPredicate(predicates.PodToleratesNodeTaintsPred, predicates.PodToleratesNodeTaints),
// Fit is determined by volume topology requirements.
factory.RegisterFitPredicateFactory(
predicates.CheckVolumeBindingPred,
func(args factory.PluginFactoryArgs) algorithm.FitPredicate {
return predicates.NewVolumeBindingPredicate(args.VolumeBinder)
},
),
)
}
// ApplyFeatureGates applies algorithm by feature gates.
func ApplyFeatureGates() {
if utilfeature.DefaultFeatureGate.Enabled(features.TaintNodesByCondition) {
// Remove "CheckNodeCondition" predicate
factory.RemoveFitPredicate(predicates.CheckNodeConditionPred)
// Remove Key "CheckNodeCondition" From All Algorithm Provider
// The key will be removed from all providers which in algorithmProviderMap[]
// if you just want remove specific provider, call func RemovePredicateKeyFromAlgoProvider()
factory.RemovePredicateKeyFromAlgorithmProviderMap(predicates.CheckNodeConditionPred)
// Fit is determined based on whether a pod can tolerate all of the node's taints
factory.RegisterMandatoryFitPredicate(predicates.PodToleratesNodeTaintsPred, predicates.PodToleratesNodeTaints)
// Insert Key "PodToleratesNodeTaints" To All Algorithm Provider
// The key will insert to all providers which in algorithmProviderMap[]
// if you just want insert to specific provider, call func InsertPredicateKeyToAlgoProvider()
factory.InsertPredicateKeyToAlgorithmProviderMap(predicates.PodToleratesNodeTaintsPred)
glog.Warningf("TaintNodesByCondition is enabled, PodToleratesNodeTaints predicate is mandatory")
}
// Prioritizes nodes that satisfy pod's resource limits
if utilfeature.DefaultFeatureGate.Enabled(features.ResourceLimitsPriorityFunction) {
factory.RegisterPriorityFunction2("ResourceLimitsPriority", priorities.ResourceLimitsPriorityMap, nil, 1)
}
}
func registerAlgorithmProvider(predSet, priSet sets.String) {
// Registers algorithm providers. By default we use 'DefaultProvider', but user can specify one to be used
// by specifying flag.
factory.RegisterAlgorithmProvider(factory.DefaultProvider, predSet, priSet)
// Cluster autoscaler friendly scheduling algorithm.
factory.RegisterAlgorithmProvider(ClusterAutoscalerProvider, predSet,
copyAndReplace(priSet, "LeastRequestedPriority", "MostRequestedPriority"))
}
func defaultPriorities() sets.String {
return sets.NewString(
// spreads pods by minimizing the number of pods (belonging to the same service or replication controller) on the same node.
factory.RegisterPriorityConfigFactory(
"SelectorSpreadPriority",
factory.PriorityConfigFactory{
MapReduceFunction: func(args factory.PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
return priorities.NewSelectorSpreadPriority(args.ServiceLister, args.ControllerLister, args.ReplicaSetLister, args.StatefulSetLister)
},
Weight: 1,
},
),
// pods should be placed in the same topological domain (e.g. same node, same rack, same zone, same power domain, etc.)
// as some other pods, or, conversely, should not be placed in the same topological domain as some other pods.
factory.RegisterPriorityConfigFactory(
"InterPodAffinityPriority",
factory.PriorityConfigFactory{
Function: func(args factory.PluginFactoryArgs) algorithm.PriorityFunction {
return priorities.NewInterPodAffinityPriority(args.NodeInfo, args.NodeLister, args.PodLister, args.HardPodAffinitySymmetricWeight)
},
Weight: 1,
},
),
// Prioritize nodes by least requested utilization.
factory.RegisterPriorityFunction2("LeastRequestedPriority", priorities.LeastRequestedPriorityMap, nil, 1),
// Prioritizes nodes to help achieve balanced resource usage
factory.RegisterPriorityFunction2("BalancedResourceAllocation", priorities.BalancedResourceAllocationMap, nil, 1),
// Set this weight large enough to override all other priority functions.
// TODO: Figure out a better way to do this, maybe at same time as fixing #24720.
factory.RegisterPriorityFunction2("NodePreferAvoidPodsPriority", priorities.CalculateNodePreferAvoidPodsPriorityMap, nil, 10000),
// Prioritizes nodes that have labels matching NodeAffinity
factory.RegisterPriorityFunction2("NodeAffinityPriority", priorities.CalculateNodeAffinityPriorityMap, priorities.CalculateNodeAffinityPriorityReduce, 1),
// Prioritizes nodes that marked with taint which pod can tolerate.
factory.RegisterPriorityFunction2("TaintTolerationPriority", priorities.ComputeTaintTolerationPriorityMap, priorities.ComputeTaintTolerationPriorityReduce, 1),
)
}
func copyAndReplace(set sets.String, replaceWhat, replaceWith string) sets.String {
result := sets.NewString(set.List()...)
if result.Has(replaceWhat) {
result.Delete(replaceWhat)
result.Insert(replaceWith)
}
return result
}

View File

@ -0,0 +1,87 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package defaults
import (
"testing"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
)
func TestCopyAndReplace(t *testing.T) {
testCases := []struct {
set sets.String
replaceWhat string
replaceWith string
expected sets.String
}{
{
set: sets.String{"A": sets.Empty{}, "B": sets.Empty{}},
replaceWhat: "A",
replaceWith: "C",
expected: sets.String{"B": sets.Empty{}, "C": sets.Empty{}},
},
{
set: sets.String{"A": sets.Empty{}, "B": sets.Empty{}},
replaceWhat: "D",
replaceWith: "C",
expected: sets.String{"A": sets.Empty{}, "B": sets.Empty{}},
},
}
for _, testCase := range testCases {
result := copyAndReplace(testCase.set, testCase.replaceWhat, testCase.replaceWith)
if !result.Equal(testCase.expected) {
t.Errorf("expected %v got %v", testCase.expected, result)
}
}
}
func TestDefaultPriorities(t *testing.T) {
result := sets.NewString(
"SelectorSpreadPriority",
"InterPodAffinityPriority",
"LeastRequestedPriority",
"BalancedResourceAllocation",
"NodePreferAvoidPodsPriority",
"NodeAffinityPriority",
"TaintTolerationPriority")
if expected := defaultPriorities(); !result.Equal(expected) {
t.Errorf("expected %v got %v", expected, result)
}
}
func TestDefaultPredicates(t *testing.T) {
result := sets.NewString(
"NoVolumeZoneConflict",
"MaxEBSVolumeCount",
"MaxGCEPDVolumeCount",
"MaxAzureDiskVolumeCount",
"MatchInterPodAffinity",
"NoDiskConflict",
"GeneralPredicates",
"CheckNodeMemoryPressure",
"CheckNodeDiskPressure",
"CheckNodeCondition",
"PodToleratesNodeTaints",
predicates.CheckVolumeBindingPred,
)
if expected := defaultPredicates(); !result.Equal(expected) {
t.Errorf("expected %v got %v", expected, result)
}
}

View File

@ -0,0 +1,26 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithmprovider
import (
"k8s.io/kubernetes/pkg/scheduler/algorithmprovider/defaults"
)
// ApplyFeatureGates applies algorithm by feature gates.
func ApplyFeatureGates() {
defaults.ApplyFeatureGates()
}

View File

@ -0,0 +1,109 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithmprovider
import (
"testing"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/scheduler/factory"
)
var (
algorithmProviderNames = []string{
factory.DefaultProvider,
}
)
func TestDefaultConfigExists(t *testing.T) {
p, err := factory.GetAlgorithmProvider(factory.DefaultProvider)
if err != nil {
t.Errorf("error retrieving default provider: %v", err)
}
if p == nil {
t.Error("algorithm provider config should not be nil")
}
if len(p.FitPredicateKeys) == 0 {
t.Error("default algorithm provider shouldn't have 0 fit predicates")
}
}
func TestAlgorithmProviders(t *testing.T) {
for _, pn := range algorithmProviderNames {
p, err := factory.GetAlgorithmProvider(pn)
if err != nil {
t.Errorf("error retrieving '%s' provider: %v", pn, err)
break
}
if len(p.PriorityFunctionKeys) == 0 {
t.Errorf("%s algorithm provider shouldn't have 0 priority functions", pn)
}
for _, pf := range p.PriorityFunctionKeys.List() {
if !factory.IsPriorityFunctionRegistered(pf) {
t.Errorf("priority function %s is not registered but is used in the %s algorithm provider", pf, pn)
}
}
for _, fp := range p.FitPredicateKeys.List() {
if !factory.IsFitPredicateRegistered(fp) {
t.Errorf("fit predicate %s is not registered but is used in the %s algorithm provider", fp, pn)
}
}
}
}
func TestApplyFeatureGates(t *testing.T) {
for _, pn := range algorithmProviderNames {
p, err := factory.GetAlgorithmProvider(pn)
if err != nil {
t.Errorf("Error retrieving '%s' provider: %v", pn, err)
break
}
if !p.FitPredicateKeys.Has("CheckNodeCondition") {
t.Errorf("Failed to find predicate: 'CheckNodeCondition'")
break
}
if !p.FitPredicateKeys.Has("PodToleratesNodeTaints") {
t.Errorf("Failed to find predicate: 'PodToleratesNodeTaints'")
break
}
}
// Apply features for algorithm providers.
utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=True")
ApplyFeatureGates()
for _, pn := range algorithmProviderNames {
p, err := factory.GetAlgorithmProvider(pn)
if err != nil {
t.Errorf("Error retrieving '%s' provider: %v", pn, err)
break
}
if !p.FitPredicateKeys.Has("PodToleratesNodeTaints") {
t.Errorf("Failed to find predicate: 'PodToleratesNodeTaints'")
break
}
if p.FitPredicateKeys.Has("CheckNodeCondition") {
t.Errorf("Unexpected predicate: 'CheckNodeCondition'")
break
}
}
}

43
vendor/k8s.io/kubernetes/pkg/scheduler/api/BUILD generated vendored Normal file
View File

@ -0,0 +1,43 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"register.go",
"types.go",
"zz_generated.deepcopy.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/api",
deps = [
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/scheduler/api/latest:all-srcs",
"//pkg/scheduler/api/v1:all-srcs",
"//pkg/scheduler/api/validation:all-srcs",
],
tags = ["automanaged"],
)

20
vendor/k8s.io/kubernetes/pkg/scheduler/api/doc.go generated vendored Normal file
View File

@ -0,0 +1,20 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// +k8s:deepcopy-gen=package
// Package api contains scheduler API objects.
package api // import "k8s.io/kubernetes/pkg/scheduler/api"

View File

@ -0,0 +1,33 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["latest.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/api/latest",
deps = [
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/api/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/serializer/json:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/serializer/versioning:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@ -0,0 +1,54 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package latest
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/runtime/serializer/json"
"k8s.io/apimachinery/pkg/runtime/serializer/versioning"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
// Init the api v1 package
_ "k8s.io/kubernetes/pkg/scheduler/api/v1"
)
// Version is the string that represents the current external default version.
const Version = "v1"
// OldestVersion is the string that represents the oldest server version supported.
const OldestVersion = "v1"
// Versions is the list of versions that are recognized in code. The order provided
// may be assumed to be least feature rich to most feature rich, and clients may
// choose to prefer the latter items in the list over the former items when presented
// with a set of versions to choose.
var Versions = []string{"v1"}
// Codec is the default codec for serializing input that should use
// the latest supported version. It supports JSON by default.
var Codec runtime.Codec
func init() {
jsonSerializer := json.NewSerializer(json.DefaultMetaFactory, schedulerapi.Scheme, schedulerapi.Scheme, true)
Codec = versioning.NewDefaultingCodecForScheme(
schedulerapi.Scheme,
jsonSerializer,
jsonSerializer,
schema.GroupVersion{Version: Version},
runtime.InternalGroupVersioner,
)
}

55
vendor/k8s.io/kubernetes/pkg/scheduler/api/register.go generated vendored Normal file
View File

@ -0,0 +1,55 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
)
// Scheme is the default instance of runtime.Scheme to which types in the Kubernetes API are already registered.
// TODO: remove this, scheduler should not have its own scheme.
var Scheme = runtime.NewScheme()
// SchemeGroupVersion is group version used to register these objects
// TODO this should be in the "scheduler" group
var SchemeGroupVersion = schema.GroupVersion{Group: "", Version: runtime.APIVersionInternal}
var (
// SchemeBuilder defines a SchemeBuilder object.
SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
// AddToScheme is used to add stored functions to scheme.
AddToScheme = SchemeBuilder.AddToScheme
)
func init() {
if err := addKnownTypes(Scheme); err != nil {
// Programmer error.
panic(err)
}
}
func addKnownTypes(scheme *runtime.Scheme) error {
if err := scheme.AddIgnoredConversionType(&metav1.TypeMeta{}, &metav1.TypeMeta{}); err != nil {
return err
}
scheme.AddKnownTypes(SchemeGroupVersion,
&Policy{},
)
return nil
}

281
vendor/k8s.io/kubernetes/pkg/scheduler/api/types.go generated vendored Normal file
View File

@ -0,0 +1,281 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api
import (
"time"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
restclient "k8s.io/client-go/rest"
)
const (
// MaxUint defines the max unsigned int value.
MaxUint = ^uint(0)
// MaxInt defines the max signed int value.
MaxInt = int(MaxUint >> 1)
// MaxTotalPriority defines the max total priority value.
MaxTotalPriority = MaxInt
// MaxPriority defines the max priority value.
MaxPriority = 10
// MaxWeight defines the max weight value.
MaxWeight = MaxInt / MaxPriority
// HighestUserDefinablePriority is the highest priority for user defined priority classes. Priority values larger than 1 billion are reserved for Kubernetes system use.
HighestUserDefinablePriority = int32(1000000000)
// SystemCriticalPriority is the beginning of the range of priority values for critical system components.
SystemCriticalPriority = 2 * HighestUserDefinablePriority
// NOTE: In order to avoid conflict of names with user-defined priority classes, all the names must
// start with scheduling.SystemPriorityClassPrefix which is by default "system-".
SystemClusterCritical = "system-cluster-critical"
SystemNodeCritical = "system-node-critical"
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// Policy describes a struct of a policy resource in api.
type Policy struct {
metav1.TypeMeta
// Holds the information to configure the fit predicate functions.
// If unspecified, the default predicate functions will be applied.
// If empty list, all predicates (except the mandatory ones) will be
// bypassed.
Predicates []PredicatePolicy
// Holds the information to configure the priority functions.
// If unspecified, the default priority functions will be applied.
// If empty list, all priority functions will be bypassed.
Priorities []PriorityPolicy
// Holds the information to communicate with the extender(s)
ExtenderConfigs []ExtenderConfig
// RequiredDuringScheduling affinity is not symmetric, but there is an implicit PreferredDuringScheduling affinity rule
// corresponding to every RequiredDuringScheduling affinity rule.
// HardPodAffinitySymmetricWeight represents the weight of implicit PreferredDuringScheduling affinity rule, in the range 1-100.
HardPodAffinitySymmetricWeight int32
// When AlwaysCheckAllPredicates is set to true, scheduler checks all
// the configured predicates even after one or more of them fails.
// When the flag is set to false, scheduler skips checking the rest
// of the predicates after it finds one predicate that failed.
AlwaysCheckAllPredicates bool
}
// PredicatePolicy describes a struct of a predicate policy.
type PredicatePolicy struct {
// Identifier of the predicate policy
// For a custom predicate, the name can be user-defined
// For the Kubernetes provided predicates, the name is the identifier of the pre-defined predicate
Name string
// Holds the parameters to configure the given predicate
Argument *PredicateArgument
}
// PriorityPolicy describes a struct of a priority policy.
type PriorityPolicy struct {
// Identifier of the priority policy
// For a custom priority, the name can be user-defined
// For the Kubernetes provided priority functions, the name is the identifier of the pre-defined priority function
Name string
// The numeric multiplier for the node scores that the priority function generates
// The weight should be a positive integer
Weight int
// Holds the parameters to configure the given priority function
Argument *PriorityArgument
}
// PredicateArgument represents the arguments to configure predicate functions in scheduler policy configuration.
// Only one of its members may be specified
type PredicateArgument struct {
// The predicate that provides affinity for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAffinity *ServiceAffinity
// The predicate that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelsPresence *LabelsPresence
}
// PriorityArgument represents the arguments to configure priority functions in scheduler policy configuration.
// Only one of its members may be specified
type PriorityArgument struct {
// The priority function that ensures a good spread (anti-affinity) for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAntiAffinity *ServiceAntiAffinity
// The priority function that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelPreference *LabelPreference
}
// ServiceAffinity holds the parameters that are used to configure the corresponding predicate in scheduler policy configuration.
type ServiceAffinity struct {
// The list of labels that identify node "groups"
// All of the labels should match for the node to be considered a fit for hosting the pod
Labels []string
}
// LabelsPresence holds the parameters that are used to configure the corresponding predicate in scheduler policy configuration.
type LabelsPresence struct {
// The list of labels that identify node "groups"
// All of the labels should be either present (or absent) for the node to be considered a fit for hosting the pod
Labels []string
// The boolean flag that indicates whether the labels should be present or absent from the node
Presence bool
}
// ServiceAntiAffinity holds the parameters that are used to configure the corresponding priority function
type ServiceAntiAffinity struct {
// Used to identify node "groups"
Label string
}
// LabelPreference holds the parameters that are used to configure the corresponding priority function
type LabelPreference struct {
// Used to identify node "groups"
Label string
// This is a boolean flag
// If true, higher priority is given to nodes that have the label
// If false, higher priority is given to nodes that do not have the label
Presence bool
}
// ExtenderManagedResource describes the arguments of extended resources
// managed by an extender.
type ExtenderManagedResource struct {
// Name is the extended resource name.
Name v1.ResourceName
// IgnoredByScheduler indicates whether kube-scheduler should ignore this
// resource when applying predicates.
IgnoredByScheduler bool
}
// ExtenderConfig holds the parameters used to communicate with the extender. If a verb is unspecified/empty,
// it is assumed that the extender chose not to provide that extension.
type ExtenderConfig struct {
// URLPrefix at which the extender is available
URLPrefix string
// Verb for the filter call, empty if not supported. This verb is appended to the URLPrefix when issuing the filter call to extender.
FilterVerb string
// Verb for the prioritize call, empty if not supported. This verb is appended to the URLPrefix when issuing the prioritize call to extender.
PrioritizeVerb string
// The numeric multiplier for the node scores that the prioritize call generates.
// The weight should be a positive integer
Weight int
// Verb for the bind call, empty if not supported. This verb is appended to the URLPrefix when issuing the bind call to extender.
// If this method is implemented by the extender, it is the extender's responsibility to bind the pod to apiserver. Only one extender
// can implement this function.
BindVerb string
// EnableHTTPS specifies whether https should be used to communicate with the extender
EnableHTTPS bool
// TLSConfig specifies the transport layer security config
TLSConfig *restclient.TLSClientConfig
// HTTPTimeout specifies the timeout duration for a call to the extender. Filter timeout fails the scheduling of the pod. Prioritize
// timeout is ignored, k8s/other extenders priorities are used to select the node.
HTTPTimeout time.Duration
// NodeCacheCapable specifies that the extender is capable of caching node information,
// so the scheduler should only send minimal information about the eligible nodes
// assuming that the extender already cached full details of all nodes in the cluster
NodeCacheCapable bool
// ManagedResources is a list of extended resources that are managed by
// this extender.
// - A pod will be sent to the extender on the Filter, Prioritize and Bind
// (if the extender is the binder) phases iff the pod requests at least
// one of the extended resources in this list. If empty or unspecified,
// all pods will be sent to this extender.
// - If IgnoredByScheduler is set to true for a resource, kube-scheduler
// will skip checking the resource in predicates.
// +optional
ManagedResources []ExtenderManagedResource
}
// ExtenderArgs represents the arguments needed by the extender to filter/prioritize
// nodes for a pod.
type ExtenderArgs struct {
// Pod being scheduled
Pod v1.Pod
// List of candidate nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == false
Nodes *v1.NodeList
// List of candidate node names where the pod can be scheduled; to be
// populated only if ExtenderConfig.NodeCacheCapable == true
NodeNames *[]string
}
// FailedNodesMap represents the filtered out nodes, with node names and failure messages
type FailedNodesMap map[string]string
// ExtenderFilterResult represents the results of a filter call to an extender
type ExtenderFilterResult struct {
// Filtered set of nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == false
Nodes *v1.NodeList
// Filtered set of nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == true
NodeNames *[]string
// Filtered out nodes where the pod can't be scheduled and the failure messages
FailedNodes FailedNodesMap
// Error message indicating failure
Error string
}
// ExtenderBindingArgs represents the arguments to an extender for binding a pod to a node.
type ExtenderBindingArgs struct {
// PodName is the name of the pod being bound
PodName string
// PodNamespace is the namespace of the pod being bound
PodNamespace string
// PodUID is the UID of the pod being bound
PodUID types.UID
// Node selected by the scheduler
Node string
}
// ExtenderBindingResult represents the result of binding of a pod to a node from an extender.
type ExtenderBindingResult struct {
// Error message indicating failure
Error string
}
// HostPriority represents the priority of scheduling to a particular host, higher priority is better.
type HostPriority struct {
// Name of the host
Host string
// Score associated with the host
Score int
}
// HostPriorityList declares a []HostPriority type.
type HostPriorityList []HostPriority
// SystemPriorityClasses defines special priority classes which are used by system critical pods that should not be preempted by workload pods.
var SystemPriorityClasses = map[string]int32{
SystemClusterCritical: SystemCriticalPriority,
SystemNodeCritical: SystemCriticalPriority + 1000,
}
func (h HostPriorityList) Len() int {
return len(h)
}
func (h HostPriorityList) Less(i, j int) bool {
if h[i].Score == h[j].Score {
return h[i].Host < h[j].Host
}
return h[i].Score < h[j].Score
}
func (h HostPriorityList) Swap(i, j int) {
h[i], h[j] = h[j], h[i]
}

39
vendor/k8s.io/kubernetes/pkg/scheduler/api/v1/BUILD generated vendored Normal file
View File

@ -0,0 +1,39 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"register.go",
"types.go",
"zz_generated.deepcopy.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/api/v1",
deps = [
"//pkg/scheduler/api:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

20
vendor/k8s.io/kubernetes/pkg/scheduler/api/v1/doc.go generated vendored Normal file
View File

@ -0,0 +1,20 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// +k8s:deepcopy-gen=package
// Package v1 contains scheduler API objects.
package v1 // import "k8s.io/kubernetes/pkg/scheduler/api/v1"

View File

@ -0,0 +1,59 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
)
// SchemeGroupVersion is group version used to register these objects
// TODO this should be in the "scheduler" group
var SchemeGroupVersion = schema.GroupVersion{Group: "", Version: "v1"}
func init() {
if err := addKnownTypes(schedulerapi.Scheme); err != nil {
// Programmer error.
panic(err)
}
}
var (
// TODO: move SchemeBuilder with zz_generated.deepcopy.go to k8s.io/api.
// localSchemeBuilder and AddToScheme will stay in k8s.io/kubernetes.
// SchemeBuilder is a v1 api scheme builder.
SchemeBuilder runtime.SchemeBuilder
localSchemeBuilder = &SchemeBuilder
// AddToScheme is used to add stored functions to scheme.
AddToScheme = localSchemeBuilder.AddToScheme
)
func init() {
// We only register manually written functions here. The registration of the
// generated functions takes place in the generated files. The separation
// makes the code compile even when the generated files are missing.
localSchemeBuilder.Register(addKnownTypes)
}
func addKnownTypes(scheme *runtime.Scheme) error {
scheme.AddKnownTypes(SchemeGroupVersion,
&Policy{},
)
return nil
}

249
vendor/k8s.io/kubernetes/pkg/scheduler/api/v1/types.go generated vendored Normal file
View File

@ -0,0 +1,249 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1
import (
"time"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
restclient "k8s.io/client-go/rest"
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// Policy describes a struct for a policy resource used in api.
type Policy struct {
metav1.TypeMeta `json:",inline"`
// Holds the information to configure the fit predicate functions
Predicates []PredicatePolicy `json:"predicates"`
// Holds the information to configure the priority functions
Priorities []PriorityPolicy `json:"priorities"`
// Holds the information to communicate with the extender(s)
ExtenderConfigs []ExtenderConfig `json:"extenders"`
// RequiredDuringScheduling affinity is not symmetric, but there is an implicit PreferredDuringScheduling affinity rule
// corresponding to every RequiredDuringScheduling affinity rule.
// HardPodAffinitySymmetricWeight represents the weight of implicit PreferredDuringScheduling affinity rule, in the range 1-100.
HardPodAffinitySymmetricWeight int `json:"hardPodAffinitySymmetricWeight"`
// When AlwaysCheckAllPredicates is set to true, scheduler checks all
// the configured predicates even after one or more of them fails.
// When the flag is set to false, scheduler skips checking the rest
// of the predicates after it finds one predicate that failed.
AlwaysCheckAllPredicates bool `json:"alwaysCheckAllPredicates"`
}
// PredicatePolicy describes a struct of a predicate policy.
type PredicatePolicy struct {
// Identifier of the predicate policy
// For a custom predicate, the name can be user-defined
// For the Kubernetes provided predicates, the name is the identifier of the pre-defined predicate
Name string `json:"name"`
// Holds the parameters to configure the given predicate
Argument *PredicateArgument `json:"argument"`
}
// PriorityPolicy describes a struct of a priority policy.
type PriorityPolicy struct {
// Identifier of the priority policy
// For a custom priority, the name can be user-defined
// For the Kubernetes provided priority functions, the name is the identifier of the pre-defined priority function
Name string `json:"name"`
// The numeric multiplier for the node scores that the priority function generates
// The weight should be non-zero and can be a positive or a negative integer
Weight int `json:"weight"`
// Holds the parameters to configure the given priority function
Argument *PriorityArgument `json:"argument"`
}
// PredicateArgument represents the arguments to configure predicate functions in scheduler policy configuration.
// Only one of its members may be specified
type PredicateArgument struct {
// The predicate that provides affinity for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAffinity *ServiceAffinity `json:"serviceAffinity"`
// The predicate that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelsPresence *LabelsPresence `json:"labelsPresence"`
}
// PriorityArgument represents the arguments to configure priority functions in scheduler policy configuration.
// Only one of its members may be specified
type PriorityArgument struct {
// The priority function that ensures a good spread (anti-affinity) for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAntiAffinity *ServiceAntiAffinity `json:"serviceAntiAffinity"`
// The priority function that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelPreference *LabelPreference `json:"labelPreference"`
}
// ServiceAffinity holds the parameters that are used to configure the corresponding predicate in scheduler policy configuration.
type ServiceAffinity struct {
// The list of labels that identify node "groups"
// All of the labels should match for the node to be considered a fit for hosting the pod
Labels []string `json:"labels"`
}
// LabelsPresence holds the parameters that are used to configure the corresponding predicate in scheduler policy configuration.
type LabelsPresence struct {
// The list of labels that identify node "groups"
// All of the labels should be either present (or absent) for the node to be considered a fit for hosting the pod
Labels []string `json:"labels"`
// The boolean flag that indicates whether the labels should be present or absent from the node
Presence bool `json:"presence"`
}
// ServiceAntiAffinity holds the parameters that are used to configure the corresponding priority function
type ServiceAntiAffinity struct {
// Used to identify node "groups"
Label string `json:"label"`
}
// LabelPreference holds the parameters that are used to configure the corresponding priority function
type LabelPreference struct {
// Used to identify node "groups"
Label string `json:"label"`
// This is a boolean flag
// If true, higher priority is given to nodes that have the label
// If false, higher priority is given to nodes that do not have the label
Presence bool `json:"presence"`
}
// ExtenderManagedResource describes the arguments of extended resources
// managed by an extender.
type ExtenderManagedResource struct {
// Name is the extended resource name.
Name apiv1.ResourceName `json:"name,casttype=ResourceName"`
// IgnoredByScheduler indicates whether kube-scheduler should ignore this
// resource when applying predicates.
IgnoredByScheduler bool `json:"ignoredByScheduler,omitempty"`
}
// ExtenderConfig holds the parameters used to communicate with the extender. If a verb is unspecified/empty,
// it is assumed that the extender chose not to provide that extension.
type ExtenderConfig struct {
// URLPrefix at which the extender is available
URLPrefix string `json:"urlPrefix"`
// Verb for the filter call, empty if not supported. This verb is appended to the URLPrefix when issuing the filter call to extender.
FilterVerb string `json:"filterVerb,omitempty"`
// Verb for the prioritize call, empty if not supported. This verb is appended to the URLPrefix when issuing the prioritize call to extender.
PrioritizeVerb string `json:"prioritizeVerb,omitempty"`
// The numeric multiplier for the node scores that the prioritize call generates.
// The weight should be a positive integer
Weight int `json:"weight,omitempty"`
// Verb for the bind call, empty if not supported. This verb is appended to the URLPrefix when issuing the bind call to extender.
// If this method is implemented by the extender, it is the extender's responsibility to bind the pod to apiserver. Only one extender
// can implement this function.
BindVerb string
// EnableHTTPS specifies whether https should be used to communicate with the extender
EnableHTTPS bool `json:"enableHttps,omitempty"`
// TLSConfig specifies the transport layer security config
TLSConfig *restclient.TLSClientConfig `json:"tlsConfig,omitempty"`
// HTTPTimeout specifies the timeout duration for a call to the extender. Filter timeout fails the scheduling of the pod. Prioritize
// timeout is ignored, k8s/other extenders priorities are used to select the node.
HTTPTimeout time.Duration `json:"httpTimeout,omitempty"`
// NodeCacheCapable specifies that the extender is capable of caching node information,
// so the scheduler should only send minimal information about the eligible nodes
// assuming that the extender already cached full details of all nodes in the cluster
NodeCacheCapable bool `json:"nodeCacheCapable,omitempty"`
// ManagedResources is a list of extended resources that are managed by
// this extender.
// - A pod will be sent to the extender on the Filter, Prioritize and Bind
// (if the extender is the binder) phases iff the pod requests at least
// one of the extended resources in this list. If empty or unspecified,
// all pods will be sent to this extender.
// - If IgnoredByScheduler is set to true for a resource, kube-scheduler
// will skip checking the resource in predicates.
// +optional
ManagedResources []ExtenderManagedResource `json:"managedResources,omitempty"`
}
// ExtenderArgs represents the arguments needed by the extender to filter/prioritize
// nodes for a pod.
type ExtenderArgs struct {
// Pod being scheduled
Pod apiv1.Pod `json:"pod"`
// List of candidate nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == false
Nodes *apiv1.NodeList `json:"nodes,omitempty"`
// List of candidate node names where the pod can be scheduled; to be
// populated only if ExtenderConfig.NodeCacheCapable == true
NodeNames *[]string `json:"nodenames,omitempty"`
}
// FailedNodesMap represents the filtered out nodes, with node names and failure messages
type FailedNodesMap map[string]string
// ExtenderFilterResult represents the results of a filter call to an extender
type ExtenderFilterResult struct {
// Filtered set of nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == false
Nodes *apiv1.NodeList `json:"nodes,omitempty"`
// Filtered set of nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == true
NodeNames *[]string `json:"nodenames,omitempty"`
// Filtered out nodes where the pod can't be scheduled and the failure messages
FailedNodes FailedNodesMap `json:"failedNodes,omitempty"`
// Error message indicating failure
Error string `json:"error,omitempty"`
}
// ExtenderBindingArgs represents the arguments to an extender for binding a pod to a node.
type ExtenderBindingArgs struct {
// PodName is the name of the pod being bound
PodName string
// PodNamespace is the namespace of the pod being bound
PodNamespace string
// PodUID is the UID of the pod being bound
PodUID types.UID
// Node selected by the scheduler
Node string
}
// ExtenderBindingResult represents the result of binding of a pod to a node from an extender.
type ExtenderBindingResult struct {
// Error message indicating failure
Error string
}
// HostPriority represents the priority of scheduling to a particular host, higher priority is better.
type HostPriority struct {
// Name of the host
Host string `json:"host"`
// Score associated with the host
Score int `json:"score"`
}
// HostPriorityList declares a []HostPriority type.
type HostPriorityList []HostPriority
func (h HostPriorityList) Len() int {
return len(h)
}
func (h HostPriorityList) Less(i, j int) bool {
if h[i].Score == h[j].Score {
return h[i].Host < h[j].Host
}
return h[i].Score < h[j].Score
}
func (h HostPriorityList) Swap(i, j int) {
h[i], h[j] = h[j], h[i]
}

View File

@ -0,0 +1,485 @@
// +build !ignore_autogenerated
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by deepcopy-gen. DO NOT EDIT.
package v1
import (
core_v1 "k8s.io/api/core/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
rest "k8s.io/client-go/rest"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderArgs) DeepCopyInto(out *ExtenderArgs) {
*out = *in
in.Pod.DeepCopyInto(&out.Pod)
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
if *in == nil {
*out = nil
} else {
*out = new(core_v1.NodeList)
(*in).DeepCopyInto(*out)
}
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
if *in == nil {
*out = nil
} else {
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderArgs.
func (in *ExtenderArgs) DeepCopy() *ExtenderArgs {
if in == nil {
return nil
}
out := new(ExtenderArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderBindingArgs) DeepCopyInto(out *ExtenderBindingArgs) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderBindingArgs.
func (in *ExtenderBindingArgs) DeepCopy() *ExtenderBindingArgs {
if in == nil {
return nil
}
out := new(ExtenderBindingArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderBindingResult) DeepCopyInto(out *ExtenderBindingResult) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderBindingResult.
func (in *ExtenderBindingResult) DeepCopy() *ExtenderBindingResult {
if in == nil {
return nil
}
out := new(ExtenderBindingResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderConfig) DeepCopyInto(out *ExtenderConfig) {
*out = *in
if in.TLSConfig != nil {
in, out := &in.TLSConfig, &out.TLSConfig
if *in == nil {
*out = nil
} else {
*out = new(rest.TLSClientConfig)
(*in).DeepCopyInto(*out)
}
}
if in.ManagedResources != nil {
in, out := &in.ManagedResources, &out.ManagedResources
*out = make([]ExtenderManagedResource, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderConfig.
func (in *ExtenderConfig) DeepCopy() *ExtenderConfig {
if in == nil {
return nil
}
out := new(ExtenderConfig)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderFilterResult) DeepCopyInto(out *ExtenderFilterResult) {
*out = *in
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
if *in == nil {
*out = nil
} else {
*out = new(core_v1.NodeList)
(*in).DeepCopyInto(*out)
}
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
if *in == nil {
*out = nil
} else {
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
}
if in.FailedNodes != nil {
in, out := &in.FailedNodes, &out.FailedNodes
*out = make(FailedNodesMap, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderFilterResult.
func (in *ExtenderFilterResult) DeepCopy() *ExtenderFilterResult {
if in == nil {
return nil
}
out := new(ExtenderFilterResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderManagedResource) DeepCopyInto(out *ExtenderManagedResource) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderManagedResource.
func (in *ExtenderManagedResource) DeepCopy() *ExtenderManagedResource {
if in == nil {
return nil
}
out := new(ExtenderManagedResource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in FailedNodesMap) DeepCopyInto(out *FailedNodesMap) {
{
in := &in
*out = make(FailedNodesMap, len(*in))
for key, val := range *in {
(*out)[key] = val
}
return
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedNodesMap.
func (in FailedNodesMap) DeepCopy() FailedNodesMap {
if in == nil {
return nil
}
out := new(FailedNodesMap)
in.DeepCopyInto(out)
return *out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HostPriority) DeepCopyInto(out *HostPriority) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostPriority.
func (in *HostPriority) DeepCopy() *HostPriority {
if in == nil {
return nil
}
out := new(HostPriority)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in HostPriorityList) DeepCopyInto(out *HostPriorityList) {
{
in := &in
*out = make(HostPriorityList, len(*in))
copy(*out, *in)
return
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostPriorityList.
func (in HostPriorityList) DeepCopy() HostPriorityList {
if in == nil {
return nil
}
out := new(HostPriorityList)
in.DeepCopyInto(out)
return *out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LabelPreference) DeepCopyInto(out *LabelPreference) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelPreference.
func (in *LabelPreference) DeepCopy() *LabelPreference {
if in == nil {
return nil
}
out := new(LabelPreference)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LabelsPresence) DeepCopyInto(out *LabelsPresence) {
*out = *in
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelsPresence.
func (in *LabelsPresence) DeepCopy() *LabelsPresence {
if in == nil {
return nil
}
out := new(LabelsPresence)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Policy) DeepCopyInto(out *Policy) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Predicates != nil {
in, out := &in.Predicates, &out.Predicates
*out = make([]PredicatePolicy, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Priorities != nil {
in, out := &in.Priorities, &out.Priorities
*out = make([]PriorityPolicy, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.ExtenderConfigs != nil {
in, out := &in.ExtenderConfigs, &out.ExtenderConfigs
*out = make([]ExtenderConfig, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Policy.
func (in *Policy) DeepCopy() *Policy {
if in == nil {
return nil
}
out := new(Policy)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *Policy) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PredicateArgument) DeepCopyInto(out *PredicateArgument) {
*out = *in
if in.ServiceAffinity != nil {
in, out := &in.ServiceAffinity, &out.ServiceAffinity
if *in == nil {
*out = nil
} else {
*out = new(ServiceAffinity)
(*in).DeepCopyInto(*out)
}
}
if in.LabelsPresence != nil {
in, out := &in.LabelsPresence, &out.LabelsPresence
if *in == nil {
*out = nil
} else {
*out = new(LabelsPresence)
(*in).DeepCopyInto(*out)
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredicateArgument.
func (in *PredicateArgument) DeepCopy() *PredicateArgument {
if in == nil {
return nil
}
out := new(PredicateArgument)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PredicatePolicy) DeepCopyInto(out *PredicatePolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
if *in == nil {
*out = nil
} else {
*out = new(PredicateArgument)
(*in).DeepCopyInto(*out)
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredicatePolicy.
func (in *PredicatePolicy) DeepCopy() *PredicatePolicy {
if in == nil {
return nil
}
out := new(PredicatePolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PriorityArgument) DeepCopyInto(out *PriorityArgument) {
*out = *in
if in.ServiceAntiAffinity != nil {
in, out := &in.ServiceAntiAffinity, &out.ServiceAntiAffinity
if *in == nil {
*out = nil
} else {
*out = new(ServiceAntiAffinity)
**out = **in
}
}
if in.LabelPreference != nil {
in, out := &in.LabelPreference, &out.LabelPreference
if *in == nil {
*out = nil
} else {
*out = new(LabelPreference)
**out = **in
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PriorityArgument.
func (in *PriorityArgument) DeepCopy() *PriorityArgument {
if in == nil {
return nil
}
out := new(PriorityArgument)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PriorityPolicy) DeepCopyInto(out *PriorityPolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
if *in == nil {
*out = nil
} else {
*out = new(PriorityArgument)
(*in).DeepCopyInto(*out)
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PriorityPolicy.
func (in *PriorityPolicy) DeepCopy() *PriorityPolicy {
if in == nil {
return nil
}
out := new(PriorityPolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServiceAffinity) DeepCopyInto(out *ServiceAffinity) {
*out = *in
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceAffinity.
func (in *ServiceAffinity) DeepCopy() *ServiceAffinity {
if in == nil {
return nil
}
out := new(ServiceAffinity)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServiceAntiAffinity) DeepCopyInto(out *ServiceAntiAffinity) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceAntiAffinity.
func (in *ServiceAntiAffinity) DeepCopy() *ServiceAntiAffinity {
if in == nil {
return nil
}
out := new(ServiceAntiAffinity)
in.DeepCopyInto(out)
return out
}

View File

@ -0,0 +1,41 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = ["validation.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/api/validation",
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/validation:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = ["validation_test.go"],
embed = [":go_default_library"],
deps = ["//pkg/scheduler/api:go_default_library"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@ -0,0 +1,82 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validation
import (
"errors"
"fmt"
"k8s.io/api/core/v1"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/validation"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
)
// ValidatePolicy checks for errors in the Config
// It does not return early so that it can find as many errors as possible
func ValidatePolicy(policy schedulerapi.Policy) error {
var validationErrors []error
for _, priority := range policy.Priorities {
if priority.Weight <= 0 || priority.Weight >= schedulerapi.MaxWeight {
validationErrors = append(validationErrors, fmt.Errorf("Priority %s should have a positive weight applied to it or it has overflown", priority.Name))
}
}
binders := 0
extenderManagedResources := sets.NewString()
for _, extender := range policy.ExtenderConfigs {
if len(extender.PrioritizeVerb) > 0 && extender.Weight <= 0 {
validationErrors = append(validationErrors, fmt.Errorf("Priority for extender %s should have a positive weight applied to it", extender.URLPrefix))
}
if extender.BindVerb != "" {
binders++
}
for _, resource := range extender.ManagedResources {
errs := validateExtendedResourceName(resource.Name)
if len(errs) != 0 {
validationErrors = append(validationErrors, errs...)
}
if extenderManagedResources.Has(string(resource.Name)) {
validationErrors = append(validationErrors, fmt.Errorf("Duplicate extender managed resource name %s", string(resource.Name)))
}
extenderManagedResources.Insert(string(resource.Name))
}
}
if binders > 1 {
validationErrors = append(validationErrors, fmt.Errorf("Only one extender can implement bind, found %v", binders))
}
return utilerrors.NewAggregate(validationErrors)
}
// validateExtendedResourceName checks whether the specified name is a valid
// extended resource name.
func validateExtendedResourceName(name v1.ResourceName) []error {
var validationErrors []error
for _, msg := range validation.IsQualifiedName(string(name)) {
validationErrors = append(validationErrors, errors.New(msg))
}
if len(validationErrors) != 0 {
return validationErrors
}
if !v1helper.IsExtendedResourceName(name) {
validationErrors = append(validationErrors, fmt.Errorf("%s is an invalid extended resource name", name))
}
return validationErrors
}

View File

@ -0,0 +1,95 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validation
import (
"errors"
"fmt"
"testing"
"k8s.io/kubernetes/pkg/scheduler/api"
)
func TestValidatePolicy(t *testing.T) {
tests := []struct {
policy api.Policy
expected error
}{
{
policy: api.Policy{Priorities: []api.PriorityPolicy{{Name: "NoWeightPriority"}}},
expected: errors.New("Priority NoWeightPriority should have a positive weight applied to it or it has overflown"),
},
{
policy: api.Policy{Priorities: []api.PriorityPolicy{{Name: "NoWeightPriority", Weight: 0}}},
expected: errors.New("Priority NoWeightPriority should have a positive weight applied to it or it has overflown"),
},
{
policy: api.Policy{Priorities: []api.PriorityPolicy{{Name: "WeightPriority", Weight: 2}}},
expected: nil,
},
{
policy: api.Policy{Priorities: []api.PriorityPolicy{{Name: "WeightPriority", Weight: -2}}},
expected: errors.New("Priority WeightPriority should have a positive weight applied to it or it has overflown"),
},
{
policy: api.Policy{Priorities: []api.PriorityPolicy{{Name: "WeightPriority", Weight: api.MaxWeight}}},
expected: errors.New("Priority WeightPriority should have a positive weight applied to it or it has overflown"),
},
{
policy: api.Policy{ExtenderConfigs: []api.ExtenderConfig{{URLPrefix: "http://127.0.0.1:8081/extender", PrioritizeVerb: "prioritize", Weight: 2}}},
expected: nil,
},
{
policy: api.Policy{ExtenderConfigs: []api.ExtenderConfig{{URLPrefix: "http://127.0.0.1:8081/extender", PrioritizeVerb: "prioritize", Weight: -2}}},
expected: errors.New("Priority for extender http://127.0.0.1:8081/extender should have a positive weight applied to it"),
},
{
policy: api.Policy{ExtenderConfigs: []api.ExtenderConfig{{URLPrefix: "http://127.0.0.1:8081/extender", FilterVerb: "filter"}}},
expected: nil,
},
{
policy: api.Policy{
ExtenderConfigs: []api.ExtenderConfig{
{URLPrefix: "http://127.0.0.1:8081/extender", BindVerb: "bind"},
{URLPrefix: "http://127.0.0.1:8082/extender", BindVerb: "bind"},
}},
expected: errors.New("Only one extender can implement bind, found 2"),
},
{
policy: api.Policy{
ExtenderConfigs: []api.ExtenderConfig{
{URLPrefix: "http://127.0.0.1:8081/extender", ManagedResources: []api.ExtenderManagedResource{{Name: "foo.com/bar"}}},
{URLPrefix: "http://127.0.0.1:8082/extender", BindVerb: "bind", ManagedResources: []api.ExtenderManagedResource{{Name: "foo.com/bar"}}},
}},
expected: errors.New("Duplicate extender managed resource name foo.com/bar"),
},
{
policy: api.Policy{
ExtenderConfigs: []api.ExtenderConfig{
{URLPrefix: "http://127.0.0.1:8081/extender", ManagedResources: []api.ExtenderManagedResource{{Name: "kubernetes.io/foo"}}},
}},
expected: errors.New("kubernetes.io/foo is an invalid extended resource name"),
},
}
for _, test := range tests {
actual := ValidatePolicy(test.policy)
if fmt.Sprint(test.expected) != fmt.Sprint(actual) {
t.Errorf("expected: %s, actual: %s", test.expected, actual)
}
}
}

View File

@ -0,0 +1,485 @@
// +build !ignore_autogenerated
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by deepcopy-gen. DO NOT EDIT.
package api
import (
v1 "k8s.io/api/core/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
rest "k8s.io/client-go/rest"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderArgs) DeepCopyInto(out *ExtenderArgs) {
*out = *in
in.Pod.DeepCopyInto(&out.Pod)
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
if *in == nil {
*out = nil
} else {
*out = new(v1.NodeList)
(*in).DeepCopyInto(*out)
}
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
if *in == nil {
*out = nil
} else {
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderArgs.
func (in *ExtenderArgs) DeepCopy() *ExtenderArgs {
if in == nil {
return nil
}
out := new(ExtenderArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderBindingArgs) DeepCopyInto(out *ExtenderBindingArgs) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderBindingArgs.
func (in *ExtenderBindingArgs) DeepCopy() *ExtenderBindingArgs {
if in == nil {
return nil
}
out := new(ExtenderBindingArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderBindingResult) DeepCopyInto(out *ExtenderBindingResult) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderBindingResult.
func (in *ExtenderBindingResult) DeepCopy() *ExtenderBindingResult {
if in == nil {
return nil
}
out := new(ExtenderBindingResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderConfig) DeepCopyInto(out *ExtenderConfig) {
*out = *in
if in.TLSConfig != nil {
in, out := &in.TLSConfig, &out.TLSConfig
if *in == nil {
*out = nil
} else {
*out = new(rest.TLSClientConfig)
(*in).DeepCopyInto(*out)
}
}
if in.ManagedResources != nil {
in, out := &in.ManagedResources, &out.ManagedResources
*out = make([]ExtenderManagedResource, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderConfig.
func (in *ExtenderConfig) DeepCopy() *ExtenderConfig {
if in == nil {
return nil
}
out := new(ExtenderConfig)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderFilterResult) DeepCopyInto(out *ExtenderFilterResult) {
*out = *in
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
if *in == nil {
*out = nil
} else {
*out = new(v1.NodeList)
(*in).DeepCopyInto(*out)
}
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
if *in == nil {
*out = nil
} else {
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
}
if in.FailedNodes != nil {
in, out := &in.FailedNodes, &out.FailedNodes
*out = make(FailedNodesMap, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderFilterResult.
func (in *ExtenderFilterResult) DeepCopy() *ExtenderFilterResult {
if in == nil {
return nil
}
out := new(ExtenderFilterResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderManagedResource) DeepCopyInto(out *ExtenderManagedResource) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderManagedResource.
func (in *ExtenderManagedResource) DeepCopy() *ExtenderManagedResource {
if in == nil {
return nil
}
out := new(ExtenderManagedResource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in FailedNodesMap) DeepCopyInto(out *FailedNodesMap) {
{
in := &in
*out = make(FailedNodesMap, len(*in))
for key, val := range *in {
(*out)[key] = val
}
return
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedNodesMap.
func (in FailedNodesMap) DeepCopy() FailedNodesMap {
if in == nil {
return nil
}
out := new(FailedNodesMap)
in.DeepCopyInto(out)
return *out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HostPriority) DeepCopyInto(out *HostPriority) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostPriority.
func (in *HostPriority) DeepCopy() *HostPriority {
if in == nil {
return nil
}
out := new(HostPriority)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in HostPriorityList) DeepCopyInto(out *HostPriorityList) {
{
in := &in
*out = make(HostPriorityList, len(*in))
copy(*out, *in)
return
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostPriorityList.
func (in HostPriorityList) DeepCopy() HostPriorityList {
if in == nil {
return nil
}
out := new(HostPriorityList)
in.DeepCopyInto(out)
return *out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LabelPreference) DeepCopyInto(out *LabelPreference) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelPreference.
func (in *LabelPreference) DeepCopy() *LabelPreference {
if in == nil {
return nil
}
out := new(LabelPreference)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LabelsPresence) DeepCopyInto(out *LabelsPresence) {
*out = *in
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelsPresence.
func (in *LabelsPresence) DeepCopy() *LabelsPresence {
if in == nil {
return nil
}
out := new(LabelsPresence)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Policy) DeepCopyInto(out *Policy) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Predicates != nil {
in, out := &in.Predicates, &out.Predicates
*out = make([]PredicatePolicy, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Priorities != nil {
in, out := &in.Priorities, &out.Priorities
*out = make([]PriorityPolicy, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.ExtenderConfigs != nil {
in, out := &in.ExtenderConfigs, &out.ExtenderConfigs
*out = make([]ExtenderConfig, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Policy.
func (in *Policy) DeepCopy() *Policy {
if in == nil {
return nil
}
out := new(Policy)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *Policy) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PredicateArgument) DeepCopyInto(out *PredicateArgument) {
*out = *in
if in.ServiceAffinity != nil {
in, out := &in.ServiceAffinity, &out.ServiceAffinity
if *in == nil {
*out = nil
} else {
*out = new(ServiceAffinity)
(*in).DeepCopyInto(*out)
}
}
if in.LabelsPresence != nil {
in, out := &in.LabelsPresence, &out.LabelsPresence
if *in == nil {
*out = nil
} else {
*out = new(LabelsPresence)
(*in).DeepCopyInto(*out)
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredicateArgument.
func (in *PredicateArgument) DeepCopy() *PredicateArgument {
if in == nil {
return nil
}
out := new(PredicateArgument)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PredicatePolicy) DeepCopyInto(out *PredicatePolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
if *in == nil {
*out = nil
} else {
*out = new(PredicateArgument)
(*in).DeepCopyInto(*out)
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredicatePolicy.
func (in *PredicatePolicy) DeepCopy() *PredicatePolicy {
if in == nil {
return nil
}
out := new(PredicatePolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PriorityArgument) DeepCopyInto(out *PriorityArgument) {
*out = *in
if in.ServiceAntiAffinity != nil {
in, out := &in.ServiceAntiAffinity, &out.ServiceAntiAffinity
if *in == nil {
*out = nil
} else {
*out = new(ServiceAntiAffinity)
**out = **in
}
}
if in.LabelPreference != nil {
in, out := &in.LabelPreference, &out.LabelPreference
if *in == nil {
*out = nil
} else {
*out = new(LabelPreference)
**out = **in
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PriorityArgument.
func (in *PriorityArgument) DeepCopy() *PriorityArgument {
if in == nil {
return nil
}
out := new(PriorityArgument)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PriorityPolicy) DeepCopyInto(out *PriorityPolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
if *in == nil {
*out = nil
} else {
*out = new(PriorityArgument)
(*in).DeepCopyInto(*out)
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PriorityPolicy.
func (in *PriorityPolicy) DeepCopy() *PriorityPolicy {
if in == nil {
return nil
}
out := new(PriorityPolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServiceAffinity) DeepCopyInto(out *ServiceAffinity) {
*out = *in
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceAffinity.
func (in *ServiceAffinity) DeepCopy() *ServiceAffinity {
if in == nil {
return nil
}
out := new(ServiceAffinity)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServiceAntiAffinity) DeepCopyInto(out *ServiceAntiAffinity) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceAntiAffinity.
func (in *ServiceAntiAffinity) DeepCopy() *ServiceAntiAffinity {
if in == nil {
return nil
}
out := new(ServiceAntiAffinity)
in.DeepCopyInto(out)
return out
}

85
vendor/k8s.io/kubernetes/pkg/scheduler/core/BUILD generated vendored Normal file
View File

@ -0,0 +1,85 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_test(
name = "go_default_test",
srcs = [
"equivalence_cache_test.go",
"extender_test.go",
"generic_scheduler_test.go",
"scheduling_queue_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/algorithm/priorities:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
],
)
go_library(
name = "go_default_library",
srcs = [
"equivalence_cache.go",
"extender.go",
"generic_scheduler.go",
"scheduling_queue.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/core",
deps = [
"//pkg/api/v1/pod:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/metrics:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//pkg/util/hash:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/golang/groupcache/lru:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/net:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/trace:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/client-go/util/workqueue:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@ -0,0 +1,233 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package core
import (
"hash/fnv"
"sync"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
hashutil "k8s.io/kubernetes/pkg/util/hash"
"github.com/golang/glog"
"github.com/golang/groupcache/lru"
)
// We use predicate names as cache's key, its count is limited
const maxCacheEntries = 100
// EquivalenceCache holds:
// 1. a map of AlgorithmCache with node name as key
// 2. function to get equivalence pod
type EquivalenceCache struct {
sync.RWMutex
getEquivalencePod algorithm.GetEquivalencePodFunc
algorithmCache map[string]AlgorithmCache
}
// The AlgorithmCache stores PredicateMap with predicate name as key
type AlgorithmCache struct {
// Only consider predicates for now
predicatesCache *lru.Cache
}
// PredicateMap stores HostPrediacte with equivalence hash as key
type PredicateMap map[uint64]HostPredicate
// HostPredicate is the cached predicate result
type HostPredicate struct {
Fit bool
FailReasons []algorithm.PredicateFailureReason
}
func newAlgorithmCache() AlgorithmCache {
return AlgorithmCache{
predicatesCache: lru.New(maxCacheEntries),
}
}
// NewEquivalenceCache creates a EquivalenceCache object.
func NewEquivalenceCache(getEquivalencePodFunc algorithm.GetEquivalencePodFunc) *EquivalenceCache {
return &EquivalenceCache{
getEquivalencePod: getEquivalencePodFunc,
algorithmCache: make(map[string]AlgorithmCache),
}
}
// UpdateCachedPredicateItem updates pod predicate for equivalence class
func (ec *EquivalenceCache) UpdateCachedPredicateItem(
podName, nodeName, predicateKey string,
fit bool,
reasons []algorithm.PredicateFailureReason,
equivalenceHash uint64,
needLock bool,
) {
if needLock {
ec.Lock()
defer ec.Unlock()
}
if _, exist := ec.algorithmCache[nodeName]; !exist {
ec.algorithmCache[nodeName] = newAlgorithmCache()
}
predicateItem := HostPredicate{
Fit: fit,
FailReasons: reasons,
}
// if cached predicate map already exists, just update the predicate by key
if v, ok := ec.algorithmCache[nodeName].predicatesCache.Get(predicateKey); ok {
predicateMap := v.(PredicateMap)
// maps in golang are references, no need to add them back
predicateMap[equivalenceHash] = predicateItem
} else {
ec.algorithmCache[nodeName].predicatesCache.Add(predicateKey,
PredicateMap{
equivalenceHash: predicateItem,
})
}
glog.V(5).Infof("Updated cached predicate: %v for pod: %v on node: %s, with item %v", predicateKey, podName, nodeName, predicateItem)
}
// PredicateWithECache returns:
// 1. if fit
// 2. reasons if not fit
// 3. if this cache is invalid
// based on cached predicate results
func (ec *EquivalenceCache) PredicateWithECache(
podName, nodeName, predicateKey string,
equivalenceHash uint64, needLock bool,
) (bool, []algorithm.PredicateFailureReason, bool) {
if needLock {
ec.RLock()
defer ec.RUnlock()
}
glog.V(5).Infof("Begin to calculate predicate: %v for pod: %s on node: %s based on equivalence cache",
predicateKey, podName, nodeName)
if algorithmCache, exist := ec.algorithmCache[nodeName]; exist {
if cachePredicate, exist := algorithmCache.predicatesCache.Get(predicateKey); exist {
predicateMap := cachePredicate.(PredicateMap)
// TODO(resouer) Is it possible a race that cache failed to update immediately?
if hostPredicate, ok := predicateMap[equivalenceHash]; ok {
if hostPredicate.Fit {
return true, []algorithm.PredicateFailureReason{}, false
}
return false, hostPredicate.FailReasons, false
}
// is invalid
return false, []algorithm.PredicateFailureReason{}, true
}
}
return false, []algorithm.PredicateFailureReason{}, true
}
// InvalidateCachedPredicateItem marks all items of given predicateKeys, of all pods, on the given node as invalid
func (ec *EquivalenceCache) InvalidateCachedPredicateItem(nodeName string, predicateKeys sets.String) {
if len(predicateKeys) == 0 {
return
}
ec.Lock()
defer ec.Unlock()
if algorithmCache, exist := ec.algorithmCache[nodeName]; exist {
for predicateKey := range predicateKeys {
algorithmCache.predicatesCache.Remove(predicateKey)
}
}
glog.V(5).Infof("Done invalidating cached predicates: %v on node: %s", predicateKeys, nodeName)
}
// InvalidateCachedPredicateItemOfAllNodes marks all items of given predicateKeys, of all pods, on all node as invalid
func (ec *EquivalenceCache) InvalidateCachedPredicateItemOfAllNodes(predicateKeys sets.String) {
if len(predicateKeys) == 0 {
return
}
ec.Lock()
defer ec.Unlock()
// algorithmCache uses nodeName as key, so we just iterate it and invalid given predicates
for _, algorithmCache := range ec.algorithmCache {
for predicateKey := range predicateKeys {
// just use keys is enough
algorithmCache.predicatesCache.Remove(predicateKey)
}
}
glog.V(5).Infof("Done invalidating cached predicates: %v on all node", predicateKeys)
}
// InvalidateAllCachedPredicateItemOfNode marks all cached items on given node as invalid
func (ec *EquivalenceCache) InvalidateAllCachedPredicateItemOfNode(nodeName string) {
ec.Lock()
defer ec.Unlock()
delete(ec.algorithmCache, nodeName)
glog.V(5).Infof("Done invalidating all cached predicates on node: %s", nodeName)
}
// InvalidateCachedPredicateItemForPodAdd is a wrapper of InvalidateCachedPredicateItem for pod add case
func (ec *EquivalenceCache) InvalidateCachedPredicateItemForPodAdd(pod *v1.Pod, nodeName string) {
// MatchInterPodAffinity: we assume scheduler can make sure newly bound pod
// will not break the existing inter pod affinity. So we does not need to invalidate
// MatchInterPodAffinity when pod added.
//
// But when a pod is deleted, existing inter pod affinity may become invalid.
// (e.g. this pod was preferred by some else, or vice versa)
//
// NOTE: assumptions above will not stand when we implemented features like
// RequiredDuringSchedulingRequiredDuringExecution.
// NoDiskConflict: the newly scheduled pod fits to existing pods on this node,
// it will also fits to equivalence class of existing pods
// GeneralPredicates: will always be affected by adding a new pod
invalidPredicates := sets.NewString("GeneralPredicates")
// MaxPDVolumeCountPredicate: we check the volumes of pod to make decision.
for _, vol := range pod.Spec.Volumes {
if vol.PersistentVolumeClaim != nil {
invalidPredicates.Insert("MaxEBSVolumeCount", "MaxGCEPDVolumeCount", "MaxAzureDiskVolumeCount")
} else {
if vol.AWSElasticBlockStore != nil {
invalidPredicates.Insert("MaxEBSVolumeCount")
}
if vol.GCEPersistentDisk != nil {
invalidPredicates.Insert("MaxGCEPDVolumeCount")
}
if vol.AzureDisk != nil {
invalidPredicates.Insert("MaxAzureDiskVolumeCount")
}
}
}
ec.InvalidateCachedPredicateItem(nodeName, invalidPredicates)
}
// equivalenceClassInfo holds equivalence hash which is used for checking equivalence cache.
// We will pass this to podFitsOnNode to ensure equivalence hash is only calculated per schedule.
type equivalenceClassInfo struct {
// Equivalence hash.
hash uint64
}
// getEquivalenceClassInfo returns the equivalence class of given pod.
func (ec *EquivalenceCache) getEquivalenceClassInfo(pod *v1.Pod) *equivalenceClassInfo {
equivalencePod := ec.getEquivalencePod(pod)
if equivalencePod != nil {
hash := fnv.New32a()
hashutil.DeepHashObject(hash, equivalencePod)
return &equivalenceClassInfo{
hash: uint64(hash.Sum32()),
}
}
return nil
}

View File

@ -0,0 +1,651 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package core
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
)
type predicateItemType struct {
fit bool
reasons []algorithm.PredicateFailureReason
}
func TestUpdateCachedPredicateItem(t *testing.T) {
tests := []struct {
name string
pod string
predicateKey string
nodeName string
fit bool
reasons []algorithm.PredicateFailureReason
equivalenceHash uint64
expectPredicateMap bool
expectCacheItem HostPredicate
}{
{
name: "test 1",
pod: "testPod",
predicateKey: "GeneralPredicates",
nodeName: "node1",
fit: true,
equivalenceHash: 123,
expectPredicateMap: false,
expectCacheItem: HostPredicate{
Fit: true,
},
},
{
name: "test 2",
pod: "testPod",
predicateKey: "GeneralPredicates",
nodeName: "node2",
fit: false,
equivalenceHash: 123,
expectPredicateMap: true,
expectCacheItem: HostPredicate{
Fit: false,
},
},
}
for _, test := range tests {
// this case does not need to calculate equivalence hash, just pass an empty function
fakeGetEquivalencePodFunc := func(pod *v1.Pod) interface{} { return nil }
ecache := NewEquivalenceCache(fakeGetEquivalencePodFunc)
if test.expectPredicateMap {
ecache.algorithmCache[test.nodeName] = newAlgorithmCache()
predicateItem := HostPredicate{
Fit: true,
}
ecache.algorithmCache[test.nodeName].predicatesCache.Add(test.predicateKey,
PredicateMap{
test.equivalenceHash: predicateItem,
})
}
ecache.UpdateCachedPredicateItem(
test.pod,
test.nodeName,
test.predicateKey,
test.fit,
test.reasons,
test.equivalenceHash,
true,
)
value, ok := ecache.algorithmCache[test.nodeName].predicatesCache.Get(test.predicateKey)
if !ok {
t.Errorf("Failed: %s, can't find expected cache item: %v",
test.name, test.expectCacheItem)
} else {
cachedMapItem := value.(PredicateMap)
if !reflect.DeepEqual(cachedMapItem[test.equivalenceHash], test.expectCacheItem) {
t.Errorf("Failed: %s, expected cached item: %v, but got: %v",
test.name, test.expectCacheItem, cachedMapItem[test.equivalenceHash])
}
}
}
}
func TestPredicateWithECache(t *testing.T) {
tests := []struct {
name string
podName string
nodeName string
predicateKey string
equivalenceHashForUpdatePredicate uint64
equivalenceHashForCalPredicate uint64
cachedItem predicateItemType
expectedInvalidPredicateKey bool
expectedInvalidEquivalenceHash bool
expectedPredicateItem predicateItemType
}{
{
name: "test 1",
podName: "testPod",
nodeName: "node1",
equivalenceHashForUpdatePredicate: 123,
equivalenceHashForCalPredicate: 123,
predicateKey: "GeneralPredicates",
cachedItem: predicateItemType{
fit: false,
reasons: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts},
},
expectedInvalidPredicateKey: true,
expectedPredicateItem: predicateItemType{
fit: false,
reasons: []algorithm.PredicateFailureReason{},
},
},
{
name: "test 2",
podName: "testPod",
nodeName: "node2",
equivalenceHashForUpdatePredicate: 123,
equivalenceHashForCalPredicate: 123,
predicateKey: "GeneralPredicates",
cachedItem: predicateItemType{
fit: true,
},
expectedInvalidPredicateKey: false,
expectedPredicateItem: predicateItemType{
fit: true,
reasons: []algorithm.PredicateFailureReason{},
},
},
{
name: "test 3",
podName: "testPod",
nodeName: "node3",
equivalenceHashForUpdatePredicate: 123,
equivalenceHashForCalPredicate: 123,
predicateKey: "GeneralPredicates",
cachedItem: predicateItemType{
fit: false,
reasons: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts},
},
expectedInvalidPredicateKey: false,
expectedPredicateItem: predicateItemType{
fit: false,
reasons: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts},
},
},
{
name: "test 4",
podName: "testPod",
nodeName: "node4",
equivalenceHashForUpdatePredicate: 123,
equivalenceHashForCalPredicate: 456,
predicateKey: "GeneralPredicates",
cachedItem: predicateItemType{
fit: false,
reasons: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts},
},
expectedInvalidPredicateKey: false,
expectedInvalidEquivalenceHash: true,
expectedPredicateItem: predicateItemType{
fit: false,
reasons: []algorithm.PredicateFailureReason{},
},
},
}
for _, test := range tests {
// this case does not need to calculate equivalence hash, just pass an empty function
fakeGetEquivalencePodFunc := func(pod *v1.Pod) interface{} { return nil }
ecache := NewEquivalenceCache(fakeGetEquivalencePodFunc)
// set cached item to equivalence cache
ecache.UpdateCachedPredicateItem(
test.podName,
test.nodeName,
test.predicateKey,
test.cachedItem.fit,
test.cachedItem.reasons,
test.equivalenceHashForUpdatePredicate,
true,
)
// if we want to do invalid, invalid the cached item
if test.expectedInvalidPredicateKey {
predicateKeys := sets.NewString()
predicateKeys.Insert(test.predicateKey)
ecache.InvalidateCachedPredicateItem(test.nodeName, predicateKeys)
}
// calculate predicate with equivalence cache
fit, reasons, invalid := ecache.PredicateWithECache(test.podName,
test.nodeName,
test.predicateKey,
test.equivalenceHashForCalPredicate,
true,
)
// returned invalid should match expectedInvalidPredicateKey or expectedInvalidEquivalenceHash
if test.equivalenceHashForUpdatePredicate != test.equivalenceHashForCalPredicate {
if invalid != test.expectedInvalidEquivalenceHash {
t.Errorf("Failed: %s, expected invalid: %v, but got: %v",
test.name, test.expectedInvalidEquivalenceHash, invalid)
}
} else {
if invalid != test.expectedInvalidPredicateKey {
t.Errorf("Failed: %s, expected invalid: %v, but got: %v",
test.name, test.expectedInvalidPredicateKey, invalid)
}
}
// returned predicate result should match expected predicate item
if fit != test.expectedPredicateItem.fit {
t.Errorf("Failed: %s, expected fit: %v, but got: %v", test.name, test.cachedItem.fit, fit)
}
if !reflect.DeepEqual(reasons, test.expectedPredicateItem.reasons) {
t.Errorf("Failed: %s, expected reasons: %v, but got: %v",
test.name, test.cachedItem.reasons, reasons)
}
}
}
func TestGetHashEquivalencePod(t *testing.T) {
testNamespace := "test"
pvcInfo := predicates.FakePersistentVolumeClaimInfo{
{
ObjectMeta: metav1.ObjectMeta{UID: "someEBSVol1", Name: "someEBSVol1", Namespace: testNamespace},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someEBSVol1"},
},
{
ObjectMeta: metav1.ObjectMeta{UID: "someEBSVol2", Name: "someEBSVol2", Namespace: testNamespace},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someNonEBSVol"},
},
{
ObjectMeta: metav1.ObjectMeta{UID: "someEBSVol3-0", Name: "someEBSVol3-0", Namespace: testNamespace},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "pvcWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{UID: "someEBSVol3-1", Name: "someEBSVol3-1", Namespace: testNamespace},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "anotherPVCWithDeletedPV"},
},
}
// use default equivalence class generator
ecache := NewEquivalenceCache(predicates.NewEquivalencePodGenerator(pvcInfo))
isController := true
pod1 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod1",
Namespace: testNamespace,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1",
Kind: "ReplicationController",
Name: "rc",
UID: "123",
Controller: &isController,
},
},
},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol1",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol2",
},
},
},
},
},
}
pod2 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod2",
Namespace: testNamespace,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1",
Kind: "ReplicationController",
Name: "rc",
UID: "123",
Controller: &isController,
},
},
},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol2",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol1",
},
},
},
},
},
}
pod3 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod3",
Namespace: testNamespace,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1",
Kind: "ReplicationController",
Name: "rc",
UID: "567",
Controller: &isController,
},
},
},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol3-1",
},
},
},
},
},
}
pod4 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod4",
Namespace: testNamespace,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1",
Kind: "ReplicationController",
Name: "rc",
UID: "567",
Controller: &isController,
},
},
},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol3-0",
},
},
},
},
},
}
pod5 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod5",
Namespace: testNamespace,
},
}
pod6 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod6",
Namespace: testNamespace,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1",
Kind: "ReplicationController",
Name: "rc",
UID: "567",
Controller: &isController,
},
},
},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "no-exists-pvc",
},
},
},
},
},
}
pod7 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod7",
Namespace: testNamespace,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1",
Kind: "ReplicationController",
Name: "rc",
UID: "567",
Controller: &isController,
},
},
},
}
type podInfo struct {
pod *v1.Pod
hashIsValid bool
}
tests := []struct {
podInfoList []podInfo
isEquivalent bool
}{
// pods with same controllerRef and same pvc claim
{
podInfoList: []podInfo{
{pod: pod1, hashIsValid: true},
{pod: pod2, hashIsValid: true},
},
isEquivalent: true,
},
// pods with same controllerRef but different pvc claim
{
podInfoList: []podInfo{
{pod: pod3, hashIsValid: true},
{pod: pod4, hashIsValid: true},
},
isEquivalent: false,
},
// pod without controllerRef
{
podInfoList: []podInfo{
{pod: pod5, hashIsValid: false},
},
isEquivalent: false,
},
// pods with same controllerRef but one has non-exists pvc claim
{
podInfoList: []podInfo{
{pod: pod6, hashIsValid: false},
{pod: pod7, hashIsValid: true},
},
isEquivalent: false,
},
}
var (
targetPodInfo podInfo
targetHash uint64
)
for _, test := range tests {
for i, podInfo := range test.podInfoList {
testPod := podInfo.pod
eclassInfo := ecache.getEquivalenceClassInfo(testPod)
if eclassInfo == nil && podInfo.hashIsValid {
t.Errorf("Failed: pod %v is expected to have valid hash", testPod)
}
if eclassInfo != nil {
// NOTE(harry): the first element will be used as target so
// this logic can't verify more than two inequivalent pods
if i == 0 {
targetHash = eclassInfo.hash
targetPodInfo = podInfo
} else {
if targetHash != eclassInfo.hash {
if test.isEquivalent {
t.Errorf("Failed: pod: %v is expected to be equivalent to: %v", testPod, targetPodInfo.pod)
}
}
}
}
}
}
}
func TestInvalidateCachedPredicateItemOfAllNodes(t *testing.T) {
testPredicate := "GeneralPredicates"
// tests is used to initialize all nodes
tests := []struct {
podName string
nodeName string
predicateKey string
equivalenceHashForUpdatePredicate uint64
cachedItem predicateItemType
}{
{
podName: "testPod",
nodeName: "node1",
equivalenceHashForUpdatePredicate: 123,
cachedItem: predicateItemType{
fit: false,
reasons: []algorithm.PredicateFailureReason{
predicates.ErrPodNotFitsHostPorts,
},
},
},
{
podName: "testPod",
nodeName: "node2",
equivalenceHashForUpdatePredicate: 456,
cachedItem: predicateItemType{
fit: false,
reasons: []algorithm.PredicateFailureReason{
predicates.ErrPodNotFitsHostPorts,
},
},
},
{
podName: "testPod",
nodeName: "node3",
equivalenceHashForUpdatePredicate: 123,
cachedItem: predicateItemType{
fit: true,
},
},
}
// this case does not need to calculate equivalence hash, just pass an empty function
fakeGetEquivalencePodFunc := func(pod *v1.Pod) interface{} { return nil }
ecache := NewEquivalenceCache(fakeGetEquivalencePodFunc)
for _, test := range tests {
// set cached item to equivalence cache
ecache.UpdateCachedPredicateItem(
test.podName,
test.nodeName,
testPredicate,
test.cachedItem.fit,
test.cachedItem.reasons,
test.equivalenceHashForUpdatePredicate,
true,
)
}
// invalidate cached predicate for all nodes
ecache.InvalidateCachedPredicateItemOfAllNodes(sets.NewString(testPredicate))
// there should be no cached predicate any more
for _, test := range tests {
if algorithmCache, exist := ecache.algorithmCache[test.nodeName]; exist {
if _, exist := algorithmCache.predicatesCache.Get(testPredicate); exist {
t.Errorf("Failed: cached item for predicate key: %v on node: %v should be invalidated",
testPredicate, test.nodeName)
break
}
}
}
}
func TestInvalidateAllCachedPredicateItemOfNode(t *testing.T) {
testPredicate := "GeneralPredicates"
// tests is used to initialize all nodes
tests := []struct {
podName string
nodeName string
predicateKey string
equivalenceHashForUpdatePredicate uint64
cachedItem predicateItemType
}{
{
podName: "testPod",
nodeName: "node1",
equivalenceHashForUpdatePredicate: 123,
cachedItem: predicateItemType{
fit: false,
reasons: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts},
},
},
{
podName: "testPod",
nodeName: "node2",
equivalenceHashForUpdatePredicate: 456,
cachedItem: predicateItemType{
fit: false,
reasons: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts},
},
},
{
podName: "testPod",
nodeName: "node3",
equivalenceHashForUpdatePredicate: 123,
cachedItem: predicateItemType{
fit: true,
},
},
}
// this case does not need to calculate equivalence hash, just pass an empty function
fakeGetEquivalencePodFunc := func(pod *v1.Pod) interface{} { return nil }
ecache := NewEquivalenceCache(fakeGetEquivalencePodFunc)
for _, test := range tests {
// set cached item to equivalence cache
ecache.UpdateCachedPredicateItem(
test.podName,
test.nodeName,
testPredicate,
test.cachedItem.fit,
test.cachedItem.reasons,
test.equivalenceHashForUpdatePredicate,
true,
)
}
for _, test := range tests {
// invalidate cached predicate for all nodes
ecache.InvalidateAllCachedPredicateItemOfNode(test.nodeName)
if _, exist := ecache.algorithmCache[test.nodeName]; exist {
t.Errorf("Failed: cached item for node: %v should be invalidated", test.nodeName)
break
}
}
}

293
vendor/k8s.io/kubernetes/pkg/scheduler/core/extender.go generated vendored Normal file
View File

@ -0,0 +1,293 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package core
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
"strings"
"time"
"k8s.io/api/core/v1"
utilnet "k8s.io/apimachinery/pkg/util/net"
"k8s.io/apimachinery/pkg/util/sets"
restclient "k8s.io/client-go/rest"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
)
const (
// DefaultExtenderTimeout defines the default extender timeout in second.
DefaultExtenderTimeout = 5 * time.Second
)
// HTTPExtender implements the algorithm.SchedulerExtender interface.
type HTTPExtender struct {
extenderURL string
filterVerb string
prioritizeVerb string
bindVerb string
weight int
client *http.Client
nodeCacheCapable bool
managedResources sets.String
}
func makeTransport(config *schedulerapi.ExtenderConfig) (http.RoundTripper, error) {
var cfg restclient.Config
if config.TLSConfig != nil {
cfg.TLSClientConfig = *config.TLSConfig
}
if config.EnableHTTPS {
hasCA := len(cfg.CAFile) > 0 || len(cfg.CAData) > 0
if !hasCA {
cfg.Insecure = true
}
}
tlsConfig, err := restclient.TLSConfigFor(&cfg)
if err != nil {
return nil, err
}
if tlsConfig != nil {
return utilnet.SetTransportDefaults(&http.Transport{
TLSClientConfig: tlsConfig,
}), nil
}
return utilnet.SetTransportDefaults(&http.Transport{}), nil
}
// NewHTTPExtender creates an HTTPExtender object.
func NewHTTPExtender(config *schedulerapi.ExtenderConfig) (algorithm.SchedulerExtender, error) {
if config.HTTPTimeout.Nanoseconds() == 0 {
config.HTTPTimeout = time.Duration(DefaultExtenderTimeout)
}
transport, err := makeTransport(config)
if err != nil {
return nil, err
}
client := &http.Client{
Transport: transport,
Timeout: config.HTTPTimeout,
}
managedResources := sets.NewString()
for _, r := range config.ManagedResources {
managedResources.Insert(string(r.Name))
}
return &HTTPExtender{
extenderURL: config.URLPrefix,
filterVerb: config.FilterVerb,
prioritizeVerb: config.PrioritizeVerb,
bindVerb: config.BindVerb,
weight: config.Weight,
client: client,
nodeCacheCapable: config.NodeCacheCapable,
managedResources: managedResources,
}, nil
}
// Filter based on extender implemented predicate functions. The filtered list is
// expected to be a subset of the supplied list. failedNodesMap optionally contains
// the list of failed nodes and failure reasons.
func (h *HTTPExtender) Filter(pod *v1.Pod, nodes []*v1.Node, nodeNameToInfo map[string]*schedulercache.NodeInfo) ([]*v1.Node, schedulerapi.FailedNodesMap, error) {
var (
result schedulerapi.ExtenderFilterResult
nodeList *v1.NodeList
nodeNames *[]string
nodeResult []*v1.Node
args *schedulerapi.ExtenderArgs
)
if h.filterVerb == "" {
return nodes, schedulerapi.FailedNodesMap{}, nil
}
if h.nodeCacheCapable {
nodeNameSlice := make([]string, 0, len(nodes))
for _, node := range nodes {
nodeNameSlice = append(nodeNameSlice, node.Name)
}
nodeNames = &nodeNameSlice
} else {
nodeList = &v1.NodeList{}
for _, node := range nodes {
nodeList.Items = append(nodeList.Items, *node)
}
}
args = &schedulerapi.ExtenderArgs{
Pod: *pod,
Nodes: nodeList,
NodeNames: nodeNames,
}
if err := h.send(h.filterVerb, args, &result); err != nil {
return nil, nil, err
}
if result.Error != "" {
return nil, nil, fmt.Errorf(result.Error)
}
if h.nodeCacheCapable && result.NodeNames != nil {
nodeResult = make([]*v1.Node, 0, len(*result.NodeNames))
for i := range *result.NodeNames {
nodeResult = append(nodeResult, nodeNameToInfo[(*result.NodeNames)[i]].Node())
}
} else if result.Nodes != nil {
nodeResult = make([]*v1.Node, 0, len(result.Nodes.Items))
for i := range result.Nodes.Items {
nodeResult = append(nodeResult, &result.Nodes.Items[i])
}
}
return nodeResult, result.FailedNodes, nil
}
// Prioritize based on extender implemented priority functions. Weight*priority is added
// up for each such priority function. The returned score is added to the score computed
// by Kubernetes scheduler. The total score is used to do the host selection.
func (h *HTTPExtender) Prioritize(pod *v1.Pod, nodes []*v1.Node) (*schedulerapi.HostPriorityList, int, error) {
var (
result schedulerapi.HostPriorityList
nodeList *v1.NodeList
nodeNames *[]string
args *schedulerapi.ExtenderArgs
)
if h.prioritizeVerb == "" {
result := schedulerapi.HostPriorityList{}
for _, node := range nodes {
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: 0})
}
return &result, 0, nil
}
if h.nodeCacheCapable {
nodeNameSlice := make([]string, 0, len(nodes))
for _, node := range nodes {
nodeNameSlice = append(nodeNameSlice, node.Name)
}
nodeNames = &nodeNameSlice
} else {
nodeList = &v1.NodeList{}
for _, node := range nodes {
nodeList.Items = append(nodeList.Items, *node)
}
}
args = &schedulerapi.ExtenderArgs{
Pod: *pod,
Nodes: nodeList,
NodeNames: nodeNames,
}
if err := h.send(h.prioritizeVerb, args, &result); err != nil {
return nil, 0, err
}
return &result, h.weight, nil
}
// Bind delegates the action of binding a pod to a node to the extender.
func (h *HTTPExtender) Bind(binding *v1.Binding) error {
var result schedulerapi.ExtenderBindingResult
if !h.IsBinder() {
// This shouldn't happen as this extender wouldn't have become a Binder.
return fmt.Errorf("Unexpected empty bindVerb in extender")
}
req := &schedulerapi.ExtenderBindingArgs{
PodName: binding.Name,
PodNamespace: binding.Namespace,
PodUID: binding.UID,
Node: binding.Target.Name,
}
if err := h.send(h.bindVerb, &req, &result); err != nil {
return err
}
if result.Error != "" {
return fmt.Errorf(result.Error)
}
return nil
}
// IsBinder returns whether this extender is configured for the Bind method.
func (h *HTTPExtender) IsBinder() bool {
return h.bindVerb != ""
}
// Helper function to send messages to the extender
func (h *HTTPExtender) send(action string, args interface{}, result interface{}) error {
out, err := json.Marshal(args)
if err != nil {
return err
}
url := strings.TrimRight(h.extenderURL, "/") + "/" + action
req, err := http.NewRequest("POST", url, bytes.NewReader(out))
if err != nil {
return err
}
req.Header.Set("Content-Type", "application/json")
resp, err := h.client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("Failed %v with extender at URL %v, code %v", action, h.extenderURL, resp.StatusCode)
}
return json.NewDecoder(resp.Body).Decode(result)
}
// IsInterested returns true if at least one extended resource requested by
// this pod is managed by this extender.
func (h *HTTPExtender) IsInterested(pod *v1.Pod) bool {
if h.managedResources.Len() == 0 {
return true
}
if h.hasManagedResources(pod.Spec.Containers) {
return true
}
if h.hasManagedResources(pod.Spec.InitContainers) {
return true
}
return false
}
func (h *HTTPExtender) hasManagedResources(containers []v1.Container) bool {
for i := range containers {
container := &containers[i]
for resourceName := range container.Resources.Requests {
if h.managedResources.Has(string(resourceName)) {
return true
}
}
for resourceName := range container.Resources.Limits {
if h.managedResources.Has(string(resourceName)) {
return true
}
}
}
return false
}

View File

@ -0,0 +1,364 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package core
import (
"fmt"
"testing"
"time"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
type fitPredicate func(pod *v1.Pod, node *v1.Node) (bool, error)
type priorityFunc func(pod *v1.Pod, nodes []*v1.Node) (*schedulerapi.HostPriorityList, error)
type priorityConfig struct {
function priorityFunc
weight int
}
func errorPredicateExtender(pod *v1.Pod, node *v1.Node) (bool, error) {
return false, fmt.Errorf("Some error")
}
func falsePredicateExtender(pod *v1.Pod, node *v1.Node) (bool, error) {
return false, nil
}
func truePredicateExtender(pod *v1.Pod, node *v1.Node) (bool, error) {
return true, nil
}
func machine1PredicateExtender(pod *v1.Pod, node *v1.Node) (bool, error) {
if node.Name == "machine1" {
return true, nil
}
return false, nil
}
func machine2PredicateExtender(pod *v1.Pod, node *v1.Node) (bool, error) {
if node.Name == "machine2" {
return true, nil
}
return false, nil
}
func errorPrioritizerExtender(pod *v1.Pod, nodes []*v1.Node) (*schedulerapi.HostPriorityList, error) {
return &schedulerapi.HostPriorityList{}, fmt.Errorf("Some error")
}
func machine1PrioritizerExtender(pod *v1.Pod, nodes []*v1.Node) (*schedulerapi.HostPriorityList, error) {
result := schedulerapi.HostPriorityList{}
for _, node := range nodes {
score := 1
if node.Name == "machine1" {
score = 10
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: score})
}
return &result, nil
}
func machine2PrioritizerExtender(pod *v1.Pod, nodes []*v1.Node) (*schedulerapi.HostPriorityList, error) {
result := schedulerapi.HostPriorityList{}
for _, node := range nodes {
score := 1
if node.Name == "machine2" {
score = 10
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: score})
}
return &result, nil
}
func machine2Prioritizer(_ *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
result := []schedulerapi.HostPriority{}
for _, node := range nodes {
score := 1
if node.Name == "machine2" {
score = 10
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: score})
}
return result, nil
}
type FakeExtender struct {
predicates []fitPredicate
prioritizers []priorityConfig
weight int
nodeCacheCapable bool
filteredNodes []*v1.Node
unInterested bool
}
func (f *FakeExtender) Filter(pod *v1.Pod, nodes []*v1.Node, nodeNameToInfo map[string]*schedulercache.NodeInfo) ([]*v1.Node, schedulerapi.FailedNodesMap, error) {
filtered := []*v1.Node{}
failedNodesMap := schedulerapi.FailedNodesMap{}
for _, node := range nodes {
fits := true
for _, predicate := range f.predicates {
fit, err := predicate(pod, node)
if err != nil {
return []*v1.Node{}, schedulerapi.FailedNodesMap{}, err
}
if !fit {
fits = false
break
}
}
if fits {
filtered = append(filtered, node)
} else {
failedNodesMap[node.Name] = "FakeExtender failed"
}
}
f.filteredNodes = filtered
if f.nodeCacheCapable {
return filtered, failedNodesMap, nil
}
return filtered, failedNodesMap, nil
}
func (f *FakeExtender) Prioritize(pod *v1.Pod, nodes []*v1.Node) (*schedulerapi.HostPriorityList, int, error) {
result := schedulerapi.HostPriorityList{}
combinedScores := map[string]int{}
for _, prioritizer := range f.prioritizers {
weight := prioritizer.weight
if weight == 0 {
continue
}
priorityFunc := prioritizer.function
prioritizedList, err := priorityFunc(pod, nodes)
if err != nil {
return &schedulerapi.HostPriorityList{}, 0, err
}
for _, hostEntry := range *prioritizedList {
combinedScores[hostEntry.Host] += hostEntry.Score * weight
}
}
for host, score := range combinedScores {
result = append(result, schedulerapi.HostPriority{Host: host, Score: score})
}
return &result, f.weight, nil
}
func (f *FakeExtender) Bind(binding *v1.Binding) error {
if len(f.filteredNodes) != 0 {
for _, node := range f.filteredNodes {
if node.Name == binding.Target.Name {
f.filteredNodes = nil
return nil
}
}
err := fmt.Errorf("Node %v not in filtered nodes %v", binding.Target.Name, f.filteredNodes)
f.filteredNodes = nil
return err
}
return nil
}
func (f *FakeExtender) IsBinder() bool {
return true
}
func (f *FakeExtender) IsInterested(pod *v1.Pod) bool {
return !f.unInterested
}
var _ algorithm.SchedulerExtender = &FakeExtender{}
func TestGenericSchedulerWithExtenders(t *testing.T) {
tests := []struct {
name string
predicates map[string]algorithm.FitPredicate
prioritizers []algorithm.PriorityConfig
extenders []FakeExtender
extenderPredicates []fitPredicate
extenderPrioritizers []priorityConfig
nodes []string
expectedHost string
expectsErr bool
}{
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{truePredicateExtender},
},
{
predicates: []fitPredicate{errorPredicateExtender},
},
},
nodes: []string{"machine1", "machine2"},
expectsErr: true,
name: "test 1",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{truePredicateExtender},
},
{
predicates: []fitPredicate{falsePredicateExtender},
},
},
nodes: []string{"machine1", "machine2"},
expectsErr: true,
name: "test 2",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{truePredicateExtender},
},
{
predicates: []fitPredicate{machine1PredicateExtender},
},
},
nodes: []string{"machine1", "machine2"},
expectedHost: "machine1",
name: "test 3",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{machine2PredicateExtender},
},
{
predicates: []fitPredicate{machine1PredicateExtender},
},
},
nodes: []string{"machine1", "machine2"},
expectsErr: true,
name: "test 4",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{truePredicateExtender},
prioritizers: []priorityConfig{{errorPrioritizerExtender, 10}},
weight: 1,
},
},
nodes: []string{"machine1"},
expectedHost: "machine1",
name: "test 5",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{truePredicateExtender},
prioritizers: []priorityConfig{{machine1PrioritizerExtender, 10}},
weight: 1,
},
{
predicates: []fitPredicate{truePredicateExtender},
prioritizers: []priorityConfig{{machine2PrioritizerExtender, 10}},
weight: 5,
},
},
nodes: []string{"machine1", "machine2"},
expectedHost: "machine2",
name: "test 6",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Function: machine2Prioritizer, Weight: 20}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{truePredicateExtender},
prioritizers: []priorityConfig{{machine1PrioritizerExtender, 10}},
weight: 1,
},
},
nodes: []string{"machine1", "machine2"},
expectedHost: "machine2", // machine2 has higher score
name: "test 7",
},
{
// Scheduler is expected to not send pod to extender in
// Filter/Prioritize phases if the extender is not interested in
// the pod.
//
// If scheduler sends the pod by mistake, the test will fail
// because of the errors from errorPredicateExtender and/or
// errorPrioritizerExtender.
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{errorPredicateExtender},
prioritizers: []priorityConfig{{errorPrioritizerExtender, 10}},
unInterested: true,
},
},
nodes: []string{"machine1", "machine2"},
expectsErr: false,
expectedHost: "machine2", // machine2 has higher score
name: "test 8",
},
}
for _, test := range tests {
extenders := []algorithm.SchedulerExtender{}
for ii := range test.extenders {
extenders = append(extenders, &test.extenders[ii])
}
cache := schedulercache.New(time.Duration(0), wait.NeverStop)
for _, name := range test.nodes {
cache.AddNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: name}})
}
queue := NewSchedulingQueue()
scheduler := NewGenericScheduler(
cache, nil, queue, test.predicates, algorithm.EmptyPredicateMetadataProducer, test.prioritizers, algorithm.EmptyPriorityMetadataProducer, extenders, nil, schedulertesting.FakePersistentVolumeClaimLister{}, false)
podIgnored := &v1.Pod{}
machine, err := scheduler.Schedule(podIgnored, schedulertesting.FakeNodeLister(makeNodeList(test.nodes)))
if test.expectsErr {
if err == nil {
t.Errorf("Unexpected non-error for %s, machine %s", test.name, machine)
}
} else {
if err != nil {
t.Errorf("Unexpected error: %v", err)
continue
}
if test.expectedHost != machine {
t.Errorf("Failed : %s, Expected: %s, Saw: %s", test.name, test.expectedHost, machine)
}
}
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,699 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// This file contains structures that implement scheduling queue types.
// Scheduling queues hold pods waiting to be scheduled. This file has two types
// of scheduling queue: 1) a FIFO, which is mostly the same as cache.FIFO, 2) a
// priority queue which has two sub queues. One sub-queue holds pods that are
// being considered for scheduling. This is called activeQ. Another queue holds
// pods that are already tried and are determined to be unschedulable. The latter
// is called unschedulableQ.
// FIFO is here for flag-gating purposes and allows us to use the traditional
// scheduling queue when util.PodPriorityEnabled() returns false.
package core
import (
"container/heap"
"fmt"
"sync"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/cache"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/pkg/scheduler/util"
"github.com/golang/glog"
"reflect"
)
// SchedulingQueue is an interface for a queue to store pods waiting to be scheduled.
// The interface follows a pattern similar to cache.FIFO and cache.Heap and
// makes it easy to use those data structures as a SchedulingQueue.
type SchedulingQueue interface {
Add(pod *v1.Pod) error
AddIfNotPresent(pod *v1.Pod) error
AddUnschedulableIfNotPresent(pod *v1.Pod) error
Pop() (*v1.Pod, error)
Update(oldPod, newPod *v1.Pod) error
Delete(pod *v1.Pod) error
MoveAllToActiveQueue()
AssignedPodAdded(pod *v1.Pod)
AssignedPodUpdated(pod *v1.Pod)
WaitingPodsForNode(nodeName string) []*v1.Pod
}
// NewSchedulingQueue initializes a new scheduling queue. If pod priority is
// enabled a priority queue is returned. If it is disabled, a FIFO is returned.
func NewSchedulingQueue() SchedulingQueue {
if util.PodPriorityEnabled() {
return NewPriorityQueue()
}
return NewFIFO()
}
// FIFO is basically a simple wrapper around cache.FIFO to make it compatible
// with the SchedulingQueue interface.
type FIFO struct {
*cache.FIFO
}
var _ = SchedulingQueue(&FIFO{}) // Making sure that FIFO implements SchedulingQueue.
// Add adds a pod to the FIFO.
func (f *FIFO) Add(pod *v1.Pod) error {
return f.FIFO.Add(pod)
}
// AddIfNotPresent adds a pod to the FIFO if it is absent in the FIFO.
func (f *FIFO) AddIfNotPresent(pod *v1.Pod) error {
return f.FIFO.AddIfNotPresent(pod)
}
// AddUnschedulableIfNotPresent adds an unschedulable pod back to the queue. In
// FIFO it is added to the end of the queue.
func (f *FIFO) AddUnschedulableIfNotPresent(pod *v1.Pod) error {
return f.FIFO.AddIfNotPresent(pod)
}
// Update updates a pod in the FIFO.
func (f *FIFO) Update(oldPod, newPod *v1.Pod) error {
return f.FIFO.Update(newPod)
}
// Delete deletes a pod in the FIFO.
func (f *FIFO) Delete(pod *v1.Pod) error {
return f.FIFO.Delete(pod)
}
// Pop removes the head of FIFO and returns it.
// This is just a copy/paste of cache.Pop(queue Queue) from fifo.go that scheduler
// has always been using. There is a comment in that file saying that this method
// shouldn't be used in production code, but scheduler has always been using it.
// This function does minimal error checking.
func (f *FIFO) Pop() (*v1.Pod, error) {
var result interface{}
f.FIFO.Pop(func(obj interface{}) error {
result = obj
return nil
})
return result.(*v1.Pod), nil
}
// FIFO does not need to react to events, as all pods are always in the active
// scheduling queue anyway.
// AssignedPodAdded does nothing here.
func (f *FIFO) AssignedPodAdded(pod *v1.Pod) {}
// AssignedPodUpdated does nothing here.
func (f *FIFO) AssignedPodUpdated(pod *v1.Pod) {}
// MoveAllToActiveQueue does nothing in FIFO as all pods are always in the active queue.
func (f *FIFO) MoveAllToActiveQueue() {}
// WaitingPodsForNode returns pods that are nominated to run on the given node,
// but FIFO does not support it.
func (f *FIFO) WaitingPodsForNode(nodeName string) []*v1.Pod {
return nil
}
// NewFIFO creates a FIFO object.
func NewFIFO() *FIFO {
return &FIFO{FIFO: cache.NewFIFO(cache.MetaNamespaceKeyFunc)}
}
// NominatedNodeName returns nominated node name of a Pod.
func NominatedNodeName(pod *v1.Pod) string {
return pod.Status.NominatedNodeName
}
// PriorityQueue implements a scheduling queue. It is an alternative to FIFO.
// The head of PriorityQueue is the highest priority pending pod. This structure
// has two sub queues. One sub-queue holds pods that are being considered for
// scheduling. This is called activeQ and is a Heap. Another queue holds
// pods that are already tried and are determined to be unschedulable. The latter
// is called unschedulableQ.
type PriorityQueue struct {
lock sync.RWMutex
cond sync.Cond
// activeQ is heap structure that scheduler actively looks at to find pods to
// schedule. Head of heap is the highest priority pod.
activeQ *Heap
// unschedulableQ holds pods that have been tried and determined unschedulable.
unschedulableQ *UnschedulablePodsMap
// nominatedPods is a map keyed by a node name and the value is a list of
// pods which are nominated to run on the node. These are pods which can be in
// the activeQ or unschedulableQ.
nominatedPods map[string][]*v1.Pod
// receivedMoveRequest is set to true whenever we receive a request to move a
// pod from the unschedulableQ to the activeQ, and is set to false, when we pop
// a pod from the activeQ. It indicates if we received a move request when a
// pod was in flight (we were trying to schedule it). In such a case, we put
// the pod back into the activeQ if it is determined unschedulable.
receivedMoveRequest bool
}
// Making sure that PriorityQueue implements SchedulingQueue.
var _ = SchedulingQueue(&PriorityQueue{})
// NewPriorityQueue creates a PriorityQueue object.
func NewPriorityQueue() *PriorityQueue {
pq := &PriorityQueue{
activeQ: newHeap(cache.MetaNamespaceKeyFunc, util.HigherPriorityPod),
unschedulableQ: newUnschedulablePodsMap(),
nominatedPods: map[string][]*v1.Pod{},
}
pq.cond.L = &pq.lock
return pq
}
// addNominatedPodIfNeeded adds a pod to nominatedPods if it has a NominatedNodeName and it does not
// already exist in the map. Adding an existing pod is not going to update the pod.
func (p *PriorityQueue) addNominatedPodIfNeeded(pod *v1.Pod) {
nnn := NominatedNodeName(pod)
if len(nnn) > 0 {
for _, np := range p.nominatedPods[nnn] {
if np.UID == pod.UID {
glog.Errorf("Pod %v/%v already exists in the nominated map!", pod.Namespace, pod.Name)
return
}
}
p.nominatedPods[nnn] = append(p.nominatedPods[nnn], pod)
}
}
// deleteNominatedPodIfExists deletes a pod from the nominatedPods.
func (p *PriorityQueue) deleteNominatedPodIfExists(pod *v1.Pod) {
nnn := NominatedNodeName(pod)
if len(nnn) > 0 {
for i, np := range p.nominatedPods[nnn] {
if np.UID == pod.UID {
p.nominatedPods[nnn] = append(p.nominatedPods[nnn][:i], p.nominatedPods[nnn][i+1:]...)
if len(p.nominatedPods[nnn]) == 0 {
delete(p.nominatedPods, nnn)
}
break
}
}
}
}
// updateNominatedPod updates a pod in the nominatedPods.
func (p *PriorityQueue) updateNominatedPod(oldPod, newPod *v1.Pod) {
// Even if the nominated node name of the Pod is not changed, we must delete and add it again
// to ensure that its pointer is updated.
p.deleteNominatedPodIfExists(oldPod)
p.addNominatedPodIfNeeded(newPod)
}
// Add adds a pod to the active queue. It should be called only when a new pod
// is added so there is no chance the pod is already in either queue.
func (p *PriorityQueue) Add(pod *v1.Pod) error {
p.lock.Lock()
defer p.lock.Unlock()
err := p.activeQ.Add(pod)
if err != nil {
glog.Errorf("Error adding pod %v to the scheduling queue: %v", pod.Name, err)
} else {
if p.unschedulableQ.get(pod) != nil {
glog.Errorf("Error: pod %v is already in the unschedulable queue.", pod.Name)
p.deleteNominatedPodIfExists(pod)
p.unschedulableQ.delete(pod)
}
p.addNominatedPodIfNeeded(pod)
p.cond.Broadcast()
}
return err
}
// AddIfNotPresent adds a pod to the active queue if it is not present in any of
// the two queues. If it is present in any, it doesn't do any thing.
func (p *PriorityQueue) AddIfNotPresent(pod *v1.Pod) error {
p.lock.Lock()
defer p.lock.Unlock()
if p.unschedulableQ.get(pod) != nil {
return nil
}
if _, exists, _ := p.activeQ.Get(pod); exists {
return nil
}
err := p.activeQ.Add(pod)
if err != nil {
glog.Errorf("Error adding pod %v to the scheduling queue: %v", pod.Name, err)
} else {
p.addNominatedPodIfNeeded(pod)
p.cond.Broadcast()
}
return err
}
func isPodUnschedulable(pod *v1.Pod) bool {
_, cond := podutil.GetPodCondition(&pod.Status, v1.PodScheduled)
return cond != nil && cond.Status == v1.ConditionFalse && cond.Reason == v1.PodReasonUnschedulable
}
// AddUnschedulableIfNotPresent does nothing if the pod is present in either
// queue. Otherwise it adds the pod to the unschedulable queue if
// p.receivedMoveRequest is false, and to the activeQ if p.receivedMoveRequest is true.
func (p *PriorityQueue) AddUnschedulableIfNotPresent(pod *v1.Pod) error {
p.lock.Lock()
defer p.lock.Unlock()
if p.unschedulableQ.get(pod) != nil {
return fmt.Errorf("pod is already present in unschedulableQ")
}
if _, exists, _ := p.activeQ.Get(pod); exists {
return fmt.Errorf("pod is already present in the activeQ")
}
if !p.receivedMoveRequest && isPodUnschedulable(pod) {
p.unschedulableQ.addOrUpdate(pod)
p.addNominatedPodIfNeeded(pod)
return nil
}
err := p.activeQ.Add(pod)
if err == nil {
p.addNominatedPodIfNeeded(pod)
p.cond.Broadcast()
}
return err
}
// Pop removes the head of the active queue and returns it. It blocks if the
// activeQ is empty and waits until a new item is added to the queue. It also
// clears receivedMoveRequest to mark the beginning of a new scheduling cycle.
func (p *PriorityQueue) Pop() (*v1.Pod, error) {
p.lock.Lock()
defer p.lock.Unlock()
for len(p.activeQ.data.queue) == 0 {
p.cond.Wait()
}
obj, err := p.activeQ.Pop()
if err != nil {
return nil, err
}
pod := obj.(*v1.Pod)
p.deleteNominatedPodIfExists(pod)
p.receivedMoveRequest = false
return pod, err
}
// isPodUpdated checks if the pod is updated in a way that it may have become
// schedulable. It drops status of the pod and compares it with old version.
func isPodUpdated(oldPod, newPod *v1.Pod) bool {
strip := func(pod *v1.Pod) *v1.Pod {
p := pod.DeepCopy()
p.ResourceVersion = ""
p.Generation = 0
p.Status = v1.PodStatus{}
return p
}
return !reflect.DeepEqual(strip(oldPod), strip(newPod))
}
// Update updates a pod in the active queue if present. Otherwise, it removes
// the item from the unschedulable queue and adds the updated one to the active
// queue.
func (p *PriorityQueue) Update(oldPod, newPod *v1.Pod) error {
p.lock.Lock()
defer p.lock.Unlock()
// If the pod is already in the active queue, just update it there.
if _, exists, _ := p.activeQ.Get(newPod); exists {
p.updateNominatedPod(oldPod, newPod)
err := p.activeQ.Update(newPod)
return err
}
// If the pod is in the unschedulable queue, updating it may make it schedulable.
if usPod := p.unschedulableQ.get(newPod); usPod != nil {
p.updateNominatedPod(oldPod, newPod)
if isPodUpdated(oldPod, newPod) {
p.unschedulableQ.delete(usPod)
err := p.activeQ.Add(newPod)
if err == nil {
p.cond.Broadcast()
}
return err
}
p.unschedulableQ.addOrUpdate(newPod)
return nil
}
// If pod is not in any of the two queue, we put it in the active queue.
err := p.activeQ.Add(newPod)
if err == nil {
p.addNominatedPodIfNeeded(newPod)
p.cond.Broadcast()
}
return err
}
// Delete deletes the item from either of the two queues. It assumes the pod is
// only in one queue.
func (p *PriorityQueue) Delete(pod *v1.Pod) error {
p.lock.Lock()
defer p.lock.Unlock()
p.deleteNominatedPodIfExists(pod)
err := p.activeQ.Delete(pod)
if err != nil { // The item was probably not found in the activeQ.
p.unschedulableQ.delete(pod)
}
return nil
}
// AssignedPodAdded is called when a bound pod is added. Creation of this pod
// may make pending pods with matching affinity terms schedulable.
func (p *PriorityQueue) AssignedPodAdded(pod *v1.Pod) {
p.movePodsToActiveQueue(p.getUnschedulablePodsWithMatchingAffinityTerm(pod))
}
// AssignedPodUpdated is called when a bound pod is updated. Change of labels
// may make pending pods with matching affinity terms schedulable.
func (p *PriorityQueue) AssignedPodUpdated(pod *v1.Pod) {
p.movePodsToActiveQueue(p.getUnschedulablePodsWithMatchingAffinityTerm(pod))
}
// MoveAllToActiveQueue moves all pods from unschedulableQ to activeQ. This
// function adds all pods and then signals the condition variable to ensure that
// if Pop() is waiting for an item, it receives it after all the pods are in the
// queue and the head is the highest priority pod.
// TODO(bsalamat): We should add a back-off mechanism here so that a high priority
// pod which is unschedulable does not go to the head of the queue frequently. For
// example in a cluster where a lot of pods being deleted, such a high priority
// pod can deprive other pods from getting scheduled.
func (p *PriorityQueue) MoveAllToActiveQueue() {
p.lock.Lock()
defer p.lock.Unlock()
for _, pod := range p.unschedulableQ.pods {
if err := p.activeQ.Add(pod); err != nil {
glog.Errorf("Error adding pod %v to the scheduling queue: %v", pod.Name, err)
}
}
p.unschedulableQ.clear()
p.receivedMoveRequest = true
p.cond.Broadcast()
}
func (p *PriorityQueue) movePodsToActiveQueue(pods []*v1.Pod) {
p.lock.Lock()
defer p.lock.Unlock()
for _, pod := range pods {
if err := p.activeQ.Add(pod); err == nil {
p.unschedulableQ.delete(pod)
} else {
glog.Errorf("Error adding pod %v to the scheduling queue: %v", pod.Name, err)
}
}
p.receivedMoveRequest = true
p.cond.Broadcast()
}
// getUnschedulablePodsWithMatchingAffinityTerm returns unschedulable pods which have
// any affinity term that matches "pod".
func (p *PriorityQueue) getUnschedulablePodsWithMatchingAffinityTerm(pod *v1.Pod) []*v1.Pod {
p.lock.RLock()
defer p.lock.RUnlock()
var podsToMove []*v1.Pod
for _, up := range p.unschedulableQ.pods {
affinity := up.Spec.Affinity
if affinity != nil && affinity.PodAffinity != nil {
terms := predicates.GetPodAffinityTerms(affinity.PodAffinity)
for _, term := range terms {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(up, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
glog.Errorf("Error getting label selectors for pod: %v.", up.Name)
}
if priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector) {
podsToMove = append(podsToMove, up)
break
}
}
}
}
return podsToMove
}
// WaitingPodsForNode returns pods that are nominated to run on the given node,
// but they are waiting for other pods to be removed from the node before they
// can be actually scheduled.
func (p *PriorityQueue) WaitingPodsForNode(nodeName string) []*v1.Pod {
p.lock.RLock()
defer p.lock.RUnlock()
if list, ok := p.nominatedPods[nodeName]; ok {
return list
}
return nil
}
// UnschedulablePodsMap holds pods that cannot be scheduled. This data structure
// is used to implement unschedulableQ.
type UnschedulablePodsMap struct {
// pods is a map key by a pod's full-name and the value is a pointer to the pod.
pods map[string]*v1.Pod
keyFunc func(*v1.Pod) string
}
// Add adds a pod to the unschedulable pods.
func (u *UnschedulablePodsMap) addOrUpdate(pod *v1.Pod) {
u.pods[u.keyFunc(pod)] = pod
}
// Delete deletes a pod from the unschedulable pods.
func (u *UnschedulablePodsMap) delete(pod *v1.Pod) {
delete(u.pods, u.keyFunc(pod))
}
// Get returns the pod if a pod with the same key as the key of the given "pod"
// is found in the map. It returns nil otherwise.
func (u *UnschedulablePodsMap) get(pod *v1.Pod) *v1.Pod {
podKey := u.keyFunc(pod)
if p, exists := u.pods[podKey]; exists {
return p
}
return nil
}
// Clear removes all the entries from the unschedulable maps.
func (u *UnschedulablePodsMap) clear() {
u.pods = make(map[string]*v1.Pod)
}
// newUnschedulablePodsMap initializes a new object of UnschedulablePodsMap.
func newUnschedulablePodsMap() *UnschedulablePodsMap {
return &UnschedulablePodsMap{
pods: make(map[string]*v1.Pod),
keyFunc: util.GetPodFullName,
}
}
// Below is the implementation of the a heap. The logic is pretty much the same
// as cache.heap, however, this heap does not perform synchronization. It leaves
// synchronization to the SchedulingQueue.
// LessFunc is a function type to compare two objects.
type LessFunc func(interface{}, interface{}) bool
// KeyFunc is a function type to get the key from an object.
type KeyFunc func(obj interface{}) (string, error)
type heapItem struct {
obj interface{} // The object which is stored in the heap.
index int // The index of the object's key in the Heap.queue.
}
type itemKeyValue struct {
key string
obj interface{}
}
// heapData is an internal struct that implements the standard heap interface
// and keeps the data stored in the heap.
type heapData struct {
// items is a map from key of the objects to the objects and their index.
// We depend on the property that items in the map are in the queue and vice versa.
items map[string]*heapItem
// queue implements a heap data structure and keeps the order of elements
// according to the heap invariant. The queue keeps the keys of objects stored
// in "items".
queue []string
// keyFunc is used to make the key used for queued item insertion and retrieval, and
// should be deterministic.
keyFunc KeyFunc
// lessFunc is used to compare two objects in the heap.
lessFunc LessFunc
}
var (
_ = heap.Interface(&heapData{}) // heapData is a standard heap
)
// Less compares two objects and returns true if the first one should go
// in front of the second one in the heap.
func (h *heapData) Less(i, j int) bool {
if i > len(h.queue) || j > len(h.queue) {
return false
}
itemi, ok := h.items[h.queue[i]]
if !ok {
return false
}
itemj, ok := h.items[h.queue[j]]
if !ok {
return false
}
return h.lessFunc(itemi.obj, itemj.obj)
}
// Len returns the number of items in the Heap.
func (h *heapData) Len() int { return len(h.queue) }
// Swap implements swapping of two elements in the heap. This is a part of standard
// heap interface and should never be called directly.
func (h *heapData) Swap(i, j int) {
h.queue[i], h.queue[j] = h.queue[j], h.queue[i]
item := h.items[h.queue[i]]
item.index = i
item = h.items[h.queue[j]]
item.index = j
}
// Push is supposed to be called by heap.Push only.
func (h *heapData) Push(kv interface{}) {
keyValue := kv.(*itemKeyValue)
n := len(h.queue)
h.items[keyValue.key] = &heapItem{keyValue.obj, n}
h.queue = append(h.queue, keyValue.key)
}
// Pop is supposed to be called by heap.Pop only.
func (h *heapData) Pop() interface{} {
key := h.queue[len(h.queue)-1]
h.queue = h.queue[0 : len(h.queue)-1]
item, ok := h.items[key]
if !ok {
// This is an error
return nil
}
delete(h.items, key)
return item.obj
}
// Heap is a producer/consumer queue that implements a heap data structure.
// It can be used to implement priority queues and similar data structures.
type Heap struct {
// data stores objects and has a queue that keeps their ordering according
// to the heap invariant.
data *heapData
}
// Add inserts an item, and puts it in the queue. The item is updated if it
// already exists.
func (h *Heap) Add(obj interface{}) error {
key, err := h.data.keyFunc(obj)
if err != nil {
return cache.KeyError{Obj: obj, Err: err}
}
if _, exists := h.data.items[key]; exists {
h.data.items[key].obj = obj
heap.Fix(h.data, h.data.items[key].index)
} else {
heap.Push(h.data, &itemKeyValue{key, obj})
}
return nil
}
// AddIfNotPresent inserts an item, and puts it in the queue. If an item with
// the key is present in the map, no changes is made to the item.
func (h *Heap) AddIfNotPresent(obj interface{}) error {
key, err := h.data.keyFunc(obj)
if err != nil {
return cache.KeyError{Obj: obj, Err: err}
}
if _, exists := h.data.items[key]; !exists {
heap.Push(h.data, &itemKeyValue{key, obj})
}
return nil
}
// Update is the same as Add in this implementation. When the item does not
// exist, it is added.
func (h *Heap) Update(obj interface{}) error {
return h.Add(obj)
}
// Delete removes an item.
func (h *Heap) Delete(obj interface{}) error {
key, err := h.data.keyFunc(obj)
if err != nil {
return cache.KeyError{Obj: obj, Err: err}
}
if item, ok := h.data.items[key]; ok {
heap.Remove(h.data, item.index)
return nil
}
return fmt.Errorf("object not found")
}
// Pop returns the head of the heap.
func (h *Heap) Pop() (interface{}, error) {
obj := heap.Pop(h.data)
if obj != nil {
return obj, nil
}
return nil, fmt.Errorf("object was removed from heap data")
}
// Get returns the requested item, or sets exists=false.
func (h *Heap) Get(obj interface{}) (interface{}, bool, error) {
key, err := h.data.keyFunc(obj)
if err != nil {
return nil, false, cache.KeyError{Obj: obj, Err: err}
}
return h.GetByKey(key)
}
// GetByKey returns the requested item, or sets exists=false.
func (h *Heap) GetByKey(key string) (interface{}, bool, error) {
item, exists := h.data.items[key]
if !exists {
return nil, false, nil
}
return item.obj, true, nil
}
// List returns a list of all the items.
func (h *Heap) List() []interface{} {
list := make([]interface{}, 0, len(h.data.items))
for _, item := range h.data.items {
list = append(list, item.obj)
}
return list
}
// newHeap returns a Heap which can be used to queue up items to process.
func newHeap(keyFn KeyFunc, lessFn LessFunc) *Heap {
return &Heap{
data: &heapData{
items: map[string]*heapItem{},
queue: []string{},
keyFunc: keyFn,
lessFunc: lessFn,
},
}
}

View File

@ -0,0 +1,469 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package core
import (
"reflect"
"sync"
"testing"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/scheduler/util"
)
var mediumPriority = (lowPriority + highPriority) / 2
var highPriorityPod, highPriNominatedPod, medPriorityPod, unschedulablePod = v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "hpp",
Namespace: "ns1",
UID: "hppns1",
},
Spec: v1.PodSpec{
Priority: &highPriority,
},
},
v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "hpp",
Namespace: "ns1",
UID: "hppns1",
},
Spec: v1.PodSpec{
Priority: &highPriority,
},
Status: v1.PodStatus{
NominatedNodeName: "node1",
},
},
v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "mpp",
Namespace: "ns2",
UID: "mppns2",
Annotations: map[string]string{
"annot2": "val2",
},
},
Spec: v1.PodSpec{
Priority: &mediumPriority,
},
Status: v1.PodStatus{
NominatedNodeName: "node1",
},
},
v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "up",
Namespace: "ns1",
UID: "upns1",
Annotations: map[string]string{
"annot2": "val2",
},
},
Spec: v1.PodSpec{
Priority: &lowPriority,
},
Status: v1.PodStatus{
Conditions: []v1.PodCondition{
{
Type: v1.PodScheduled,
Status: v1.ConditionFalse,
Reason: v1.PodReasonUnschedulable,
},
},
NominatedNodeName: "node1",
},
}
func TestPriorityQueue_Add(t *testing.T) {
q := NewPriorityQueue()
q.Add(&medPriorityPod)
q.Add(&unschedulablePod)
q.Add(&highPriorityPod)
expectedNominatedPods := map[string][]*v1.Pod{
"node1": {&medPriorityPod, &unschedulablePod},
}
if !reflect.DeepEqual(q.nominatedPods, expectedNominatedPods) {
t.Errorf("Unexpected nominated map after adding pods. Expected: %v, got: %v", expectedNominatedPods, q.nominatedPods)
}
if p, err := q.Pop(); err != nil || p != &highPriorityPod {
t.Errorf("Expected: %v after Pop, but got: %v", highPriorityPod.Name, p.Name)
}
if p, err := q.Pop(); err != nil || p != &medPriorityPod {
t.Errorf("Expected: %v after Pop, but got: %v", medPriorityPod.Name, p.Name)
}
if p, err := q.Pop(); err != nil || p != &unschedulablePod {
t.Errorf("Expected: %v after Pop, but got: %v", unschedulablePod.Name, p.Name)
}
if len(q.nominatedPods) != 0 {
t.Errorf("Expected nomindatePods to be empty: %v", q.nominatedPods)
}
}
func TestPriorityQueue_AddIfNotPresent(t *testing.T) {
q := NewPriorityQueue()
q.unschedulableQ.addOrUpdate(&highPriNominatedPod)
q.AddIfNotPresent(&highPriNominatedPod) // Must not add anything.
q.AddIfNotPresent(&medPriorityPod)
q.AddIfNotPresent(&unschedulablePod)
expectedNominatedPods := map[string][]*v1.Pod{
"node1": {&medPriorityPod, &unschedulablePod},
}
if !reflect.DeepEqual(q.nominatedPods, expectedNominatedPods) {
t.Errorf("Unexpected nominated map after adding pods. Expected: %v, got: %v", expectedNominatedPods, q.nominatedPods)
}
if p, err := q.Pop(); err != nil || p != &medPriorityPod {
t.Errorf("Expected: %v after Pop, but got: %v", medPriorityPod.Name, p.Name)
}
if p, err := q.Pop(); err != nil || p != &unschedulablePod {
t.Errorf("Expected: %v after Pop, but got: %v", unschedulablePod.Name, p.Name)
}
if len(q.nominatedPods) != 0 {
t.Errorf("Expected nomindatePods to be empty: %v", q.nominatedPods)
}
if q.unschedulableQ.get(&highPriNominatedPod) != &highPriNominatedPod {
t.Errorf("Pod %v was not found in the unschedulableQ.", highPriNominatedPod.Name)
}
}
func TestPriorityQueue_AddUnschedulableIfNotPresent(t *testing.T) {
q := NewPriorityQueue()
q.Add(&highPriNominatedPod)
q.AddUnschedulableIfNotPresent(&highPriNominatedPod) // Must not add anything.
q.AddUnschedulableIfNotPresent(&medPriorityPod) // This should go to activeQ.
q.AddUnschedulableIfNotPresent(&unschedulablePod)
expectedNominatedPods := map[string][]*v1.Pod{
"node1": {&highPriNominatedPod, &medPriorityPod, &unschedulablePod},
}
if !reflect.DeepEqual(q.nominatedPods, expectedNominatedPods) {
t.Errorf("Unexpected nominated map after adding pods. Expected: %v, got: %v", expectedNominatedPods, q.nominatedPods)
}
if p, err := q.Pop(); err != nil || p != &highPriNominatedPod {
t.Errorf("Expected: %v after Pop, but got: %v", highPriNominatedPod.Name, p.Name)
}
if p, err := q.Pop(); err != nil || p != &medPriorityPod {
t.Errorf("Expected: %v after Pop, but got: %v", medPriorityPod.Name, p.Name)
}
if len(q.nominatedPods) != 1 {
t.Errorf("Expected nomindatePods to have one element: %v", q.nominatedPods)
}
if q.unschedulableQ.get(&unschedulablePod) != &unschedulablePod {
t.Errorf("Pod %v was not found in the unschedulableQ.", unschedulablePod.Name)
}
}
func TestPriorityQueue_Pop(t *testing.T) {
q := NewPriorityQueue()
wg := sync.WaitGroup{}
wg.Add(1)
go func() {
defer wg.Done()
if p, err := q.Pop(); err != nil || p != &medPriorityPod {
t.Errorf("Expected: %v after Pop, but got: %v", medPriorityPod.Name, p.Name)
}
if len(q.nominatedPods) != 0 {
t.Errorf("Expected nomindatePods to be empty: %v", q.nominatedPods)
}
}()
q.Add(&medPriorityPod)
wg.Wait()
}
func TestPriorityQueue_Update(t *testing.T) {
q := NewPriorityQueue()
q.Update(nil, &highPriorityPod)
if _, exists, _ := q.activeQ.Get(&highPriorityPod); !exists {
t.Errorf("Expected %v to be added to activeQ.", highPriorityPod.Name)
}
if len(q.nominatedPods) != 0 {
t.Errorf("Expected nomindatePods to be empty: %v", q.nominatedPods)
}
// Update highPriorityPod and add a nominatedNodeName to it.
q.Update(&highPriorityPod, &highPriNominatedPod)
if q.activeQ.data.Len() != 1 {
t.Error("Expected only one item in activeQ.")
}
if len(q.nominatedPods) != 1 {
t.Errorf("Expected one item in nomindatePods map: %v", q.nominatedPods)
}
// Updating an unschedulable pod which is not in any of the two queues, should
// add the pod to activeQ.
q.Update(&unschedulablePod, &unschedulablePod)
if _, exists, _ := q.activeQ.Get(&unschedulablePod); !exists {
t.Errorf("Expected %v to be added to activeQ.", unschedulablePod.Name)
}
// Updating a pod that is already in activeQ, should not change it.
q.Update(&unschedulablePod, &unschedulablePod)
if len(q.unschedulableQ.pods) != 0 {
t.Error("Expected unschedulableQ to be empty.")
}
if _, exists, _ := q.activeQ.Get(&unschedulablePod); !exists {
t.Errorf("Expected: %v to be added to activeQ.", unschedulablePod.Name)
}
if p, err := q.Pop(); err != nil || p != &highPriNominatedPod {
t.Errorf("Expected: %v after Pop, but got: %v", highPriorityPod.Name, p.Name)
}
}
func TestPriorityQueue_Delete(t *testing.T) {
q := NewPriorityQueue()
q.Update(&highPriorityPod, &highPriNominatedPod)
q.Add(&unschedulablePod)
q.Delete(&highPriNominatedPod)
if _, exists, _ := q.activeQ.Get(&unschedulablePod); !exists {
t.Errorf("Expected %v to be in activeQ.", unschedulablePod.Name)
}
if _, exists, _ := q.activeQ.Get(&highPriNominatedPod); exists {
t.Errorf("Didn't expect %v to be in activeQ.", highPriorityPod.Name)
}
if len(q.nominatedPods) != 1 {
t.Errorf("Expected nomindatePods to have only 'unschedulablePod': %v", q.nominatedPods)
}
q.Delete(&unschedulablePod)
if len(q.nominatedPods) != 0 {
t.Errorf("Expected nomindatePods to be empty: %v", q.nominatedPods)
}
}
func TestPriorityQueue_MoveAllToActiveQueue(t *testing.T) {
q := NewPriorityQueue()
q.Add(&medPriorityPod)
q.unschedulableQ.addOrUpdate(&unschedulablePod)
q.unschedulableQ.addOrUpdate(&highPriorityPod)
q.MoveAllToActiveQueue()
if q.activeQ.data.Len() != 3 {
t.Error("Expected all items to be in activeQ.")
}
}
// TestPriorityQueue_AssignedPodAdded tests AssignedPodAdded. It checks that
// when a pod with pod affinity is in unschedulableQ and another pod with a
// matching label is added, the unschedulable pod is moved to activeQ.
func TestPriorityQueue_AssignedPodAdded(t *testing.T) {
affinityPod := unschedulablePod.DeepCopy()
affinityPod.Name = "afp"
affinityPod.Spec = v1.PodSpec{
Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan", "value2"},
},
},
},
TopologyKey: "region",
},
},
},
},
Priority: &mediumPriority,
}
labelPod := v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "lbp",
Namespace: affinityPod.Namespace,
Labels: map[string]string{"service": "securityscan"},
},
Spec: v1.PodSpec{NodeName: "machine1"},
}
q := NewPriorityQueue()
q.Add(&medPriorityPod)
// Add a couple of pods to the unschedulableQ.
q.unschedulableQ.addOrUpdate(&unschedulablePod)
q.unschedulableQ.addOrUpdate(affinityPod)
// Simulate addition of an assigned pod. The pod has matching labels for
// affinityPod. So, affinityPod should go to activeQ.
q.AssignedPodAdded(&labelPod)
if q.unschedulableQ.get(affinityPod) != nil {
t.Error("affinityPod is still in the unschedulableQ.")
}
if _, exists, _ := q.activeQ.Get(affinityPod); !exists {
t.Error("affinityPod is not moved to activeQ.")
}
// Check that the other pod is still in the unschedulableQ.
if q.unschedulableQ.get(&unschedulablePod) == nil {
t.Error("unschedulablePod is not in the unschedulableQ.")
}
}
func TestPriorityQueue_WaitingPodsForNode(t *testing.T) {
q := NewPriorityQueue()
q.Add(&medPriorityPod)
q.Add(&unschedulablePod)
q.Add(&highPriorityPod)
if p, err := q.Pop(); err != nil || p != &highPriorityPod {
t.Errorf("Expected: %v after Pop, but got: %v", highPriorityPod.Name, p.Name)
}
expectedList := []*v1.Pod{&medPriorityPod, &unschedulablePod}
if !reflect.DeepEqual(expectedList, q.WaitingPodsForNode("node1")) {
t.Error("Unexpected list of nominated Pods for node.")
}
if q.WaitingPodsForNode("node2") != nil {
t.Error("Expected list of nominated Pods for node2 to be empty.")
}
}
func TestUnschedulablePodsMap(t *testing.T) {
var pods = []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "p0",
Namespace: "ns1",
Annotations: map[string]string{
"annot1": "val1",
},
},
Status: v1.PodStatus{
NominatedNodeName: "node1",
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "p1",
Namespace: "ns1",
Annotations: map[string]string{
"annot": "val",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "p2",
Namespace: "ns2",
Annotations: map[string]string{
"annot2": "val2", "annot3": "val3",
},
},
Status: v1.PodStatus{
NominatedNodeName: "node3",
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "p3",
Namespace: "ns4",
},
Status: v1.PodStatus{
NominatedNodeName: "node1",
},
},
}
var updatedPods = make([]*v1.Pod, len(pods))
updatedPods[0] = pods[0].DeepCopy()
updatedPods[1] = pods[1].DeepCopy()
updatedPods[3] = pods[3].DeepCopy()
tests := []struct {
podsToAdd []*v1.Pod
expectedMapAfterAdd map[string]*v1.Pod
podsToUpdate []*v1.Pod
expectedMapAfterUpdate map[string]*v1.Pod
podsToDelete []*v1.Pod
expectedMapAfterDelete map[string]*v1.Pod
}{
{
podsToAdd: []*v1.Pod{pods[0], pods[1], pods[2], pods[3]},
expectedMapAfterAdd: map[string]*v1.Pod{
util.GetPodFullName(pods[0]): pods[0],
util.GetPodFullName(pods[1]): pods[1],
util.GetPodFullName(pods[2]): pods[2],
util.GetPodFullName(pods[3]): pods[3],
},
podsToUpdate: []*v1.Pod{updatedPods[0]},
expectedMapAfterUpdate: map[string]*v1.Pod{
util.GetPodFullName(pods[0]): updatedPods[0],
util.GetPodFullName(pods[1]): pods[1],
util.GetPodFullName(pods[2]): pods[2],
util.GetPodFullName(pods[3]): pods[3],
},
podsToDelete: []*v1.Pod{pods[0], pods[1]},
expectedMapAfterDelete: map[string]*v1.Pod{
util.GetPodFullName(pods[2]): pods[2],
util.GetPodFullName(pods[3]): pods[3],
},
},
{
podsToAdd: []*v1.Pod{pods[0], pods[3]},
expectedMapAfterAdd: map[string]*v1.Pod{
util.GetPodFullName(pods[0]): pods[0],
util.GetPodFullName(pods[3]): pods[3],
},
podsToUpdate: []*v1.Pod{updatedPods[3]},
expectedMapAfterUpdate: map[string]*v1.Pod{
util.GetPodFullName(pods[0]): pods[0],
util.GetPodFullName(pods[3]): updatedPods[3],
},
podsToDelete: []*v1.Pod{pods[0], pods[3]},
expectedMapAfterDelete: map[string]*v1.Pod{},
},
{
podsToAdd: []*v1.Pod{pods[1], pods[2]},
expectedMapAfterAdd: map[string]*v1.Pod{
util.GetPodFullName(pods[1]): pods[1],
util.GetPodFullName(pods[2]): pods[2],
},
podsToUpdate: []*v1.Pod{updatedPods[1]},
expectedMapAfterUpdate: map[string]*v1.Pod{
util.GetPodFullName(pods[1]): updatedPods[1],
util.GetPodFullName(pods[2]): pods[2],
},
podsToDelete: []*v1.Pod{pods[2], pods[3]},
expectedMapAfterDelete: map[string]*v1.Pod{
util.GetPodFullName(pods[1]): updatedPods[1],
},
},
}
for i, test := range tests {
upm := newUnschedulablePodsMap()
for _, p := range test.podsToAdd {
upm.addOrUpdate(p)
}
if !reflect.DeepEqual(upm.pods, test.expectedMapAfterAdd) {
t.Errorf("#%d: Unexpected map after adding pods. Expected: %v, got: %v",
i, test.expectedMapAfterAdd, upm.pods)
}
if len(test.podsToUpdate) > 0 {
for _, p := range test.podsToUpdate {
upm.addOrUpdate(p)
}
if !reflect.DeepEqual(upm.pods, test.expectedMapAfterUpdate) {
t.Errorf("#%d: Unexpected map after updating pods. Expected: %v, got: %v",
i, test.expectedMapAfterUpdate, upm.pods)
}
}
for _, p := range test.podsToDelete {
upm.delete(p)
}
if !reflect.DeepEqual(upm.pods, test.expectedMapAfterDelete) {
t.Errorf("#%d: Unexpected map after deleting pods. Expected: %v, got: %v",
i, test.expectedMapAfterDelete, upm.pods)
}
upm.clear()
if len(upm.pods) != 0 {
t.Errorf("Expected the map to be empty, but has %v elements.", len(upm.pods))
}
}
}

100
vendor/k8s.io/kubernetes/pkg/scheduler/factory/BUILD generated vendored Normal file
View File

@ -0,0 +1,100 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"factory.go",
"plugins.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/factory",
deps = [
"//pkg/api/v1/pod:go_default_library",
"//pkg/apis/core/helper:go_default_library",
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/scheduler:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/algorithm/priorities:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/api/validation:go_default_library",
"//pkg/scheduler/core:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/fields:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/informers/apps/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/informers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/informers/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/informers/policy/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/informers/storage/v1:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/listers/apps/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/listers/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/listers/policy/v1beta1:go_default_library",
"//vendor/k8s.io/client-go/listers/storage/v1:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"factory_test.go",
"plugins_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/api/testing:go_default_library",
"//pkg/scheduler:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/api/latest:go_default_library",
"//pkg/scheduler/core:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/client-go/informers:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/rest:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/client-go/util/testing:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

1407
vendor/k8s.io/kubernetes/pkg/scheduler/factory/factory.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,619 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package factory
import (
"errors"
"fmt"
"net/http"
"net/http/httptest"
"reflect"
"testing"
"time"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
utiltesting "k8s.io/client-go/util/testing"
"k8s.io/kubernetes/pkg/api/legacyscheme"
apitesting "k8s.io/kubernetes/pkg/api/testing"
"k8s.io/kubernetes/pkg/scheduler"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
latestschedulerapi "k8s.io/kubernetes/pkg/scheduler/api/latest"
"k8s.io/kubernetes/pkg/scheduler/core"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
"k8s.io/kubernetes/pkg/scheduler/util"
)
const enableEquivalenceCache = true
func TestCreate(t *testing.T) {
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
factory := newConfigFactory(client, v1.DefaultHardPodAffinitySymmetricWeight)
factory.Create()
}
// Test configures a scheduler from a policies defined in a file
// It combines some configurable predicate/priorities with some pre-defined ones
func TestCreateFromConfig(t *testing.T) {
var configData []byte
var policy schedulerapi.Policy
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
factory := newConfigFactory(client, v1.DefaultHardPodAffinitySymmetricWeight)
// Pre-register some predicate and priority functions
RegisterFitPredicate("PredicateOne", PredicateOne)
RegisterFitPredicate("PredicateTwo", PredicateTwo)
RegisterPriorityFunction("PriorityOne", PriorityOne, 1)
RegisterPriorityFunction("PriorityTwo", PriorityTwo, 1)
configData = []byte(`{
"kind" : "Policy",
"apiVersion" : "v1",
"predicates" : [
{"name" : "TestZoneAffinity", "argument" : {"serviceAffinity" : {"labels" : ["zone"]}}},
{"name" : "TestRequireZone", "argument" : {"labelsPresence" : {"labels" : ["zone"], "presence" : true}}},
{"name" : "PredicateOne"},
{"name" : "PredicateTwo"}
],
"priorities" : [
{"name" : "RackSpread", "weight" : 3, "argument" : {"serviceAntiAffinity" : {"label" : "rack"}}},
{"name" : "PriorityOne", "weight" : 2},
{"name" : "PriorityTwo", "weight" : 1} ]
}`)
if err := runtime.DecodeInto(latestschedulerapi.Codec, configData, &policy); err != nil {
t.Errorf("Invalid configuration: %v", err)
}
factory.CreateFromConfig(policy)
hpa := factory.GetHardPodAffinitySymmetricWeight()
if hpa != v1.DefaultHardPodAffinitySymmetricWeight {
t.Errorf("Wrong hardPodAffinitySymmetricWeight, ecpected: %d, got: %d", v1.DefaultHardPodAffinitySymmetricWeight, hpa)
}
}
func TestCreateFromConfigWithHardPodAffinitySymmetricWeight(t *testing.T) {
var configData []byte
var policy schedulerapi.Policy
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
factory := newConfigFactory(client, v1.DefaultHardPodAffinitySymmetricWeight)
// Pre-register some predicate and priority functions
RegisterFitPredicate("PredicateOne", PredicateOne)
RegisterFitPredicate("PredicateTwo", PredicateTwo)
RegisterPriorityFunction("PriorityOne", PriorityOne, 1)
RegisterPriorityFunction("PriorityTwo", PriorityTwo, 1)
configData = []byte(`{
"kind" : "Policy",
"apiVersion" : "v1",
"predicates" : [
{"name" : "TestZoneAffinity", "argument" : {"serviceAffinity" : {"labels" : ["zone"]}}},
{"name" : "TestRequireZone", "argument" : {"labelsPresence" : {"labels" : ["zone"], "presence" : true}}},
{"name" : "PredicateOne"},
{"name" : "PredicateTwo"}
],
"priorities" : [
{"name" : "RackSpread", "weight" : 3, "argument" : {"serviceAntiAffinity" : {"label" : "rack"}}},
{"name" : "PriorityOne", "weight" : 2},
{"name" : "PriorityTwo", "weight" : 1}
],
"hardPodAffinitySymmetricWeight" : 10
}`)
if err := runtime.DecodeInto(latestschedulerapi.Codec, configData, &policy); err != nil {
t.Errorf("Invalid configuration: %v", err)
}
factory.CreateFromConfig(policy)
hpa := factory.GetHardPodAffinitySymmetricWeight()
if hpa != 10 {
t.Errorf("Wrong hardPodAffinitySymmetricWeight, ecpected: %d, got: %d", 10, hpa)
}
}
func TestCreateFromEmptyConfig(t *testing.T) {
var configData []byte
var policy schedulerapi.Policy
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
factory := newConfigFactory(client, v1.DefaultHardPodAffinitySymmetricWeight)
configData = []byte(`{}`)
if err := runtime.DecodeInto(latestschedulerapi.Codec, configData, &policy); err != nil {
t.Errorf("Invalid configuration: %v", err)
}
factory.CreateFromConfig(policy)
}
// Test configures a scheduler from a policy that does not specify any
// predicate/priority.
// The predicate/priority from DefaultProvider will be used.
func TestCreateFromConfigWithUnspecifiedPredicatesOrPriorities(t *testing.T) {
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
factory := newConfigFactory(client, v1.DefaultHardPodAffinitySymmetricWeight)
RegisterFitPredicate("PredicateOne", PredicateOne)
RegisterPriorityFunction("PriorityOne", PriorityOne, 1)
RegisterAlgorithmProvider(DefaultProvider, sets.NewString("PredicateOne"), sets.NewString("PriorityOne"))
configData := []byte(`{
"kind" : "Policy",
"apiVersion" : "v1"
}`)
var policy schedulerapi.Policy
if err := runtime.DecodeInto(latestschedulerapi.Codec, configData, &policy); err != nil {
t.Fatalf("Invalid configuration: %v", err)
}
config, err := factory.CreateFromConfig(policy)
if err != nil {
t.Fatalf("Failed to create scheduler from configuration: %v", err)
}
if _, found := config.Algorithm.Predicates()["PredicateOne"]; !found {
t.Errorf("Expected predicate PredicateOne from %q", DefaultProvider)
}
if len(config.Algorithm.Prioritizers()) != 1 || config.Algorithm.Prioritizers()[0].Name != "PriorityOne" {
t.Errorf("Expected priority PriorityOne from %q", DefaultProvider)
}
}
// Test configures a scheduler from a policy that contains empty
// predicate/priority.
// Empty predicate/priority sets will be used.
func TestCreateFromConfigWithEmptyPredicatesOrPriorities(t *testing.T) {
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
factory := newConfigFactory(client, v1.DefaultHardPodAffinitySymmetricWeight)
RegisterFitPredicate("PredicateOne", PredicateOne)
RegisterPriorityFunction("PriorityOne", PriorityOne, 1)
RegisterAlgorithmProvider(DefaultProvider, sets.NewString("PredicateOne"), sets.NewString("PriorityOne"))
configData := []byte(`{
"kind" : "Policy",
"apiVersion" : "v1",
"predicates" : [],
"priorities" : []
}`)
var policy schedulerapi.Policy
if err := runtime.DecodeInto(latestschedulerapi.Codec, configData, &policy); err != nil {
t.Fatalf("Invalid configuration: %v", err)
}
config, err := factory.CreateFromConfig(policy)
if err != nil {
t.Fatalf("Failed to create scheduler from configuration: %v", err)
}
if len(config.Algorithm.Predicates()) != 0 {
t.Error("Expected empty predicate sets")
}
if len(config.Algorithm.Prioritizers()) != 0 {
t.Error("Expected empty priority sets")
}
}
func PredicateOne(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
return true, nil, nil
}
func PredicateTwo(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
return true, nil, nil
}
func PriorityOne(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
return []schedulerapi.HostPriority{}, nil
}
func PriorityTwo(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
return []schedulerapi.HostPriority{}, nil
}
func TestDefaultErrorFunc(t *testing.T) {
testPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "bar"},
Spec: apitesting.V1DeepEqualSafePodSpec(),
}
handler := utiltesting.FakeHandler{
StatusCode: 200,
ResponseBody: runtime.EncodeOrDie(util.Test.Codec(), testPod),
T: t,
}
mux := http.NewServeMux()
// FakeHandler mustn't be sent requests other than the one you want to test.
mux.Handle(util.Test.ResourcePath(string(v1.ResourcePods), "bar", "foo"), &handler)
server := httptest.NewServer(mux)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
factory := newConfigFactory(client, v1.DefaultHardPodAffinitySymmetricWeight)
queue := &core.FIFO{FIFO: cache.NewFIFO(cache.MetaNamespaceKeyFunc)}
podBackoff := util.CreatePodBackoff(1*time.Millisecond, 1*time.Second)
errFunc := factory.MakeDefaultErrorFunc(podBackoff, queue)
errFunc(testPod, nil)
for {
// This is a terrible way to do this but I plan on replacing this
// whole error handling system in the future. The test will time
// out if something doesn't work.
time.Sleep(10 * time.Millisecond)
got, exists, _ := queue.Get(testPod)
if !exists {
continue
}
handler.ValidateRequest(t, util.Test.ResourcePath(string(v1.ResourcePods), "bar", "foo"), "GET", nil)
if e, a := testPod, got; !reflect.DeepEqual(e, a) {
t.Errorf("Expected %v, got %v", e, a)
}
break
}
}
func TestNodeEnumerator(t *testing.T) {
testList := &v1.NodeList{
Items: []v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "foo"}},
{ObjectMeta: metav1.ObjectMeta{Name: "bar"}},
{ObjectMeta: metav1.ObjectMeta{Name: "baz"}},
},
}
me := nodeEnumerator{testList}
if e, a := 3, me.Len(); e != a {
t.Fatalf("expected %v, got %v", e, a)
}
for i := range testList.Items {
gotObj := me.Get(i)
if e, a := testList.Items[i].Name, gotObj.(*v1.Node).Name; e != a {
t.Errorf("Expected %v, got %v", e, a)
}
if e, a := &testList.Items[i], gotObj; !reflect.DeepEqual(e, a) {
t.Errorf("Expected %#v, got %v#", e, a)
}
}
}
func TestBind(t *testing.T) {
table := []struct {
binding *v1.Binding
}{
{binding: &v1.Binding{
ObjectMeta: metav1.ObjectMeta{
Namespace: metav1.NamespaceDefault,
Name: "foo",
},
Target: v1.ObjectReference{
Name: "foohost.kubernetes.mydomain.com",
},
}},
}
for _, item := range table {
handler := utiltesting.FakeHandler{
StatusCode: 200,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
b := binder{client}
if err := b.Bind(item.binding); err != nil {
t.Errorf("Unexpected error: %v", err)
continue
}
expectedBody := runtime.EncodeOrDie(util.Test.Codec(), item.binding)
handler.ValidateRequest(t,
util.Test.SubResourcePath(string(v1.ResourcePods), metav1.NamespaceDefault, "foo", "binding"),
"POST", &expectedBody)
}
}
func TestInvalidHardPodAffinitySymmetricWeight(t *testing.T) {
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
// factory of "default-scheduler"
factory := newConfigFactory(client, -1)
_, err := factory.Create()
if err == nil {
t.Errorf("expected err: invalid hardPodAffinitySymmetricWeight, got nothing")
}
}
func TestInvalidFactoryArgs(t *testing.T) {
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
testCases := []struct {
hardPodAffinitySymmetricWeight int32
expectErr string
}{
{
hardPodAffinitySymmetricWeight: -1,
expectErr: "invalid hardPodAffinitySymmetricWeight: -1, must be in the range 0-100",
},
{
hardPodAffinitySymmetricWeight: 101,
expectErr: "invalid hardPodAffinitySymmetricWeight: 101, must be in the range 0-100",
},
}
for _, test := range testCases {
factory := newConfigFactory(client, test.hardPodAffinitySymmetricWeight)
_, err := factory.Create()
if err == nil {
t.Errorf("expected err: %s, got nothing", test.expectErr)
}
}
}
func TestSkipPodUpdate(t *testing.T) {
for _, test := range []struct {
pod *v1.Pod
isAssumedPodFunc func(*v1.Pod) bool
getPodFunc func(*v1.Pod) *v1.Pod
expected bool
}{
// Non-assumed pod should not be skipped.
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-0",
},
},
isAssumedPodFunc: func(*v1.Pod) bool { return false },
getPodFunc: func(*v1.Pod) *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-0",
},
}
},
expected: false,
},
// Pod update (with changes on ResourceVersion, Spec.NodeName and/or
// Annotations) for an already assumed pod should be skipped.
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-0",
Annotations: map[string]string{"a": "b"},
ResourceVersion: "0",
},
Spec: v1.PodSpec{
NodeName: "node-0",
},
},
isAssumedPodFunc: func(*v1.Pod) bool {
return true
},
getPodFunc: func(*v1.Pod) *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-0",
Annotations: map[string]string{"c": "d"},
ResourceVersion: "1",
},
Spec: v1.PodSpec{
NodeName: "node-1",
},
}
},
expected: true,
},
// Pod update (with changes on Labels) for an already assumed pod
// should not be skipped.
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-0",
Labels: map[string]string{"a": "b"},
},
},
isAssumedPodFunc: func(*v1.Pod) bool {
return true
},
getPodFunc: func(*v1.Pod) *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-0",
Labels: map[string]string{"c": "d"},
},
}
},
expected: false,
},
} {
c := &configFactory{
schedulerCache: &schedulertesting.FakeCache{
IsAssumedPodFunc: test.isAssumedPodFunc,
GetPodFunc: test.getPodFunc,
},
}
got := c.skipPodUpdate(test.pod)
if got != test.expected {
t.Errorf("skipPodUpdate() = %t, expected = %t", got, test.expected)
}
}
}
func newConfigFactory(client *clientset.Clientset, hardPodAffinitySymmetricWeight int32) scheduler.Configurator {
informerFactory := informers.NewSharedInformerFactory(client, 0)
return NewConfigFactory(
v1.DefaultSchedulerName,
client,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
informerFactory.Extensions().V1beta1().ReplicaSets(),
informerFactory.Apps().V1beta1().StatefulSets(),
informerFactory.Core().V1().Services(),
informerFactory.Policy().V1beta1().PodDisruptionBudgets(),
informerFactory.Storage().V1().StorageClasses(),
hardPodAffinitySymmetricWeight,
enableEquivalenceCache,
)
}
type fakeExtender struct {
isBinder bool
interestedPodName string
}
func (f *fakeExtender) Filter(pod *v1.Pod, nodes []*v1.Node, nodeNameToInfo map[string]*schedulercache.NodeInfo) (filteredNodes []*v1.Node, failedNodesMap schedulerapi.FailedNodesMap, err error) {
return nil, nil, nil
}
func (f *fakeExtender) Prioritize(pod *v1.Pod, nodes []*v1.Node) (hostPriorities *schedulerapi.HostPriorityList, weight int, err error) {
return nil, 0, nil
}
func (f *fakeExtender) Bind(binding *v1.Binding) error {
if f.isBinder {
return nil
}
return errors.New("not a binder")
}
func (f *fakeExtender) IsBinder() bool {
return f.isBinder
}
func (f *fakeExtender) IsInterested(pod *v1.Pod) bool {
return pod != nil && pod.Name == f.interestedPodName
}
func TestGetBinderFunc(t *testing.T) {
for _, test := range []struct {
podName string
extenders []algorithm.SchedulerExtender
expectedBinderType string
}{
// Expect to return the default binder because the extender is not a
// binder, even though it's interested in the pod.
{
podName: "pod0",
extenders: []algorithm.SchedulerExtender{
&fakeExtender{isBinder: false, interestedPodName: "pod0"},
},
expectedBinderType: "*factory.binder",
},
// Expect to return the fake binder because one of the extenders is a
// binder and it's interested in the pod.
{
podName: "pod0",
extenders: []algorithm.SchedulerExtender{
&fakeExtender{isBinder: false, interestedPodName: "pod0"},
&fakeExtender{isBinder: true, interestedPodName: "pod0"},
},
expectedBinderType: "*factory.fakeExtender",
},
// Expect to return the default binder because one of the extenders is
// a binder but the binder is not interested in the pod.
{
podName: "pod1",
extenders: []algorithm.SchedulerExtender{
&fakeExtender{isBinder: false, interestedPodName: "pod1"},
&fakeExtender{isBinder: true, interestedPodName: "pod0"},
},
expectedBinderType: "*factory.binder",
},
} {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: test.podName,
},
}
f := &configFactory{}
binderFunc := f.getBinderFunc(test.extenders)
binder := binderFunc(pod)
binderType := fmt.Sprintf("%s", reflect.TypeOf(binder))
if binderType != test.expectedBinderType {
t.Errorf("Expected binder %q but got %q", test.expectedBinderType, binderType)
}
}
}

View File

@ -0,0 +1,546 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package factory
import (
"fmt"
"regexp"
"sort"
"strings"
"sync"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
"k8s.io/kubernetes/pkg/scheduler/algorithm/priorities"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/volumebinder"
"github.com/golang/glog"
)
// PluginFactoryArgs are passed to all plugin factory functions.
type PluginFactoryArgs struct {
PodLister algorithm.PodLister
ServiceLister algorithm.ServiceLister
ControllerLister algorithm.ControllerLister
ReplicaSetLister algorithm.ReplicaSetLister
StatefulSetLister algorithm.StatefulSetLister
NodeLister algorithm.NodeLister
NodeInfo predicates.NodeInfo
PVInfo predicates.PersistentVolumeInfo
PVCInfo predicates.PersistentVolumeClaimInfo
StorageClassInfo predicates.StorageClassInfo
VolumeBinder *volumebinder.VolumeBinder
HardPodAffinitySymmetricWeight int32
}
// PriorityMetadataProducerFactory produces PriorityMetadataProducer from the given args.
type PriorityMetadataProducerFactory func(PluginFactoryArgs) algorithm.PriorityMetadataProducer
// PredicateMetadataProducerFactory produces PredicateMetadataProducer from the given args.
type PredicateMetadataProducerFactory func(PluginFactoryArgs) algorithm.PredicateMetadataProducer
// FitPredicateFactory produces a FitPredicate from the given args.
type FitPredicateFactory func(PluginFactoryArgs) algorithm.FitPredicate
// PriorityFunctionFactory produces a PriorityConfig from the given args.
// DEPRECATED
// Use Map-Reduce pattern for priority functions.
type PriorityFunctionFactory func(PluginFactoryArgs) algorithm.PriorityFunction
// PriorityFunctionFactory2 produces map & reduce priority functions
// from a given args.
// FIXME: Rename to PriorityFunctionFactory.
type PriorityFunctionFactory2 func(PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction)
// PriorityConfigFactory produces a PriorityConfig from the given function and weight
type PriorityConfigFactory struct {
Function PriorityFunctionFactory
MapReduceFunction PriorityFunctionFactory2
Weight int
}
// EquivalencePodFuncFactory produces a function to get equivalence class for given pod.
type EquivalencePodFuncFactory func(PluginFactoryArgs) algorithm.GetEquivalencePodFunc
var (
schedulerFactoryMutex sync.Mutex
// maps that hold registered algorithm types
fitPredicateMap = make(map[string]FitPredicateFactory)
mandatoryFitPredicates = sets.NewString()
priorityFunctionMap = make(map[string]PriorityConfigFactory)
algorithmProviderMap = make(map[string]AlgorithmProviderConfig)
// Registered metadata producers
priorityMetadataProducer PriorityMetadataProducerFactory
predicateMetadataProducer PredicateMetadataProducerFactory
// get equivalence pod function
getEquivalencePodFuncFactory EquivalencePodFuncFactory
)
const (
// DefaultProvider defines the default algorithm provider name.
DefaultProvider = "DefaultProvider"
)
// AlgorithmProviderConfig is used to store the configuration of algorithm providers.
type AlgorithmProviderConfig struct {
FitPredicateKeys sets.String
PriorityFunctionKeys sets.String
}
// RegisterFitPredicate registers a fit predicate with the algorithm
// registry. Returns the name with which the predicate was registered.
func RegisterFitPredicate(name string, predicate algorithm.FitPredicate) string {
return RegisterFitPredicateFactory(name, func(PluginFactoryArgs) algorithm.FitPredicate { return predicate })
}
// RemoveFitPredicate removes a fit predicate from factory.
func RemoveFitPredicate(name string) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
validateAlgorithmNameOrDie(name)
delete(fitPredicateMap, name)
mandatoryFitPredicates.Delete(name)
}
// RemovePredicateKeyFromAlgoProvider removes a fit predicate key from algorithmProvider.
func RemovePredicateKeyFromAlgoProvider(providerName, key string) error {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
validateAlgorithmNameOrDie(providerName)
provider, ok := algorithmProviderMap[providerName]
if !ok {
return fmt.Errorf("plugin %v has not been registered", providerName)
}
provider.FitPredicateKeys.Delete(key)
return nil
}
// RemovePredicateKeyFromAlgorithmProviderMap removes a fit predicate key from all algorithmProviders which in algorithmProviderMap.
func RemovePredicateKeyFromAlgorithmProviderMap(key string) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
for _, provider := range algorithmProviderMap {
provider.FitPredicateKeys.Delete(key)
}
return
}
// InsertPredicateKeyToAlgoProvider insert a fit predicate key to algorithmProvider.
func InsertPredicateKeyToAlgoProvider(providerName, key string) error {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
validateAlgorithmNameOrDie(providerName)
provider, ok := algorithmProviderMap[providerName]
if !ok {
return fmt.Errorf("plugin %v has not been registered", providerName)
}
provider.FitPredicateKeys.Insert(key)
return nil
}
// InsertPredicateKeyToAlgorithmProviderMap insert a fit predicate key to all algorithmProviders which in algorithmProviderMap.
func InsertPredicateKeyToAlgorithmProviderMap(key string) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
for _, provider := range algorithmProviderMap {
provider.FitPredicateKeys.Insert(key)
}
return
}
// RegisterMandatoryFitPredicate registers a fit predicate with the algorithm registry, the predicate is used by
// kubelet, DaemonSet; it is always included in configuration. Returns the name with which the predicate was
// registered.
func RegisterMandatoryFitPredicate(name string, predicate algorithm.FitPredicate) string {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
validateAlgorithmNameOrDie(name)
fitPredicateMap[name] = func(PluginFactoryArgs) algorithm.FitPredicate { return predicate }
mandatoryFitPredicates.Insert(name)
return name
}
// RegisterFitPredicateFactory registers a fit predicate factory with the
// algorithm registry. Returns the name with which the predicate was registered.
func RegisterFitPredicateFactory(name string, predicateFactory FitPredicateFactory) string {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
validateAlgorithmNameOrDie(name)
fitPredicateMap[name] = predicateFactory
return name
}
// RegisterCustomFitPredicate registers a custom fit predicate with the algorithm registry.
// Returns the name, with which the predicate was registered.
func RegisterCustomFitPredicate(policy schedulerapi.PredicatePolicy) string {
var predicateFactory FitPredicateFactory
var ok bool
validatePredicateOrDie(policy)
// generate the predicate function, if a custom type is requested
if policy.Argument != nil {
if policy.Argument.ServiceAffinity != nil {
predicateFactory = func(args PluginFactoryArgs) algorithm.FitPredicate {
predicate, precomputationFunction := predicates.NewServiceAffinityPredicate(
args.PodLister,
args.ServiceLister,
args.NodeInfo,
policy.Argument.ServiceAffinity.Labels,
)
// Once we generate the predicate we should also Register the Precomputation
predicates.RegisterPredicateMetadataProducer(policy.Name, precomputationFunction)
return predicate
}
} else if policy.Argument.LabelsPresence != nil {
predicateFactory = func(args PluginFactoryArgs) algorithm.FitPredicate {
return predicates.NewNodeLabelPredicate(
policy.Argument.LabelsPresence.Labels,
policy.Argument.LabelsPresence.Presence,
)
}
}
} else if predicateFactory, ok = fitPredicateMap[policy.Name]; ok {
// checking to see if a pre-defined predicate is requested
glog.V(2).Infof("Predicate type %s already registered, reusing.", policy.Name)
return policy.Name
}
if predicateFactory == nil {
glog.Fatalf("Invalid configuration: Predicate type not found for %s", policy.Name)
}
return RegisterFitPredicateFactory(policy.Name, predicateFactory)
}
// IsFitPredicateRegistered is useful for testing providers.
func IsFitPredicateRegistered(name string) bool {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
_, ok := fitPredicateMap[name]
return ok
}
// RegisterPriorityMetadataProducerFactory registers a PriorityMetadataProducerFactory.
func RegisterPriorityMetadataProducerFactory(factory PriorityMetadataProducerFactory) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
priorityMetadataProducer = factory
}
// RegisterPredicateMetadataProducerFactory registers a PredicateMetadataProducerFactory.
func RegisterPredicateMetadataProducerFactory(factory PredicateMetadataProducerFactory) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
predicateMetadataProducer = factory
}
// RegisterPriorityFunction registers a priority function with the algorithm registry. Returns the name,
// with which the function was registered.
// DEPRECATED
// Use Map-Reduce pattern for priority functions.
func RegisterPriorityFunction(name string, function algorithm.PriorityFunction, weight int) string {
return RegisterPriorityConfigFactory(name, PriorityConfigFactory{
Function: func(PluginFactoryArgs) algorithm.PriorityFunction {
return function
},
Weight: weight,
})
}
// RegisterPriorityFunction2 registers a priority function with the algorithm registry. Returns the name,
// with which the function was registered.
// FIXME: Rename to PriorityFunctionFactory.
func RegisterPriorityFunction2(
name string,
mapFunction algorithm.PriorityMapFunction,
reduceFunction algorithm.PriorityReduceFunction,
weight int) string {
return RegisterPriorityConfigFactory(name, PriorityConfigFactory{
MapReduceFunction: func(PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
return mapFunction, reduceFunction
},
Weight: weight,
})
}
// RegisterPriorityConfigFactory registers a priority config factory with its name.
func RegisterPriorityConfigFactory(name string, pcf PriorityConfigFactory) string {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
validateAlgorithmNameOrDie(name)
priorityFunctionMap[name] = pcf
return name
}
// RegisterCustomPriorityFunction registers a custom priority function with the algorithm registry.
// Returns the name, with which the priority function was registered.
func RegisterCustomPriorityFunction(policy schedulerapi.PriorityPolicy) string {
var pcf *PriorityConfigFactory
validatePriorityOrDie(policy)
// generate the priority function, if a custom priority is requested
if policy.Argument != nil {
if policy.Argument.ServiceAntiAffinity != nil {
pcf = &PriorityConfigFactory{
MapReduceFunction: func(args PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
return priorities.NewServiceAntiAffinityPriority(
args.PodLister,
args.ServiceLister,
policy.Argument.ServiceAntiAffinity.Label,
)
},
Weight: policy.Weight,
}
} else if policy.Argument.LabelPreference != nil {
pcf = &PriorityConfigFactory{
MapReduceFunction: func(args PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
return priorities.NewNodeLabelPriority(
policy.Argument.LabelPreference.Label,
policy.Argument.LabelPreference.Presence,
)
},
Weight: policy.Weight,
}
}
} else if existingPcf, ok := priorityFunctionMap[policy.Name]; ok {
glog.V(2).Infof("Priority type %s already registered, reusing.", policy.Name)
// set/update the weight based on the policy
pcf = &PriorityConfigFactory{
Function: existingPcf.Function,
MapReduceFunction: existingPcf.MapReduceFunction,
Weight: policy.Weight,
}
}
if pcf == nil {
glog.Fatalf("Invalid configuration: Priority type not found for %s", policy.Name)
}
return RegisterPriorityConfigFactory(policy.Name, *pcf)
}
// RegisterGetEquivalencePodFunction registers equivalenceFuncFactory to produce equivalence class for given pod.
func RegisterGetEquivalencePodFunction(equivalenceFuncFactory EquivalencePodFuncFactory) {
getEquivalencePodFuncFactory = equivalenceFuncFactory
}
// IsPriorityFunctionRegistered is useful for testing providers.
func IsPriorityFunctionRegistered(name string) bool {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
_, ok := priorityFunctionMap[name]
return ok
}
// RegisterAlgorithmProvider registers a new algorithm provider with the algorithm registry. This should
// be called from the init function in a provider plugin.
func RegisterAlgorithmProvider(name string, predicateKeys, priorityKeys sets.String) string {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
validateAlgorithmNameOrDie(name)
algorithmProviderMap[name] = AlgorithmProviderConfig{
FitPredicateKeys: predicateKeys,
PriorityFunctionKeys: priorityKeys,
}
return name
}
// GetAlgorithmProvider should not be used to modify providers. It is publicly visible for testing.
func GetAlgorithmProvider(name string) (*AlgorithmProviderConfig, error) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
provider, ok := algorithmProviderMap[name]
if !ok {
return nil, fmt.Errorf("plugin %q has not been registered", name)
}
return &provider, nil
}
func getFitPredicateFunctions(names sets.String, args PluginFactoryArgs) (map[string]algorithm.FitPredicate, error) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
predicates := map[string]algorithm.FitPredicate{}
for _, name := range names.List() {
factory, ok := fitPredicateMap[name]
if !ok {
return nil, fmt.Errorf("Invalid predicate name %q specified - no corresponding function found", name)
}
predicates[name] = factory(args)
}
// Always include mandatory fit predicates.
for name := range mandatoryFitPredicates {
if factory, found := fitPredicateMap[name]; found {
predicates[name] = factory(args)
}
}
return predicates, nil
}
func getPriorityMetadataProducer(args PluginFactoryArgs) (algorithm.PriorityMetadataProducer, error) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
if priorityMetadataProducer == nil {
return algorithm.EmptyPriorityMetadataProducer, nil
}
return priorityMetadataProducer(args), nil
}
func getPredicateMetadataProducer(args PluginFactoryArgs) (algorithm.PredicateMetadataProducer, error) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
if predicateMetadataProducer == nil {
return algorithm.EmptyPredicateMetadataProducer, nil
}
return predicateMetadataProducer(args), nil
}
func getPriorityFunctionConfigs(names sets.String, args PluginFactoryArgs) ([]algorithm.PriorityConfig, error) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
configs := []algorithm.PriorityConfig{}
for _, name := range names.List() {
factory, ok := priorityFunctionMap[name]
if !ok {
return nil, fmt.Errorf("Invalid priority name %s specified - no corresponding function found", name)
}
if factory.Function != nil {
configs = append(configs, algorithm.PriorityConfig{
Name: name,
Function: factory.Function(args),
Weight: factory.Weight,
})
} else {
mapFunction, reduceFunction := factory.MapReduceFunction(args)
configs = append(configs, algorithm.PriorityConfig{
Name: name,
Map: mapFunction,
Reduce: reduceFunction,
Weight: factory.Weight,
})
}
}
if err := validateSelectedConfigs(configs); err != nil {
return nil, err
}
return configs, nil
}
// validateSelectedConfigs validates the config weights to avoid the overflow.
func validateSelectedConfigs(configs []algorithm.PriorityConfig) error {
var totalPriority int
for _, config := range configs {
// Checks totalPriority against MaxTotalPriority to avoid overflow
if config.Weight*schedulerapi.MaxPriority > schedulerapi.MaxTotalPriority-totalPriority {
return fmt.Errorf("Total priority of priority functions has overflown")
}
totalPriority += config.Weight * schedulerapi.MaxPriority
}
return nil
}
var validName = regexp.MustCompile("^[a-zA-Z0-9]([-a-zA-Z0-9]*[a-zA-Z0-9])$")
func validateAlgorithmNameOrDie(name string) {
if !validName.MatchString(name) {
glog.Fatalf("Algorithm name %v does not match the name validation regexp \"%v\".", name, validName)
}
}
func validatePredicateOrDie(predicate schedulerapi.PredicatePolicy) {
if predicate.Argument != nil {
numArgs := 0
if predicate.Argument.ServiceAffinity != nil {
numArgs++
}
if predicate.Argument.LabelsPresence != nil {
numArgs++
}
if numArgs != 1 {
glog.Fatalf("Exactly 1 predicate argument is required, numArgs: %v, Predicate: %s", numArgs, predicate.Name)
}
}
}
func validatePriorityOrDie(priority schedulerapi.PriorityPolicy) {
if priority.Argument != nil {
numArgs := 0
if priority.Argument.ServiceAntiAffinity != nil {
numArgs++
}
if priority.Argument.LabelPreference != nil {
numArgs++
}
if numArgs != 1 {
glog.Fatalf("Exactly 1 priority argument is required, numArgs: %v, Priority: %s", numArgs, priority.Name)
}
}
}
// ListRegisteredFitPredicates returns the registered fit predicates.
func ListRegisteredFitPredicates() []string {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
names := []string{}
for name := range fitPredicateMap {
names = append(names, name)
}
return names
}
// ListRegisteredPriorityFunctions returns the registered priority functions.
func ListRegisteredPriorityFunctions() []string {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
names := []string{}
for name := range priorityFunctionMap {
names = append(names, name)
}
return names
}
// ListAlgorithmProviders is called when listing all available algorithm providers in `kube-scheduler --help`
func ListAlgorithmProviders() string {
var availableAlgorithmProviders []string
for name := range algorithmProviderMap {
availableAlgorithmProviders = append(availableAlgorithmProviders, name)
}
sort.Strings(availableAlgorithmProviders)
return strings.Join(availableAlgorithmProviders, " | ")
}

View File

@ -0,0 +1,82 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package factory
import (
"testing"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/api"
)
func TestAlgorithmNameValidation(t *testing.T) {
algorithmNamesShouldValidate := []string{
"1SomeAlgo1rithm",
"someAlgor-ithm1",
}
algorithmNamesShouldNotValidate := []string{
"-SomeAlgorithm",
"SomeAlgorithm-",
"Some,Alg:orithm",
}
for _, name := range algorithmNamesShouldValidate {
if !validName.MatchString(name) {
t.Errorf("%v should be a valid algorithm name but is not valid.", name)
}
}
for _, name := range algorithmNamesShouldNotValidate {
if validName.MatchString(name) {
t.Errorf("%v should be an invalid algorithm name but is valid.", name)
}
}
}
func TestValidatePriorityConfigOverFlow(t *testing.T) {
tests := []struct {
description string
configs []algorithm.PriorityConfig
expected bool
}{
{
description: "one of the weights is MaxInt",
configs: []algorithm.PriorityConfig{{Weight: api.MaxInt}, {Weight: 5}},
expected: true,
},
{
description: "after multiplication with MaxPriority the weight is larger than MaxWeight",
configs: []algorithm.PriorityConfig{{Weight: api.MaxInt/api.MaxPriority + api.MaxPriority}, {Weight: 5}},
expected: true,
},
{
description: "normal weights",
configs: []algorithm.PriorityConfig{{Weight: 10000}, {Weight: 5}},
expected: false,
},
}
for _, test := range tests {
err := validateSelectedConfigs(test.configs)
if test.expected {
if err == nil {
t.Errorf("Expected Overflow for %s", test.description)
}
} else {
if err != nil {
t.Errorf("Did not expect an overflow for %s", test.description)
}
}
}
}

26
vendor/k8s.io/kubernetes/pkg/scheduler/metrics/BUILD generated vendored Normal file
View File

@ -0,0 +1,26 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["metrics.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/metrics",
deps = ["//vendor/github.com/prometheus/client_golang/prometheus:go_default_library"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@ -0,0 +1,113 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
)
const schedulerSubsystem = "scheduler"
// All the histogram based metrics have 1ms as size for the smallest bucket.
var (
E2eSchedulingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Name: "e2e_scheduling_latency_microseconds",
Help: "E2e scheduling latency (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Name: "scheduling_algorithm_latency_microseconds",
Help: "Scheduling algorithm latency",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Name: "scheduling_algorithm_predicate_evaluation",
Help: "Scheduling algorithm predicate evaluation duration",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Name: "scheduling_algorithm_priority_evaluation",
Help: "Scheduling algorithm priority evaluation duration",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Name: "scheduling_algorithm_preemption_evaluation",
Help: "Scheduling algorithm preemption evaluation duration",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
BindingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Name: "binding_latency_microseconds",
Help: "Binding latency",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
PreemptionVictims = prometheus.NewGauge(
prometheus.GaugeOpts{
Subsystem: schedulerSubsystem,
Name: "pod_preemption_victims",
Help: "Number of selected preemption victims",
})
PreemptionAttempts = prometheus.NewCounter(
prometheus.CounterOpts{
Subsystem: schedulerSubsystem,
Name: "total_preemption_attempts",
Help: "Total preemption attempts in the cluster till now",
})
)
var registerMetrics sync.Once
// Register all metrics.
func Register() {
// Register the metrics.
registerMetrics.Do(func() {
prometheus.MustRegister(E2eSchedulingLatency)
prometheus.MustRegister(SchedulingAlgorithmLatency)
prometheus.MustRegister(BindingLatency)
prometheus.MustRegister(SchedulingAlgorithmPredicateEvaluationDuration)
prometheus.MustRegister(SchedulingAlgorithmPriorityEvaluationDuration)
prometheus.MustRegister(SchedulingAlgorithmPremptionEvaluationDuration)
prometheus.MustRegister(PreemptionVictims)
prometheus.MustRegister(PreemptionAttempts)
})
}
// SinceInMicroseconds gets the time since the specified start in microseconds.
func SinceInMicroseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
}

497
vendor/k8s.io/kubernetes/pkg/scheduler/scheduler.go generated vendored Normal file
View File

@ -0,0 +1,497 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"fmt"
"time"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
clientset "k8s.io/client-go/kubernetes"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/core"
"k8s.io/kubernetes/pkg/scheduler/metrics"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
"k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/scheduler/volumebinder"
"github.com/golang/glog"
)
// Binder knows how to write a binding.
type Binder interface {
Bind(binding *v1.Binding) error
}
// PodConditionUpdater updates the condition of a pod based on the passed
// PodCondition
type PodConditionUpdater interface {
Update(pod *v1.Pod, podCondition *v1.PodCondition) error
}
// PodPreemptor has methods needed to delete a pod and to update
// annotations of the preemptor pod.
type PodPreemptor interface {
GetUpdatedPod(pod *v1.Pod) (*v1.Pod, error)
DeletePod(pod *v1.Pod) error
SetNominatedNodeName(pod *v1.Pod, nominatedNode string) error
RemoveNominatedNodeName(pod *v1.Pod) error
}
// Scheduler watches for new unscheduled pods. It attempts to find
// nodes that they fit on and writes bindings back to the api server.
type Scheduler struct {
config *Config
}
// StopEverything closes the scheduler config's StopEverything channel, to shut
// down the Scheduler.
func (sched *Scheduler) StopEverything() {
close(sched.config.StopEverything)
}
// Configurator defines I/O, caching, and other functionality needed to
// construct a new scheduler. An implementation of this can be seen in
// factory.go.
type Configurator interface {
GetPriorityFunctionConfigs(priorityKeys sets.String) ([]algorithm.PriorityConfig, error)
GetPriorityMetadataProducer() (algorithm.PriorityMetadataProducer, error)
GetPredicateMetadataProducer() (algorithm.PredicateMetadataProducer, error)
GetPredicates(predicateKeys sets.String) (map[string]algorithm.FitPredicate, error)
GetHardPodAffinitySymmetricWeight() int32
GetSchedulerName() string
MakeDefaultErrorFunc(backoff *util.PodBackoff, podQueue core.SchedulingQueue) func(pod *v1.Pod, err error)
// Needs to be exposed for things like integration tests where we want to make fake nodes.
GetNodeLister() corelisters.NodeLister
GetClient() clientset.Interface
GetScheduledPodLister() corelisters.PodLister
Create() (*Config, error)
CreateFromProvider(providerName string) (*Config, error)
CreateFromConfig(policy schedulerapi.Policy) (*Config, error)
CreateFromKeys(predicateKeys, priorityKeys sets.String, extenders []algorithm.SchedulerExtender) (*Config, error)
}
// Config is an implementation of the Scheduler's configured input data.
// TODO over time we should make this struct a hidden implementation detail of the scheduler.
type Config struct {
// It is expected that changes made via SchedulerCache will be observed
// by NodeLister and Algorithm.
SchedulerCache schedulercache.Cache
// Ecache is used for optimistically invalid affected cache items after
// successfully binding a pod
Ecache *core.EquivalenceCache
NodeLister algorithm.NodeLister
Algorithm algorithm.ScheduleAlgorithm
GetBinder func(pod *v1.Pod) Binder
// PodConditionUpdater is used only in case of scheduling errors. If we succeed
// with scheduling, PodScheduled condition will be updated in apiserver in /bind
// handler so that binding and setting PodCondition it is atomic.
PodConditionUpdater PodConditionUpdater
// PodPreemptor is used to evict pods and update pod annotations.
PodPreemptor PodPreemptor
// NextPod should be a function that blocks until the next pod
// is available. We don't use a channel for this, because scheduling
// a pod may take some amount of time and we don't want pods to get
// stale while they sit in a channel.
NextPod func() *v1.Pod
// WaitForCacheSync waits for scheduler cache to populate.
// It returns true if it was successful, false if the controller should shutdown.
WaitForCacheSync func() bool
// Error is called if there is an error. It is passed the pod in
// question, and the error
Error func(*v1.Pod, error)
// Recorder is the EventRecorder to use
Recorder record.EventRecorder
// Close this to shut down the scheduler.
StopEverything chan struct{}
// VolumeBinder handles PVC/PV binding for the pod.
VolumeBinder *volumebinder.VolumeBinder
}
// NewFromConfigurator returns a new scheduler that is created entirely by the Configurator. Assumes Create() is implemented.
// Supports intermediate Config mutation for now if you provide modifier functions which will run after Config is created.
func NewFromConfigurator(c Configurator, modifiers ...func(c *Config)) (*Scheduler, error) {
cfg, err := c.Create()
if err != nil {
return nil, err
}
// Mutate it if any functions were provided, changes might be required for certain types of tests (i.e. change the recorder).
for _, modifier := range modifiers {
modifier(cfg)
}
// From this point on the config is immutable to the outside.
s := &Scheduler{
config: cfg,
}
metrics.Register()
return s, nil
}
// NewFromConfig returns a new scheduler using the provided Config.
func NewFromConfig(config *Config) *Scheduler {
metrics.Register()
return &Scheduler{
config: config,
}
}
// Run begins watching and scheduling. It waits for cache to be synced, then starts a goroutine and returns immediately.
func (sched *Scheduler) Run() {
if !sched.config.WaitForCacheSync() {
return
}
if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
go sched.config.VolumeBinder.Run(sched.bindVolumesWorker, sched.config.StopEverything)
}
go wait.Until(sched.scheduleOne, 0, sched.config.StopEverything)
}
// Config return scheduler's config pointer. It is exposed for testing purposes.
func (sched *Scheduler) Config() *Config {
return sched.config
}
// schedule implements the scheduling algorithm and returns the suggested host.
func (sched *Scheduler) schedule(pod *v1.Pod) (string, error) {
host, err := sched.config.Algorithm.Schedule(pod, sched.config.NodeLister)
if err != nil {
glog.V(1).Infof("Failed to schedule pod: %v/%v", pod.Namespace, pod.Name)
pod = pod.DeepCopy()
sched.config.Error(pod, err)
sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "%v", err)
sched.config.PodConditionUpdater.Update(pod, &v1.PodCondition{
Type: v1.PodScheduled,
Status: v1.ConditionFalse,
Reason: v1.PodReasonUnschedulable,
Message: err.Error(),
})
return "", err
}
return host, err
}
// preempt tries to create room for a pod that has failed to schedule, by preempting lower priority pods if possible.
// If it succeeds, it adds the name of the node where preemption has happened to the pod annotations.
// It returns the node name and an error if any.
func (sched *Scheduler) preempt(preemptor *v1.Pod, scheduleErr error) (string, error) {
if !util.PodPriorityEnabled() {
glog.V(3).Infof("Pod priority feature is not enabled. No preemption is performed.")
return "", nil
}
preemptor, err := sched.config.PodPreemptor.GetUpdatedPod(preemptor)
if err != nil {
glog.Errorf("Error getting the updated preemptor pod object: %v", err)
return "", err
}
node, victims, nominatedPodsToClear, err := sched.config.Algorithm.Preempt(preemptor, sched.config.NodeLister, scheduleErr)
metrics.PreemptionVictims.Set(float64(len(victims)))
if err != nil {
glog.Errorf("Error preempting victims to make room for %v/%v.", preemptor.Namespace, preemptor.Name)
return "", err
}
var nodeName = ""
if node != nil {
nodeName = node.Name
err = sched.config.PodPreemptor.SetNominatedNodeName(preemptor, nodeName)
if err != nil {
glog.Errorf("Error in preemption process. Cannot update pod %v annotations: %v", preemptor.Name, err)
return "", err
}
for _, victim := range victims {
if err := sched.config.PodPreemptor.DeletePod(victim); err != nil {
glog.Errorf("Error preempting pod %v/%v: %v", victim.Namespace, victim.Name, err)
return "", err
}
sched.config.Recorder.Eventf(victim, v1.EventTypeNormal, "Preempted", "by %v/%v on node %v", preemptor.Namespace, preemptor.Name, nodeName)
}
}
// Clearing nominated pods should happen outside of "if node != nil". Node could
// be nil when a pod with nominated node name is eligible to preempt again,
// but preemption logic does not find any node for it. In that case Preempt()
// function of generic_scheduler.go returns the pod itself for removal of the annotation.
for _, p := range nominatedPodsToClear {
rErr := sched.config.PodPreemptor.RemoveNominatedNodeName(p)
if rErr != nil {
glog.Errorf("Cannot remove nominated node annotation of pod: %v", rErr)
// We do not return as this error is not critical.
}
}
return nodeName, err
}
// assumeAndBindVolumes will update the volume cache and then asynchronously bind volumes if required.
//
// If volume binding is required, then the bind volumes routine will update the pod to send it back through
// the scheduler.
//
// Otherwise, return nil error and continue to assume the pod.
//
// This function modifies assumed if volume binding is required.
func (sched *Scheduler) assumeAndBindVolumes(assumed *v1.Pod, host string) error {
if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
allBound, bindingRequired, err := sched.config.VolumeBinder.Binder.AssumePodVolumes(assumed, host)
if err != nil {
sched.config.Error(assumed, err)
sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "AssumePodVolumes failed: %v", err)
sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
Type: v1.PodScheduled,
Status: v1.ConditionFalse,
Reason: "SchedulerError",
Message: err.Error(),
})
return err
}
if !allBound {
err = fmt.Errorf("Volume binding started, waiting for completion")
if bindingRequired {
if sched.config.Ecache != nil {
invalidPredicates := sets.NewString(predicates.CheckVolumeBindingPred)
sched.config.Ecache.InvalidateCachedPredicateItemOfAllNodes(invalidPredicates)
}
// bindVolumesWorker() will update the Pod object to put it back in the scheduler queue
sched.config.VolumeBinder.BindQueue.Add(assumed)
} else {
// We are just waiting for PV controller to finish binding, put it back in the
// scheduler queue
sched.config.Error(assumed, err)
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "FailedScheduling", "%v", err)
sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
Type: v1.PodScheduled,
Status: v1.ConditionFalse,
Reason: "VolumeBindingWaiting",
})
}
return err
}
}
return nil
}
// bindVolumesWorker() processes pods queued in assumeAndBindVolumes() and tries to
// make the API update for volume binding.
// This function runs forever until the volume BindQueue is closed.
func (sched *Scheduler) bindVolumesWorker() {
workFunc := func() bool {
keyObj, quit := sched.config.VolumeBinder.BindQueue.Get()
if quit {
return true
}
defer sched.config.VolumeBinder.BindQueue.Done(keyObj)
assumed, ok := keyObj.(*v1.Pod)
if !ok {
glog.V(4).Infof("Object is not a *v1.Pod")
return false
}
// TODO: add metrics
var reason string
var eventType string
glog.V(5).Infof("Trying to bind volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name)
// The Pod is always sent back to the scheduler afterwards.
err := sched.config.VolumeBinder.Binder.BindPodVolumes(assumed)
if err != nil {
glog.V(1).Infof("Failed to bind volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name, err)
reason = "VolumeBindingFailed"
eventType = v1.EventTypeWarning
} else {
glog.V(4).Infof("Successfully bound volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name)
reason = "VolumeBindingWaiting"
eventType = v1.EventTypeNormal
err = fmt.Errorf("Volume binding started, waiting for completion")
}
// Always fail scheduling regardless of binding success.
// The Pod needs to be sent back through the scheduler to:
// * Retry volume binding if it fails.
// * Retry volume binding if dynamic provisioning fails.
// * Bind the Pod to the Node once all volumes are bound.
sched.config.Error(assumed, err)
sched.config.Recorder.Eventf(assumed, eventType, "FailedScheduling", "%v", err)
sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
Type: v1.PodScheduled,
Status: v1.ConditionFalse,
Reason: reason,
})
return false
}
for {
if quit := workFunc(); quit {
glog.V(4).Infof("bindVolumesWorker shutting down")
break
}
}
}
// assume signals to the cache that a pod is already in the cache, so that binding can be asynchronous.
// assume modifies `assumed`.
func (sched *Scheduler) assume(assumed *v1.Pod, host string) error {
// Optimistically assume that the binding will succeed and send it to apiserver
// in the background.
// If the binding fails, scheduler will release resources allocated to assumed pod
// immediately.
assumed.Spec.NodeName = host
if err := sched.config.SchedulerCache.AssumePod(assumed); err != nil {
glog.Errorf("scheduler cache AssumePod failed: %v", err)
// This is most probably result of a BUG in retrying logic.
// We report an error here so that pod scheduling can be retried.
// This relies on the fact that Error will check if the pod has been bound
// to a node and if so will not add it back to the unscheduled pods queue
// (otherwise this would cause an infinite loop).
sched.config.Error(assumed, err)
sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "AssumePod failed: %v", err)
sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
Type: v1.PodScheduled,
Status: v1.ConditionFalse,
Reason: "SchedulerError",
Message: err.Error(),
})
return err
}
// Optimistically assume that the binding will succeed, so we need to invalidate affected
// predicates in equivalence cache.
// If the binding fails, these invalidated item will not break anything.
if sched.config.Ecache != nil {
sched.config.Ecache.InvalidateCachedPredicateItemForPodAdd(assumed, host)
}
return nil
}
// bind binds a pod to a given node defined in a binding object. We expect this to run asynchronously, so we
// handle binding metrics internally.
func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
bindingStart := time.Now()
// If binding succeeded then PodScheduled condition will be updated in apiserver so that
// it's atomic with setting host.
err := sched.config.GetBinder(assumed).Bind(b)
if err := sched.config.SchedulerCache.FinishBinding(assumed); err != nil {
glog.Errorf("scheduler cache FinishBinding failed: %v", err)
}
if err != nil {
glog.V(1).Infof("Failed to bind pod: %v/%v", assumed.Namespace, assumed.Name)
if err := sched.config.SchedulerCache.ForgetPod(assumed); err != nil {
glog.Errorf("scheduler cache ForgetPod failed: %v", err)
}
sched.config.Error(assumed, err)
sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "Binding rejected: %v", err)
sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
Type: v1.PodScheduled,
Status: v1.ConditionFalse,
Reason: "BindingRejected",
})
return err
}
metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", assumed.Name, b.Target.Name)
return nil
}
// scheduleOne does the entire scheduling workflow for a single pod. It is serialized on the scheduling algorithm's host fitting.
func (sched *Scheduler) scheduleOne() {
pod := sched.config.NextPod()
if pod.DeletionTimestamp != nil {
sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "skip schedule deleting pod: %v/%v", pod.Namespace, pod.Name)
glog.V(3).Infof("Skip schedule deleting pod: %v/%v", pod.Namespace, pod.Name)
return
}
glog.V(3).Infof("Attempting to schedule pod: %v/%v", pod.Namespace, pod.Name)
// Synchronously attempt to find a fit for the pod.
start := time.Now()
suggestedHost, err := sched.schedule(pod)
if err != nil {
// schedule() may have failed because the pod would not fit on any host, so we try to
// preempt, with the expectation that the next time the pod is tried for scheduling it
// will fit due to the preemption. It is also possible that a different pod will schedule
// into the resources that were preempted, but this is harmless.
if fitError, ok := err.(*core.FitError); ok {
preemptionStartTime := time.Now()
sched.preempt(pod, fitError)
metrics.PreemptionAttempts.Inc()
metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime))
}
return
}
metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
// Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet.
// This allows us to keep scheduling without waiting on binding to occur.
assumedPod := pod.DeepCopy()
// Assume volumes first before assuming the pod.
//
// If no volumes need binding, then nil is returned, and continue to assume the pod.
//
// Otherwise, error is returned and volume binding is started asynchronously for all of the pod's volumes.
// scheduleOne() returns immediately on error, so that it doesn't continue to assume the pod.
//
// After the asynchronous volume binding updates are made, it will send the pod back through the scheduler for
// subsequent passes until all volumes are fully bound.
//
// This function modifies 'assumedPod' if volume binding is required.
err = sched.assumeAndBindVolumes(assumedPod, suggestedHost)
if err != nil {
return
}
// assume modifies `assumedPod` by setting NodeName=suggestedHost
err = sched.assume(assumedPod, suggestedHost)
if err != nil {
return
}
// bind the pod to its host asynchronously (we can do this b/c of the assumption step above).
go func() {
err := sched.bind(assumedPod, &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Namespace: assumedPod.Namespace, Name: assumedPod.Name, UID: assumedPod.UID},
Target: v1.ObjectReference{
Kind: "Node",
Name: suggestedHost,
},
})
metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
if err != nil {
glog.Errorf("Internal error binding pod: (%v)", err)
}
}()
}

View File

@ -0,0 +1,826 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"errors"
"fmt"
"reflect"
"testing"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/diff"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
clientcache "k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/legacyscheme"
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
"k8s.io/kubernetes/pkg/scheduler/core"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
"k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/scheduler/volumebinder"
)
type fakeBinder struct {
b func(binding *v1.Binding) error
}
func (fb fakeBinder) Bind(binding *v1.Binding) error { return fb.b(binding) }
type fakePodConditionUpdater struct{}
func (fc fakePodConditionUpdater) Update(pod *v1.Pod, podCondition *v1.PodCondition) error {
return nil
}
type fakePodPreemptor struct{}
func (fp fakePodPreemptor) GetUpdatedPod(pod *v1.Pod) (*v1.Pod, error) {
return pod, nil
}
func (fp fakePodPreemptor) DeletePod(pod *v1.Pod) error {
return nil
}
func (fp fakePodPreemptor) SetNominatedNodeName(pod *v1.Pod, nomNodeName string) error {
return nil
}
func (fp fakePodPreemptor) RemoveNominatedNodeName(pod *v1.Pod) error {
return nil
}
func podWithID(id, desiredHost string) *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: id, SelfLink: util.Test.SelfLink(string(v1.ResourcePods), id)},
Spec: v1.PodSpec{
NodeName: desiredHost,
},
}
}
func deletingPod(id string) *v1.Pod {
deletionTimestamp := metav1.Now()
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: id, SelfLink: util.Test.SelfLink(string(v1.ResourcePods), id), DeletionTimestamp: &deletionTimestamp},
Spec: v1.PodSpec{
NodeName: "",
},
}
}
func podWithPort(id, desiredHost string, port int) *v1.Pod {
pod := podWithID(id, desiredHost)
pod.Spec.Containers = []v1.Container{
{Name: "ctr", Ports: []v1.ContainerPort{{HostPort: int32(port)}}},
}
return pod
}
func podWithResources(id, desiredHost string, limits v1.ResourceList, requests v1.ResourceList) *v1.Pod {
pod := podWithID(id, desiredHost)
pod.Spec.Containers = []v1.Container{
{Name: "ctr", Resources: v1.ResourceRequirements{Limits: limits, Requests: requests}},
}
return pod
}
type mockScheduler struct {
machine string
err error
}
func (es mockScheduler) Schedule(pod *v1.Pod, ml algorithm.NodeLister) (string, error) {
return es.machine, es.err
}
func (es mockScheduler) Predicates() map[string]algorithm.FitPredicate {
return nil
}
func (es mockScheduler) Prioritizers() []algorithm.PriorityConfig {
return nil
}
func (es mockScheduler) Preempt(pod *v1.Pod, nodeLister algorithm.NodeLister, scheduleErr error) (*v1.Node, []*v1.Pod, []*v1.Pod, error) {
return nil, nil, nil, nil
}
func TestScheduler(t *testing.T) {
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartLogging(t.Logf).Stop()
errS := errors.New("scheduler")
errB := errors.New("binder")
testNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
table := []struct {
injectBindError error
sendPod *v1.Pod
algo algorithm.ScheduleAlgorithm
expectErrorPod *v1.Pod
expectForgetPod *v1.Pod
expectAssumedPod *v1.Pod
expectError error
expectBind *v1.Binding
eventReason string
}{
{
sendPod: podWithID("foo", ""),
algo: mockScheduler{testNode.Name, nil},
expectBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
expectAssumedPod: podWithID("foo", testNode.Name),
eventReason: "Scheduled",
}, {
sendPod: podWithID("foo", ""),
algo: mockScheduler{testNode.Name, errS},
expectError: errS,
expectErrorPod: podWithID("foo", ""),
eventReason: "FailedScheduling",
}, {
sendPod: podWithID("foo", ""),
algo: mockScheduler{testNode.Name, nil},
expectBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
expectAssumedPod: podWithID("foo", testNode.Name),
injectBindError: errB,
expectError: errB,
expectErrorPod: podWithID("foo", testNode.Name),
expectForgetPod: podWithID("foo", testNode.Name),
eventReason: "FailedScheduling",
}, {
sendPod: deletingPod("foo"),
algo: mockScheduler{"", nil},
eventReason: "FailedScheduling",
},
}
for i, item := range table {
var gotError error
var gotPod *v1.Pod
var gotForgetPod *v1.Pod
var gotAssumedPod *v1.Pod
var gotBinding *v1.Binding
configurator := &FakeConfigurator{
Config: &Config{
SchedulerCache: &schedulertesting.FakeCache{
ForgetFunc: func(pod *v1.Pod) {
gotForgetPod = pod
},
AssumeFunc: func(pod *v1.Pod) {
gotAssumedPod = pod
},
},
NodeLister: schedulertesting.FakeNodeLister(
[]*v1.Node{&testNode},
),
Algorithm: item.algo,
GetBinder: func(pod *v1.Pod) Binder {
return fakeBinder{func(b *v1.Binding) error {
gotBinding = b
return item.injectBindError
}}
},
PodConditionUpdater: fakePodConditionUpdater{},
Error: func(p *v1.Pod, err error) {
gotPod = p
gotError = err
},
NextPod: func() *v1.Pod {
return item.sendPod
},
Recorder: eventBroadcaster.NewRecorder(legacyscheme.Scheme, v1.EventSource{Component: "scheduler"}),
VolumeBinder: volumebinder.NewFakeVolumeBinder(&persistentvolume.FakeVolumeBinderConfig{AllBound: true}),
},
}
s, _ := NewFromConfigurator(configurator, nil...)
called := make(chan struct{})
events := eventBroadcaster.StartEventWatcher(func(e *v1.Event) {
if e, a := item.eventReason, e.Reason; e != a {
t.Errorf("%v: expected %v, got %v", i, e, a)
}
close(called)
})
s.scheduleOne()
<-called
if e, a := item.expectAssumedPod, gotAssumedPod; !reflect.DeepEqual(e, a) {
t.Errorf("%v: assumed pod: wanted %v, got %v", i, e, a)
}
if e, a := item.expectErrorPod, gotPod; !reflect.DeepEqual(e, a) {
t.Errorf("%v: error pod: wanted %v, got %v", i, e, a)
}
if e, a := item.expectForgetPod, gotForgetPod; !reflect.DeepEqual(e, a) {
t.Errorf("%v: forget pod: wanted %v, got %v", i, e, a)
}
if e, a := item.expectError, gotError; !reflect.DeepEqual(e, a) {
t.Errorf("%v: error: wanted %v, got %v", i, e, a)
}
if e, a := item.expectBind, gotBinding; !reflect.DeepEqual(e, a) {
t.Errorf("%v: error: %s", i, diff.ObjectDiff(e, a))
}
events.Stop()
}
}
func TestSchedulerNoPhantomPodAfterExpire(t *testing.T) {
stop := make(chan struct{})
defer close(stop)
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(100*time.Millisecond, stop)
pod := podWithPort("pod.Name", "", 8080)
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
scache.AddNode(&node)
nodeLister := schedulertesting.FakeNodeLister([]*v1.Node{&node})
predicateMap := map[string]algorithm.FitPredicate{"PodFitsHostPorts": predicates.PodFitsHostPorts}
scheduler, bindingChan, _ := setupTestSchedulerWithOnePodOnNode(t, queuedPodStore, scache, nodeLister, predicateMap, pod, &node)
waitPodExpireChan := make(chan struct{})
timeout := make(chan struct{})
go func() {
for {
select {
case <-timeout:
return
default:
}
pods, err := scache.List(labels.Everything())
if err != nil {
t.Fatalf("cache.List failed: %v", err)
}
if len(pods) == 0 {
close(waitPodExpireChan)
return
}
time.Sleep(100 * time.Millisecond)
}
}()
// waiting for the assumed pod to expire
select {
case <-waitPodExpireChan:
case <-time.After(wait.ForeverTestTimeout):
close(timeout)
t.Fatalf("timeout timeout in waiting pod expire after %v", wait.ForeverTestTimeout)
}
// We use conflicted pod ports to incur fit predicate failure if first pod not removed.
secondPod := podWithPort("bar", "", 8080)
queuedPodStore.Add(secondPod)
scheduler.scheduleOne()
select {
case b := <-bindingChan:
expectBinding := &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Target: v1.ObjectReference{Kind: "Node", Name: node.Name},
}
if !reflect.DeepEqual(expectBinding, b) {
t.Errorf("binding want=%v, get=%v", expectBinding, b)
}
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("timeout in binding after %v", wait.ForeverTestTimeout)
}
}
func TestSchedulerNoPhantomPodAfterDelete(t *testing.T) {
stop := make(chan struct{})
defer close(stop)
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(10*time.Minute, stop)
firstPod := podWithPort("pod.Name", "", 8080)
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
scache.AddNode(&node)
nodeLister := schedulertesting.FakeNodeLister([]*v1.Node{&node})
predicateMap := map[string]algorithm.FitPredicate{"PodFitsHostPorts": predicates.PodFitsHostPorts}
scheduler, bindingChan, errChan := setupTestSchedulerWithOnePodOnNode(t, queuedPodStore, scache, nodeLister, predicateMap, firstPod, &node)
// We use conflicted pod ports to incur fit predicate failure.
secondPod := podWithPort("bar", "", 8080)
queuedPodStore.Add(secondPod)
// queuedPodStore: [bar:8080]
// cache: [(assumed)foo:8080]
scheduler.scheduleOne()
select {
case err := <-errChan:
expectErr := &core.FitError{
Pod: secondPod,
NumAllNodes: 1,
FailedPredicates: core.FailedPredicateMap{node.Name: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts}},
}
if !reflect.DeepEqual(expectErr, err) {
t.Errorf("err want=%v, get=%v", expectErr, err)
}
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("timeout in fitting after %v", wait.ForeverTestTimeout)
}
// We mimic the workflow of cache behavior when a pod is removed by user.
// Note: if the schedulercache timeout would be super short, the first pod would expire
// and would be removed itself (without any explicit actions on schedulercache). Even in that case,
// explicitly AddPod will as well correct the behavior.
firstPod.Spec.NodeName = node.Name
if err := scache.AddPod(firstPod); err != nil {
t.Fatalf("err: %v", err)
}
if err := scache.RemovePod(firstPod); err != nil {
t.Fatalf("err: %v", err)
}
queuedPodStore.Add(secondPod)
scheduler.scheduleOne()
select {
case b := <-bindingChan:
expectBinding := &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
Target: v1.ObjectReference{Kind: "Node", Name: node.Name},
}
if !reflect.DeepEqual(expectBinding, b) {
t.Errorf("binding want=%v, get=%v", expectBinding, b)
}
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("timeout in binding after %v", wait.ForeverTestTimeout)
}
}
// Scheduler should preserve predicate constraint even if binding was longer
// than cache ttl
func TestSchedulerErrorWithLongBinding(t *testing.T) {
stop := make(chan struct{})
defer close(stop)
firstPod := podWithPort("foo", "", 8080)
conflictPod := podWithPort("bar", "", 8080)
pods := map[string]*v1.Pod{firstPod.Name: firstPod, conflictPod.Name: conflictPod}
for _, test := range []struct {
Expected map[string]bool
CacheTTL time.Duration
BindingDuration time.Duration
}{
{
Expected: map[string]bool{firstPod.Name: true},
CacheTTL: 100 * time.Millisecond,
BindingDuration: 300 * time.Millisecond,
},
{
Expected: map[string]bool{firstPod.Name: true},
CacheTTL: 10 * time.Second,
BindingDuration: 300 * time.Millisecond,
},
} {
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(test.CacheTTL, stop)
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
scache.AddNode(&node)
nodeLister := schedulertesting.FakeNodeLister([]*v1.Node{&node})
predicateMap := map[string]algorithm.FitPredicate{"PodFitsHostPorts": predicates.PodFitsHostPorts}
scheduler, bindingChan := setupTestSchedulerLongBindingWithRetry(
queuedPodStore, scache, nodeLister, predicateMap, stop, test.BindingDuration)
scheduler.Run()
queuedPodStore.Add(firstPod)
queuedPodStore.Add(conflictPod)
resultBindings := map[string]bool{}
waitChan := time.After(5 * time.Second)
for finished := false; !finished; {
select {
case b := <-bindingChan:
resultBindings[b.Name] = true
p := pods[b.Name]
p.Spec.NodeName = b.Target.Name
scache.AddPod(p)
case <-waitChan:
finished = true
}
}
if !reflect.DeepEqual(resultBindings, test.Expected) {
t.Errorf("Result binding are not equal to expected. %v != %v", resultBindings, test.Expected)
}
}
}
// queuedPodStore: pods queued before processing.
// cache: scheduler cache that might contain assumed pods.
func setupTestSchedulerWithOnePodOnNode(t *testing.T, queuedPodStore *clientcache.FIFO, scache schedulercache.Cache,
nodeLister schedulertesting.FakeNodeLister, predicateMap map[string]algorithm.FitPredicate, pod *v1.Pod, node *v1.Node) (*Scheduler, chan *v1.Binding, chan error) {
scheduler, bindingChan, errChan := setupTestScheduler(queuedPodStore, scache, nodeLister, predicateMap, nil)
queuedPodStore.Add(pod)
// queuedPodStore: [foo:8080]
// cache: []
scheduler.scheduleOne()
// queuedPodStore: []
// cache: [(assumed)foo:8080]
select {
case b := <-bindingChan:
expectBinding := &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Name: pod.Name},
Target: v1.ObjectReference{Kind: "Node", Name: node.Name},
}
if !reflect.DeepEqual(expectBinding, b) {
t.Errorf("binding want=%v, get=%v", expectBinding, b)
}
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("timeout after %v", wait.ForeverTestTimeout)
}
return scheduler, bindingChan, errChan
}
func TestSchedulerFailedSchedulingReasons(t *testing.T) {
stop := make(chan struct{})
defer close(stop)
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(10*time.Minute, stop)
// Design the baseline for the pods, and we will make nodes that dont fit it later.
var cpu = int64(4)
var mem = int64(500)
podWithTooBigResourceRequests := podWithResources("bar", "", v1.ResourceList{
v1.ResourceCPU: *(resource.NewQuantity(cpu, resource.DecimalSI)),
v1.ResourceMemory: *(resource.NewQuantity(mem, resource.DecimalSI)),
}, v1.ResourceList{
v1.ResourceCPU: *(resource.NewQuantity(cpu, resource.DecimalSI)),
v1.ResourceMemory: *(resource.NewQuantity(mem, resource.DecimalSI)),
})
// create several nodes which cannot schedule the above pod
nodes := []*v1.Node{}
for i := 0; i < 100; i++ {
node := v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("machine%v", i)},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *(resource.NewQuantity(cpu/2, resource.DecimalSI)),
v1.ResourceMemory: *(resource.NewQuantity(mem/5, resource.DecimalSI)),
v1.ResourcePods: *(resource.NewQuantity(10, resource.DecimalSI)),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *(resource.NewQuantity(cpu/2, resource.DecimalSI)),
v1.ResourceMemory: *(resource.NewQuantity(mem/5, resource.DecimalSI)),
v1.ResourcePods: *(resource.NewQuantity(10, resource.DecimalSI)),
}},
}
scache.AddNode(&node)
nodes = append(nodes, &node)
}
nodeLister := schedulertesting.FakeNodeLister(nodes)
predicateMap := map[string]algorithm.FitPredicate{
"PodFitsResources": predicates.PodFitsResources,
}
// Create expected failure reasons for all the nodes. Hopefully they will get rolled up into a non-spammy summary.
failedPredicatesMap := core.FailedPredicateMap{}
for _, node := range nodes {
failedPredicatesMap[node.Name] = []algorithm.PredicateFailureReason{
predicates.NewInsufficientResourceError(v1.ResourceCPU, 4000, 0, 2000),
predicates.NewInsufficientResourceError(v1.ResourceMemory, 500, 0, 100),
}
}
scheduler, _, errChan := setupTestScheduler(queuedPodStore, scache, nodeLister, predicateMap, nil)
queuedPodStore.Add(podWithTooBigResourceRequests)
scheduler.scheduleOne()
select {
case err := <-errChan:
expectErr := &core.FitError{
Pod: podWithTooBigResourceRequests,
NumAllNodes: len(nodes),
FailedPredicates: failedPredicatesMap,
}
if len(fmt.Sprint(expectErr)) > 150 {
t.Errorf("message is too spammy ! %v ", len(fmt.Sprint(expectErr)))
}
if !reflect.DeepEqual(expectErr, err) {
t.Errorf("\n err \nWANT=%+v,\nGOT=%+v", expectErr, err)
}
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("timeout after %v", wait.ForeverTestTimeout)
}
}
// queuedPodStore: pods queued before processing.
// scache: scheduler cache that might contain assumed pods.
func setupTestScheduler(queuedPodStore *clientcache.FIFO, scache schedulercache.Cache, nodeLister schedulertesting.FakeNodeLister, predicateMap map[string]algorithm.FitPredicate, recorder record.EventRecorder) (*Scheduler, chan *v1.Binding, chan error) {
algo := core.NewGenericScheduler(
scache,
nil,
nil,
predicateMap,
algorithm.EmptyPredicateMetadataProducer,
[]algorithm.PriorityConfig{},
algorithm.EmptyPriorityMetadataProducer,
[]algorithm.SchedulerExtender{},
nil,
schedulertesting.FakePersistentVolumeClaimLister{},
false)
bindingChan := make(chan *v1.Binding, 1)
errChan := make(chan error, 1)
configurator := &FakeConfigurator{
Config: &Config{
SchedulerCache: scache,
NodeLister: nodeLister,
Algorithm: algo,
GetBinder: func(pod *v1.Pod) Binder {
return fakeBinder{func(b *v1.Binding) error {
bindingChan <- b
return nil
}}
},
NextPod: func() *v1.Pod {
return clientcache.Pop(queuedPodStore).(*v1.Pod)
},
Error: func(p *v1.Pod, err error) {
errChan <- err
},
Recorder: &record.FakeRecorder{},
PodConditionUpdater: fakePodConditionUpdater{},
PodPreemptor: fakePodPreemptor{},
VolumeBinder: volumebinder.NewFakeVolumeBinder(&persistentvolume.FakeVolumeBinderConfig{AllBound: true}),
},
}
if recorder != nil {
configurator.Config.Recorder = recorder
}
sched, _ := NewFromConfigurator(configurator, nil...)
return sched, bindingChan, errChan
}
func setupTestSchedulerLongBindingWithRetry(queuedPodStore *clientcache.FIFO, scache schedulercache.Cache, nodeLister schedulertesting.FakeNodeLister, predicateMap map[string]algorithm.FitPredicate, stop chan struct{}, bindingTime time.Duration) (*Scheduler, chan *v1.Binding) {
algo := core.NewGenericScheduler(
scache,
nil,
nil,
predicateMap,
algorithm.EmptyPredicateMetadataProducer,
[]algorithm.PriorityConfig{},
algorithm.EmptyPriorityMetadataProducer,
[]algorithm.SchedulerExtender{},
nil,
schedulertesting.FakePersistentVolumeClaimLister{},
false)
bindingChan := make(chan *v1.Binding, 2)
configurator := &FakeConfigurator{
Config: &Config{
SchedulerCache: scache,
NodeLister: nodeLister,
Algorithm: algo,
GetBinder: func(pod *v1.Pod) Binder {
return fakeBinder{func(b *v1.Binding) error {
time.Sleep(bindingTime)
bindingChan <- b
return nil
}}
},
WaitForCacheSync: func() bool {
return true
},
NextPod: func() *v1.Pod {
return clientcache.Pop(queuedPodStore).(*v1.Pod)
},
Error: func(p *v1.Pod, err error) {
queuedPodStore.AddIfNotPresent(p)
},
Recorder: &record.FakeRecorder{},
PodConditionUpdater: fakePodConditionUpdater{},
PodPreemptor: fakePodPreemptor{},
StopEverything: stop,
VolumeBinder: volumebinder.NewFakeVolumeBinder(&persistentvolume.FakeVolumeBinderConfig{AllBound: true}),
},
}
sched, _ := NewFromConfigurator(configurator, nil...)
return sched, bindingChan
}
func setupTestSchedulerWithVolumeBinding(fakeVolumeBinder *volumebinder.VolumeBinder, stop <-chan struct{}, broadcaster record.EventBroadcaster) (*Scheduler, chan *v1.Binding, chan error) {
testNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
nodeLister := schedulertesting.FakeNodeLister([]*v1.Node{&testNode})
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
queuedPodStore.Add(podWithID("foo", ""))
scache := schedulercache.New(10*time.Minute, stop)
scache.AddNode(&testNode)
predicateMap := map[string]algorithm.FitPredicate{
predicates.CheckVolumeBindingPred: predicates.NewVolumeBindingPredicate(fakeVolumeBinder),
}
recorder := broadcaster.NewRecorder(legacyscheme.Scheme, v1.EventSource{Component: "scheduler"})
s, bindingChan, errChan := setupTestScheduler(queuedPodStore, scache, nodeLister, predicateMap, recorder)
s.config.VolumeBinder = fakeVolumeBinder
return s, bindingChan, errChan
}
// This is a workaround because golint complains that errors cannot
// end with punctuation. However, the real predicate error message does
// end with a period.
func makePredicateError(failReason string) error {
s := fmt.Sprintf("0/1 nodes are available: %v.", failReason)
return fmt.Errorf(s)
}
func TestSchedulerWithVolumeBinding(t *testing.T) {
order := []string{predicates.CheckVolumeBindingPred, predicates.GeneralPred}
predicates.SetPredicatesOrdering(order)
findErr := fmt.Errorf("find err")
assumeErr := fmt.Errorf("assume err")
bindErr := fmt.Errorf("bind err")
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartLogging(t.Logf).Stop()
// This can be small because we wait for pod to finish scheduling first
chanTimeout := 2 * time.Second
utilfeature.DefaultFeatureGate.Set("VolumeScheduling=true")
defer utilfeature.DefaultFeatureGate.Set("VolumeScheduling=false")
table := map[string]struct {
expectError error
expectPodBind *v1.Binding
expectAssumeCalled bool
expectBindCalled bool
eventReason string
volumeBinderConfig *persistentvolume.FakeVolumeBinderConfig
}{
"all-bound": {
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
AllBound: true,
FindUnboundSatsified: true,
FindBoundSatsified: true,
},
expectAssumeCalled: true,
expectPodBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Target: v1.ObjectReference{Kind: "Node", Name: "machine1"}},
eventReason: "Scheduled",
},
"bound,invalid-pv-affinity": {
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
AllBound: true,
FindUnboundSatsified: true,
FindBoundSatsified: false,
},
eventReason: "FailedScheduling",
expectError: makePredicateError("1 node(s) had volume node affinity conflict"),
},
"unbound,no-matches": {
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
FindUnboundSatsified: false,
FindBoundSatsified: true,
},
eventReason: "FailedScheduling",
expectError: makePredicateError("1 node(s) didn't find available persistent volumes to bind"),
},
"bound-and-unbound-unsatisfied": {
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
FindUnboundSatsified: false,
FindBoundSatsified: false,
},
eventReason: "FailedScheduling",
expectError: makePredicateError("1 node(s) didn't find available persistent volumes to bind, 1 node(s) had volume node affinity conflict"),
},
"unbound,found-matches": {
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
FindUnboundSatsified: true,
FindBoundSatsified: true,
AssumeBindingRequired: true,
},
expectAssumeCalled: true,
expectBindCalled: true,
eventReason: "FailedScheduling",
expectError: fmt.Errorf("Volume binding started, waiting for completion"),
},
"unbound,found-matches,already-bound": {
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
FindUnboundSatsified: true,
FindBoundSatsified: true,
AssumeBindingRequired: false,
},
expectAssumeCalled: true,
expectBindCalled: false,
eventReason: "FailedScheduling",
expectError: fmt.Errorf("Volume binding started, waiting for completion"),
},
"predicate-error": {
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
FindErr: findErr,
},
eventReason: "FailedScheduling",
expectError: findErr,
},
"assume-error": {
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
FindUnboundSatsified: true,
FindBoundSatsified: true,
AssumeErr: assumeErr,
},
expectAssumeCalled: true,
eventReason: "FailedScheduling",
expectError: assumeErr,
},
"bind-error": {
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
FindUnboundSatsified: true,
FindBoundSatsified: true,
AssumeBindingRequired: true,
BindErr: bindErr,
},
expectAssumeCalled: true,
expectBindCalled: true,
eventReason: "FailedScheduling",
expectError: bindErr,
},
}
for name, item := range table {
stop := make(chan struct{})
fakeVolumeBinder := volumebinder.NewFakeVolumeBinder(item.volumeBinderConfig)
internalBinder, ok := fakeVolumeBinder.Binder.(*persistentvolume.FakeVolumeBinder)
if !ok {
t.Fatalf("Failed to get fake volume binder")
}
s, bindingChan, errChan := setupTestSchedulerWithVolumeBinding(fakeVolumeBinder, stop, eventBroadcaster)
eventChan := make(chan struct{})
events := eventBroadcaster.StartEventWatcher(func(e *v1.Event) {
if e, a := item.eventReason, e.Reason; e != a {
t.Errorf("%v: expected %v, got %v", name, e, a)
}
close(eventChan)
})
go fakeVolumeBinder.Run(s.bindVolumesWorker, stop)
s.scheduleOne()
// Wait for pod to succeed or fail scheduling
select {
case <-eventChan:
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("%v: scheduling timeout after %v", name, wait.ForeverTestTimeout)
}
events.Stop()
// Wait for scheduling to return an error
select {
case err := <-errChan:
if item.expectError == nil || !reflect.DeepEqual(item.expectError.Error(), err.Error()) {
t.Errorf("%v: \n err \nWANT=%+v,\nGOT=%+v", name, item.expectError, err)
}
case <-time.After(chanTimeout):
if item.expectError != nil {
t.Errorf("%v: did not receive error after %v", name, chanTimeout)
}
}
// Wait for pod to succeed binding
select {
case b := <-bindingChan:
if !reflect.DeepEqual(item.expectPodBind, b) {
t.Errorf("%v: \n err \nWANT=%+v,\nGOT=%+v", name, item.expectPodBind, b)
}
case <-time.After(chanTimeout):
if item.expectPodBind != nil {
t.Errorf("%v: did not receive pod binding after %v", name, chanTimeout)
}
}
if item.expectAssumeCalled != internalBinder.AssumeCalled {
t.Errorf("%v: expectedAssumeCall %v", name, item.expectAssumeCalled)
}
if item.expectBindCalled != internalBinder.BindCalled {
t.Errorf("%v: expectedBindCall %v", name, item.expectBindCalled)
}
close(stop)
}
}

View File

@ -0,0 +1,58 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = [
"cache.go",
"interface.go",
"node_info.go",
"util.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/schedulercache",
visibility = ["//visibility:public"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"cache_test.go",
"node_info_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@ -0,0 +1,469 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
import (
"fmt"
"sync"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/wait"
"github.com/golang/glog"
policy "k8s.io/api/policy/v1beta1"
)
var (
cleanAssumedPeriod = 1 * time.Second
)
// New returns a Cache implementation.
// It automatically starts a go routine that manages expiration of assumed pods.
// "ttl" is how long the assumed pod will get expired.
// "stop" is the channel that would close the background goroutine.
func New(ttl time.Duration, stop <-chan struct{}) Cache {
cache := newSchedulerCache(ttl, cleanAssumedPeriod, stop)
cache.run()
return cache
}
type schedulerCache struct {
stop <-chan struct{}
ttl time.Duration
period time.Duration
// This mutex guards all fields within this cache struct.
mu sync.Mutex
// a set of assumed pod keys.
// The key could further be used to get an entry in podStates.
assumedPods map[string]bool
// a map from pod key to podState.
podStates map[string]*podState
nodes map[string]*NodeInfo
pdbs map[string]*policy.PodDisruptionBudget
}
type podState struct {
pod *v1.Pod
// Used by assumedPod to determinate expiration.
deadline *time.Time
// Used to block cache from expiring assumedPod if binding still runs
bindingFinished bool
}
func newSchedulerCache(ttl, period time.Duration, stop <-chan struct{}) *schedulerCache {
return &schedulerCache{
ttl: ttl,
period: period,
stop: stop,
nodes: make(map[string]*NodeInfo),
assumedPods: make(map[string]bool),
podStates: make(map[string]*podState),
pdbs: make(map[string]*policy.PodDisruptionBudget),
}
}
func (cache *schedulerCache) UpdateNodeNameToInfoMap(nodeNameToInfo map[string]*NodeInfo) error {
cache.mu.Lock()
defer cache.mu.Unlock()
for name, info := range cache.nodes {
if current, ok := nodeNameToInfo[name]; !ok || current.generation != info.generation {
nodeNameToInfo[name] = info.Clone()
}
}
for name := range nodeNameToInfo {
if _, ok := cache.nodes[name]; !ok {
delete(nodeNameToInfo, name)
}
}
return nil
}
func (cache *schedulerCache) List(selector labels.Selector) ([]*v1.Pod, error) {
alwaysTrue := func(p *v1.Pod) bool { return true }
return cache.FilteredList(alwaysTrue, selector)
}
func (cache *schedulerCache) FilteredList(podFilter PodFilter, selector labels.Selector) ([]*v1.Pod, error) {
cache.mu.Lock()
defer cache.mu.Unlock()
// podFilter is expected to return true for most or all of the pods. We
// can avoid expensive array growth without wasting too much memory by
// pre-allocating capacity.
maxSize := 0
for _, info := range cache.nodes {
maxSize += len(info.pods)
}
pods := make([]*v1.Pod, 0, maxSize)
for _, info := range cache.nodes {
for _, pod := range info.pods {
if podFilter(pod) && selector.Matches(labels.Set(pod.Labels)) {
pods = append(pods, pod)
}
}
}
return pods, nil
}
func (cache *schedulerCache) AssumePod(pod *v1.Pod) error {
key, err := getPodKey(pod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
if _, ok := cache.podStates[key]; ok {
return fmt.Errorf("pod %v is in the cache, so can't be assumed", key)
}
cache.addPod(pod)
ps := &podState{
pod: pod,
}
cache.podStates[key] = ps
cache.assumedPods[key] = true
return nil
}
func (cache *schedulerCache) FinishBinding(pod *v1.Pod) error {
return cache.finishBinding(pod, time.Now())
}
// finishBinding exists to make tests determinitistic by injecting now as an argument
func (cache *schedulerCache) finishBinding(pod *v1.Pod, now time.Time) error {
key, err := getPodKey(pod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
glog.V(5).Infof("Finished binding for pod %v. Can be expired.", key)
currState, ok := cache.podStates[key]
if ok && cache.assumedPods[key] {
dl := now.Add(cache.ttl)
currState.bindingFinished = true
currState.deadline = &dl
}
return nil
}
func (cache *schedulerCache) ForgetPod(pod *v1.Pod) error {
key, err := getPodKey(pod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
currState, ok := cache.podStates[key]
if ok && currState.pod.Spec.NodeName != pod.Spec.NodeName {
return fmt.Errorf("pod %v was assumed on %v but assigned to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName)
}
switch {
// Only assumed pod can be forgotten.
case ok && cache.assumedPods[key]:
err := cache.removePod(pod)
if err != nil {
return err
}
delete(cache.assumedPods, key)
delete(cache.podStates, key)
default:
return fmt.Errorf("pod %v wasn't assumed so cannot be forgotten", key)
}
return nil
}
// Assumes that lock is already acquired.
func (cache *schedulerCache) addPod(pod *v1.Pod) {
n, ok := cache.nodes[pod.Spec.NodeName]
if !ok {
n = NewNodeInfo()
cache.nodes[pod.Spec.NodeName] = n
}
n.AddPod(pod)
}
// Assumes that lock is already acquired.
func (cache *schedulerCache) updatePod(oldPod, newPod *v1.Pod) error {
if err := cache.removePod(oldPod); err != nil {
return err
}
cache.addPod(newPod)
return nil
}
// Assumes that lock is already acquired.
func (cache *schedulerCache) removePod(pod *v1.Pod) error {
n := cache.nodes[pod.Spec.NodeName]
if err := n.RemovePod(pod); err != nil {
return err
}
if len(n.pods) == 0 && n.node == nil {
delete(cache.nodes, pod.Spec.NodeName)
}
return nil
}
func (cache *schedulerCache) AddPod(pod *v1.Pod) error {
key, err := getPodKey(pod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
currState, ok := cache.podStates[key]
switch {
case ok && cache.assumedPods[key]:
if currState.pod.Spec.NodeName != pod.Spec.NodeName {
// The pod was added to a different node than it was assumed to.
glog.Warningf("Pod %v was assumed to be on %v but got added to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName)
// Clean this up.
cache.removePod(currState.pod)
cache.addPod(pod)
}
delete(cache.assumedPods, key)
cache.podStates[key].deadline = nil
cache.podStates[key].pod = pod
case !ok:
// Pod was expired. We should add it back.
cache.addPod(pod)
ps := &podState{
pod: pod,
}
cache.podStates[key] = ps
default:
return fmt.Errorf("pod %v was already in added state", key)
}
return nil
}
func (cache *schedulerCache) UpdatePod(oldPod, newPod *v1.Pod) error {
key, err := getPodKey(oldPod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
currState, ok := cache.podStates[key]
switch {
// An assumed pod won't have Update/Remove event. It needs to have Add event
// before Update event, in which case the state would change from Assumed to Added.
case ok && !cache.assumedPods[key]:
if currState.pod.Spec.NodeName != newPod.Spec.NodeName {
glog.Errorf("Pod %v updated on a different node than previously added to.", key)
glog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions")
}
if err := cache.updatePod(oldPod, newPod); err != nil {
return err
}
default:
return fmt.Errorf("pod %v is not added to scheduler cache, so cannot be updated", key)
}
return nil
}
func (cache *schedulerCache) RemovePod(pod *v1.Pod) error {
key, err := getPodKey(pod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
currState, ok := cache.podStates[key]
switch {
// An assumed pod won't have Delete/Remove event. It needs to have Add event
// before Remove event, in which case the state would change from Assumed to Added.
case ok && !cache.assumedPods[key]:
if currState.pod.Spec.NodeName != pod.Spec.NodeName {
glog.Errorf("Pod %v was assumed to be on %v but got added to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName)
glog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions")
}
err := cache.removePod(currState.pod)
if err != nil {
return err
}
delete(cache.podStates, key)
default:
return fmt.Errorf("pod %v is not found in scheduler cache, so cannot be removed from it", key)
}
return nil
}
func (cache *schedulerCache) IsAssumedPod(pod *v1.Pod) (bool, error) {
key, err := getPodKey(pod)
if err != nil {
return false, err
}
cache.mu.Lock()
defer cache.mu.Unlock()
b, found := cache.assumedPods[key]
if !found {
return false, nil
}
return b, nil
}
func (cache *schedulerCache) GetPod(pod *v1.Pod) (*v1.Pod, error) {
key, err := getPodKey(pod)
if err != nil {
return nil, err
}
cache.mu.Lock()
defer cache.mu.Unlock()
podState, ok := cache.podStates[key]
if !ok {
return nil, fmt.Errorf("pod %v does not exist in scheduler cache", key)
}
return podState.pod, nil
}
func (cache *schedulerCache) AddNode(node *v1.Node) error {
cache.mu.Lock()
defer cache.mu.Unlock()
n, ok := cache.nodes[node.Name]
if !ok {
n = NewNodeInfo()
cache.nodes[node.Name] = n
}
return n.SetNode(node)
}
func (cache *schedulerCache) UpdateNode(oldNode, newNode *v1.Node) error {
cache.mu.Lock()
defer cache.mu.Unlock()
n, ok := cache.nodes[newNode.Name]
if !ok {
n = NewNodeInfo()
cache.nodes[newNode.Name] = n
}
return n.SetNode(newNode)
}
func (cache *schedulerCache) RemoveNode(node *v1.Node) error {
cache.mu.Lock()
defer cache.mu.Unlock()
n := cache.nodes[node.Name]
if err := n.RemoveNode(node); err != nil {
return err
}
// We remove NodeInfo for this node only if there aren't any pods on this node.
// We can't do it unconditionally, because notifications about pods are delivered
// in a different watch, and thus can potentially be observed later, even though
// they happened before node removal.
if len(n.pods) == 0 && n.node == nil {
delete(cache.nodes, node.Name)
}
return nil
}
func (cache *schedulerCache) AddPDB(pdb *policy.PodDisruptionBudget) error {
cache.mu.Lock()
defer cache.mu.Unlock()
// Unconditionally update cache.
cache.pdbs[pdb.Name] = pdb
return nil
}
func (cache *schedulerCache) UpdatePDB(oldPDB, newPDB *policy.PodDisruptionBudget) error {
return cache.AddPDB(newPDB)
}
func (cache *schedulerCache) RemovePDB(pdb *policy.PodDisruptionBudget) error {
cache.mu.Lock()
defer cache.mu.Unlock()
delete(cache.pdbs, pdb.Name)
return nil
}
func (cache *schedulerCache) ListPDBs(selector labels.Selector) ([]*policy.PodDisruptionBudget, error) {
cache.mu.Lock()
defer cache.mu.Unlock()
var pdbs []*policy.PodDisruptionBudget
for _, pdb := range cache.pdbs {
if selector.Matches(labels.Set(pdb.Labels)) {
pdbs = append(pdbs, pdb)
}
}
return pdbs, nil
}
func (cache *schedulerCache) run() {
go wait.Until(cache.cleanupExpiredAssumedPods, cache.period, cache.stop)
}
func (cache *schedulerCache) cleanupExpiredAssumedPods() {
cache.cleanupAssumedPods(time.Now())
}
// cleanupAssumedPods exists for making test deterministic by taking time as input argument.
func (cache *schedulerCache) cleanupAssumedPods(now time.Time) {
cache.mu.Lock()
defer cache.mu.Unlock()
// The size of assumedPods should be small
for key := range cache.assumedPods {
ps, ok := cache.podStates[key]
if !ok {
panic("Key found in assumed set but not in podStates. Potentially a logical error.")
}
if !ps.bindingFinished {
glog.V(3).Infof("Couldn't expire cache for pod %v/%v. Binding is still in progress.",
ps.pod.Namespace, ps.pod.Name)
continue
}
if now.After(*ps.deadline) {
glog.Warningf("Pod %s/%s expired", ps.pod.Namespace, ps.pod.Name)
if err := cache.expirePod(key, ps); err != nil {
glog.Errorf("ExpirePod failed for %s: %v", key, err)
}
}
}
}
func (cache *schedulerCache) expirePod(key string, ps *podState) error {
if err := cache.removePod(ps.pod); err != nil {
return err
}
delete(cache.assumedPods, key)
delete(cache.podStates, key)
return nil
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,122 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
import (
"k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/labels"
)
// PodFilter is a function to filter a pod. If pod passed return true else return false.
type PodFilter func(*v1.Pod) bool
// Cache collects pods' information and provides node-level aggregated information.
// It's intended for generic scheduler to do efficient lookup.
// Cache's operations are pod centric. It does incremental updates based on pod events.
// Pod events are sent via network. We don't have guaranteed delivery of all events:
// We use Reflector to list and watch from remote.
// Reflector might be slow and do a relist, which would lead to missing events.
//
// State Machine of a pod's events in scheduler's cache:
//
//
// +-------------------------------------------+ +----+
// | Add | | |
// | | | | Update
// + Assume Add v v |
//Initial +--------> Assumed +------------+---> Added <--+
// ^ + + | +
// | | | | |
// | | | Add | | Remove
// | | | | |
// | | | + |
// +----------------+ +-----------> Expired +----> Deleted
// Forget Expire
//
//
// Note that an assumed pod can expire, because if we haven't received Add event notifying us
// for a while, there might be some problems and we shouldn't keep the pod in cache anymore.
//
// Note that "Initial", "Expired", and "Deleted" pods do not actually exist in cache.
// Based on existing use cases, we are making the following assumptions:
// - No pod would be assumed twice
// - A pod could be added without going through scheduler. In this case, we will see Add but not Assume event.
// - If a pod wasn't added, it wouldn't be removed or updated.
// - Both "Expired" and "Deleted" are valid end states. In case of some problems, e.g. network issue,
// a pod might have changed its state (e.g. added and deleted) without delivering notification to the cache.
type Cache interface {
// AssumePod assumes a pod scheduled and aggregates the pod's information into its node.
// The implementation also decides the policy to expire pod before being confirmed (receiving Add event).
// After expiration, its information would be subtracted.
AssumePod(pod *v1.Pod) error
// FinishBinding signals that cache for assumed pod can be expired
FinishBinding(pod *v1.Pod) error
// ForgetPod removes an assumed pod from cache.
ForgetPod(pod *v1.Pod) error
// AddPod either confirms a pod if it's assumed, or adds it back if it's expired.
// If added back, the pod's information would be added again.
AddPod(pod *v1.Pod) error
// UpdatePod removes oldPod's information and adds newPod's information.
UpdatePod(oldPod, newPod *v1.Pod) error
// RemovePod removes a pod. The pod's information would be subtracted from assigned node.
RemovePod(pod *v1.Pod) error
// GetPod returns the pod from the cache with the same namespace and the
// same name of the specified pod.
GetPod(pod *v1.Pod) (*v1.Pod, error)
// IsAssumedPod returns true if the pod is assumed and not expired.
IsAssumedPod(pod *v1.Pod) (bool, error)
// AddNode adds overall information about node.
AddNode(node *v1.Node) error
// UpdateNode updates overall information about node.
UpdateNode(oldNode, newNode *v1.Node) error
// RemoveNode removes overall information about node.
RemoveNode(node *v1.Node) error
// AddPDB adds a PodDisruptionBudget object to the cache.
AddPDB(pdb *policy.PodDisruptionBudget) error
// UpdatePDB updates a PodDisruptionBudget object in the cache.
UpdatePDB(oldPDB, newPDB *policy.PodDisruptionBudget) error
// RemovePDB removes a PodDisruptionBudget object from the cache.
RemovePDB(pdb *policy.PodDisruptionBudget) error
// List lists all cached PDBs matching the selector.
ListPDBs(selector labels.Selector) ([]*policy.PodDisruptionBudget, error)
// UpdateNodeNameToInfoMap updates the passed infoMap to the current contents of Cache.
// The node info contains aggregated information of pods scheduled (including assumed to be)
// on this node.
UpdateNodeNameToInfoMap(infoMap map[string]*NodeInfo) error
// List lists all cached pods (including assumed ones).
List(labels.Selector) ([]*v1.Pod, error)
// FilteredList returns all cached pods that pass the filter.
FilteredList(filter PodFilter, selector labels.Selector) ([]*v1.Pod, error)
}

View File

@ -0,0 +1,514 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
import (
"fmt"
"github.com/golang/glog"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
clientcache "k8s.io/client-go/tools/cache"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/pkg/scheduler/util"
)
var emptyResource = Resource{}
// NodeInfo is node level aggregated information.
type NodeInfo struct {
// Overall node information.
node *v1.Node
pods []*v1.Pod
podsWithAffinity []*v1.Pod
usedPorts util.HostPortInfo
// Total requested resource of all pods on this node.
// It includes assumed pods which scheduler sends binding to apiserver but
// didn't get it as scheduled yet.
requestedResource *Resource
nonzeroRequest *Resource
// We store allocatedResources (which is Node.Status.Allocatable.*) explicitly
// as int64, to avoid conversions and accessing map.
allocatableResource *Resource
// Cached tains of the node for faster lookup.
taints []v1.Taint
taintsErr error
// Cached conditions of node for faster lookup.
memoryPressureCondition v1.ConditionStatus
diskPressureCondition v1.ConditionStatus
// Whenever NodeInfo changes, generation is bumped.
// This is used to avoid cloning it if the object didn't change.
generation int64
}
// Resource is a collection of compute resource.
type Resource struct {
MilliCPU int64
Memory int64
NvidiaGPU int64
EphemeralStorage int64
// We store allowedPodNumber (which is Node.Status.Allocatable.Pods().Value())
// explicitly as int, to avoid conversions and improve performance.
AllowedPodNumber int
// ScalarResources
ScalarResources map[v1.ResourceName]int64
}
// NewResource creates a Resource from ResourceList
func NewResource(rl v1.ResourceList) *Resource {
r := &Resource{}
r.Add(rl)
return r
}
// Add adds ResourceList into Resource.
func (r *Resource) Add(rl v1.ResourceList) {
if r == nil {
return
}
for rName, rQuant := range rl {
switch rName {
case v1.ResourceCPU:
r.MilliCPU += rQuant.MilliValue()
case v1.ResourceMemory:
r.Memory += rQuant.Value()
case v1.ResourceNvidiaGPU:
r.NvidiaGPU += rQuant.Value()
case v1.ResourcePods:
r.AllowedPodNumber += int(rQuant.Value())
case v1.ResourceEphemeralStorage:
r.EphemeralStorage += rQuant.Value()
default:
if v1helper.IsScalarResourceName(rName) {
r.AddScalar(rName, rQuant.Value())
}
}
}
}
// ResourceList returns a resource list of this resource.
func (r *Resource) ResourceList() v1.ResourceList {
result := v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(r.MilliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(r.Memory, resource.BinarySI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(r.NvidiaGPU, resource.DecimalSI),
v1.ResourcePods: *resource.NewQuantity(int64(r.AllowedPodNumber), resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(r.EphemeralStorage, resource.BinarySI),
}
for rName, rQuant := range r.ScalarResources {
if v1helper.IsHugePageResourceName(rName) {
result[rName] = *resource.NewQuantity(rQuant, resource.BinarySI)
} else {
result[rName] = *resource.NewQuantity(rQuant, resource.DecimalSI)
}
}
return result
}
// Clone returns a copy of this resource.
func (r *Resource) Clone() *Resource {
res := &Resource{
MilliCPU: r.MilliCPU,
Memory: r.Memory,
NvidiaGPU: r.NvidiaGPU,
AllowedPodNumber: r.AllowedPodNumber,
EphemeralStorage: r.EphemeralStorage,
}
if r.ScalarResources != nil {
res.ScalarResources = make(map[v1.ResourceName]int64)
for k, v := range r.ScalarResources {
res.ScalarResources[k] = v
}
}
return res
}
// AddScalar adds a resource by a scalar value of this resource.
func (r *Resource) AddScalar(name v1.ResourceName, quantity int64) {
r.SetScalar(name, r.ScalarResources[name]+quantity)
}
// SetScalar sets a resource by a scalar value of this resource.
func (r *Resource) SetScalar(name v1.ResourceName, quantity int64) {
// Lazily allocate scalar resource map.
if r.ScalarResources == nil {
r.ScalarResources = map[v1.ResourceName]int64{}
}
r.ScalarResources[name] = quantity
}
// NewNodeInfo returns a ready to use empty NodeInfo object.
// If any pods are given in arguments, their information will be aggregated in
// the returned object.
func NewNodeInfo(pods ...*v1.Pod) *NodeInfo {
ni := &NodeInfo{
requestedResource: &Resource{},
nonzeroRequest: &Resource{},
allocatableResource: &Resource{},
generation: 0,
usedPorts: make(util.HostPortInfo),
}
for _, pod := range pods {
ni.AddPod(pod)
}
return ni
}
// Node returns overall information about this node.
func (n *NodeInfo) Node() *v1.Node {
if n == nil {
return nil
}
return n.node
}
// Pods return all pods scheduled (including assumed to be) on this node.
func (n *NodeInfo) Pods() []*v1.Pod {
if n == nil {
return nil
}
return n.pods
}
// UsedPorts returns used ports on this node.
func (n *NodeInfo) UsedPorts() util.HostPortInfo {
if n == nil {
return nil
}
return n.usedPorts
}
// PodsWithAffinity return all pods with (anti)affinity constraints on this node.
func (n *NodeInfo) PodsWithAffinity() []*v1.Pod {
if n == nil {
return nil
}
return n.podsWithAffinity
}
// AllowedPodNumber returns the number of the allowed pods on this node.
func (n *NodeInfo) AllowedPodNumber() int {
if n == nil || n.allocatableResource == nil {
return 0
}
return n.allocatableResource.AllowedPodNumber
}
// Taints returns the taints list on this node.
func (n *NodeInfo) Taints() ([]v1.Taint, error) {
if n == nil {
return nil, nil
}
return n.taints, n.taintsErr
}
// MemoryPressureCondition returns the memory pressure condition status on this node.
func (n *NodeInfo) MemoryPressureCondition() v1.ConditionStatus {
if n == nil {
return v1.ConditionUnknown
}
return n.memoryPressureCondition
}
// DiskPressureCondition returns the disk pressure condition status on this node.
func (n *NodeInfo) DiskPressureCondition() v1.ConditionStatus {
if n == nil {
return v1.ConditionUnknown
}
return n.diskPressureCondition
}
// RequestedResource returns aggregated resource request of pods on this node.
func (n *NodeInfo) RequestedResource() Resource {
if n == nil {
return emptyResource
}
return *n.requestedResource
}
// NonZeroRequest returns aggregated nonzero resource request of pods on this node.
func (n *NodeInfo) NonZeroRequest() Resource {
if n == nil {
return emptyResource
}
return *n.nonzeroRequest
}
// AllocatableResource returns allocatable resources on a given node.
func (n *NodeInfo) AllocatableResource() Resource {
if n == nil {
return emptyResource
}
return *n.allocatableResource
}
// SetAllocatableResource sets the allocatableResource information of given node.
func (n *NodeInfo) SetAllocatableResource(allocatableResource *Resource) {
n.allocatableResource = allocatableResource
}
// Clone returns a copy of this node.
func (n *NodeInfo) Clone() *NodeInfo {
clone := &NodeInfo{
node: n.node,
requestedResource: n.requestedResource.Clone(),
nonzeroRequest: n.nonzeroRequest.Clone(),
allocatableResource: n.allocatableResource.Clone(),
taintsErr: n.taintsErr,
memoryPressureCondition: n.memoryPressureCondition,
diskPressureCondition: n.diskPressureCondition,
usedPorts: make(util.HostPortInfo),
generation: n.generation,
}
if len(n.pods) > 0 {
clone.pods = append([]*v1.Pod(nil), n.pods...)
}
if len(n.usedPorts) > 0 {
for k, v := range n.usedPorts {
clone.usedPorts[k] = v
}
}
if len(n.podsWithAffinity) > 0 {
clone.podsWithAffinity = append([]*v1.Pod(nil), n.podsWithAffinity...)
}
if len(n.taints) > 0 {
clone.taints = append([]v1.Taint(nil), n.taints...)
}
return clone
}
// String returns representation of human readable format of this NodeInfo.
func (n *NodeInfo) String() string {
podKeys := make([]string, len(n.pods))
for i, pod := range n.pods {
podKeys[i] = pod.Name
}
return fmt.Sprintf("&NodeInfo{Pods:%v, RequestedResource:%#v, NonZeroRequest: %#v, UsedPort: %#v, AllocatableResource:%#v}",
podKeys, n.requestedResource, n.nonzeroRequest, n.usedPorts, n.allocatableResource)
}
func hasPodAffinityConstraints(pod *v1.Pod) bool {
affinity := pod.Spec.Affinity
return affinity != nil && (affinity.PodAffinity != nil || affinity.PodAntiAffinity != nil)
}
// AddPod adds pod information to this NodeInfo.
func (n *NodeInfo) AddPod(pod *v1.Pod) {
res, non0CPU, non0Mem := calculateResource(pod)
n.requestedResource.MilliCPU += res.MilliCPU
n.requestedResource.Memory += res.Memory
n.requestedResource.NvidiaGPU += res.NvidiaGPU
n.requestedResource.EphemeralStorage += res.EphemeralStorage
if n.requestedResource.ScalarResources == nil && len(res.ScalarResources) > 0 {
n.requestedResource.ScalarResources = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.ScalarResources {
n.requestedResource.ScalarResources[rName] += rQuant
}
n.nonzeroRequest.MilliCPU += non0CPU
n.nonzeroRequest.Memory += non0Mem
n.pods = append(n.pods, pod)
if hasPodAffinityConstraints(pod) {
n.podsWithAffinity = append(n.podsWithAffinity, pod)
}
// Consume ports when pods added.
n.updateUsedPorts(pod, true)
n.generation++
}
// RemovePod subtracts pod information from this NodeInfo.
func (n *NodeInfo) RemovePod(pod *v1.Pod) error {
k1, err := getPodKey(pod)
if err != nil {
return err
}
for i := range n.podsWithAffinity {
k2, err := getPodKey(n.podsWithAffinity[i])
if err != nil {
glog.Errorf("Cannot get pod key, err: %v", err)
continue
}
if k1 == k2 {
// delete the element
n.podsWithAffinity[i] = n.podsWithAffinity[len(n.podsWithAffinity)-1]
n.podsWithAffinity = n.podsWithAffinity[:len(n.podsWithAffinity)-1]
break
}
}
for i := range n.pods {
k2, err := getPodKey(n.pods[i])
if err != nil {
glog.Errorf("Cannot get pod key, err: %v", err)
continue
}
if k1 == k2 {
// delete the element
n.pods[i] = n.pods[len(n.pods)-1]
n.pods = n.pods[:len(n.pods)-1]
// reduce the resource data
res, non0CPU, non0Mem := calculateResource(pod)
n.requestedResource.MilliCPU -= res.MilliCPU
n.requestedResource.Memory -= res.Memory
n.requestedResource.NvidiaGPU -= res.NvidiaGPU
n.requestedResource.EphemeralStorage -= res.EphemeralStorage
if len(res.ScalarResources) > 0 && n.requestedResource.ScalarResources == nil {
n.requestedResource.ScalarResources = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.ScalarResources {
n.requestedResource.ScalarResources[rName] -= rQuant
}
n.nonzeroRequest.MilliCPU -= non0CPU
n.nonzeroRequest.Memory -= non0Mem
// Release ports when remove Pods.
n.updateUsedPorts(pod, false)
n.generation++
return nil
}
}
return fmt.Errorf("no corresponding pod %s in pods of node %s", pod.Name, n.node.Name)
}
func calculateResource(pod *v1.Pod) (res Resource, non0CPU int64, non0Mem int64) {
resPtr := &res
for _, c := range pod.Spec.Containers {
resPtr.Add(c.Resources.Requests)
non0CPUReq, non0MemReq := priorityutil.GetNonzeroRequests(&c.Resources.Requests)
non0CPU += non0CPUReq
non0Mem += non0MemReq
// No non-zero resources for GPUs or opaque resources.
}
return
}
func (n *NodeInfo) updateUsedPorts(pod *v1.Pod, add bool) {
for j := range pod.Spec.Containers {
container := &pod.Spec.Containers[j]
for k := range container.Ports {
podPort := &container.Ports[k]
if add {
n.usedPorts.Add(podPort.HostIP, string(podPort.Protocol), podPort.HostPort)
} else {
n.usedPorts.Remove(podPort.HostIP, string(podPort.Protocol), podPort.HostPort)
}
}
}
}
// SetNode sets the overall node information.
func (n *NodeInfo) SetNode(node *v1.Node) error {
n.node = node
n.allocatableResource = NewResource(node.Status.Allocatable)
n.taints = node.Spec.Taints
for i := range node.Status.Conditions {
cond := &node.Status.Conditions[i]
switch cond.Type {
case v1.NodeMemoryPressure:
n.memoryPressureCondition = cond.Status
case v1.NodeDiskPressure:
n.diskPressureCondition = cond.Status
default:
// We ignore other conditions.
}
}
n.generation++
return nil
}
// RemoveNode removes the overall information about the node.
func (n *NodeInfo) RemoveNode(node *v1.Node) error {
// We don't remove NodeInfo for because there can still be some pods on this node -
// this is because notifications about pods are delivered in a different watch,
// and thus can potentially be observed later, even though they happened before
// node removal. This is handled correctly in cache.go file.
n.node = nil
n.allocatableResource = &Resource{}
n.taints, n.taintsErr = nil, nil
n.memoryPressureCondition = v1.ConditionUnknown
n.diskPressureCondition = v1.ConditionUnknown
n.generation++
return nil
}
// FilterOutPods receives a list of pods and filters out those whose node names
// are equal to the node of this NodeInfo, but are not found in the pods of this NodeInfo.
//
// Preemption logic simulates removal of pods on a node by removing them from the
// corresponding NodeInfo. In order for the simulation to work, we call this method
// on the pods returned from SchedulerCache, so that predicate functions see
// only the pods that are not removed from the NodeInfo.
func (n *NodeInfo) FilterOutPods(pods []*v1.Pod) []*v1.Pod {
node := n.Node()
if node == nil {
return pods
}
filtered := make([]*v1.Pod, 0, len(pods))
for _, p := range pods {
if p.Spec.NodeName == node.Name {
// If pod is on the given node, add it to 'filtered' only if it is present in nodeInfo.
podKey, _ := getPodKey(p)
for _, np := range n.Pods() {
npodkey, _ := getPodKey(np)
if npodkey == podKey {
filtered = append(filtered, p)
break
}
}
} else {
filtered = append(filtered, p)
}
}
return filtered
}
// getPodKey returns the string key of a pod.
func getPodKey(pod *v1.Pod) (string, error) {
return clientcache.MetaNamespaceKeyFunc(pod)
}
// Filter implements PodFilter interface. It returns false only if the pod node name
// matches NodeInfo.node and the pod is not found in the pods list. Otherwise,
// returns true.
func (n *NodeInfo) Filter(pod *v1.Pod) bool {
if pod.Spec.NodeName != n.node.Name {
return true
}
for _, p := range n.pods {
if p.Name == pod.Name && p.Namespace == pod.Namespace {
return true
}
}
return false
}

View File

@ -0,0 +1,813 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
import (
"fmt"
"reflect"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/scheduler/util"
)
func TestNewResource(t *testing.T) {
tests := []struct {
resourceList v1.ResourceList
expected *Resource
}{
{
resourceList: map[v1.ResourceName]resource.Quantity{},
expected: &Resource{},
},
{
resourceList: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: *resource.NewScaledQuantity(4, -3),
v1.ResourceMemory: *resource.NewQuantity(2000, resource.BinarySI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(1000, resource.DecimalSI),
v1.ResourcePods: *resource.NewQuantity(80, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
"scalar.test/" + "scalar1": *resource.NewQuantity(1, resource.DecimalSI),
v1.ResourceHugePagesPrefix + "test": *resource.NewQuantity(2, resource.BinarySI),
},
expected: &Resource{
MilliCPU: 4,
Memory: 2000,
NvidiaGPU: 1000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
},
},
}
for _, test := range tests {
r := NewResource(test.resourceList)
if !reflect.DeepEqual(test.expected, r) {
t.Errorf("expected: %#v, got: %#v", test.expected, r)
}
}
}
func TestResourceList(t *testing.T) {
tests := []struct {
resource *Resource
expected v1.ResourceList
}{
{
resource: &Resource{},
expected: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: *resource.NewScaledQuantity(0, -3),
v1.ResourceMemory: *resource.NewQuantity(0, resource.BinarySI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourcePods: *resource.NewQuantity(0, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(0, resource.BinarySI),
},
},
{
resource: &Resource{
MilliCPU: 4,
Memory: 2000,
NvidiaGPU: 1000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
},
expected: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: *resource.NewScaledQuantity(4, -3),
v1.ResourceMemory: *resource.NewQuantity(2000, resource.BinarySI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(1000, resource.DecimalSI),
v1.ResourcePods: *resource.NewQuantity(80, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
"scalar.test/" + "scalar1": *resource.NewQuantity(1, resource.DecimalSI),
v1.ResourceHugePagesPrefix + "test": *resource.NewQuantity(2, resource.BinarySI),
},
},
}
for _, test := range tests {
rl := test.resource.ResourceList()
if !reflect.DeepEqual(test.expected, rl) {
t.Errorf("expected: %#v, got: %#v", test.expected, rl)
}
}
}
func TestResourceClone(t *testing.T) {
tests := []struct {
resource *Resource
expected *Resource
}{
{
resource: &Resource{},
expected: &Resource{},
},
{
resource: &Resource{
MilliCPU: 4,
Memory: 2000,
NvidiaGPU: 1000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
},
expected: &Resource{
MilliCPU: 4,
Memory: 2000,
NvidiaGPU: 1000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
},
},
}
for _, test := range tests {
r := test.resource.Clone()
// Modify the field to check if the result is a clone of the origin one.
test.resource.MilliCPU += 1000
if !reflect.DeepEqual(test.expected, r) {
t.Errorf("expected: %#v, got: %#v", test.expected, r)
}
}
}
func TestResourceAddScalar(t *testing.T) {
tests := []struct {
resource *Resource
scalarName v1.ResourceName
scalarQuantity int64
expected *Resource
}{
{
resource: &Resource{},
scalarName: "scalar1",
scalarQuantity: 100,
expected: &Resource{
ScalarResources: map[v1.ResourceName]int64{"scalar1": 100},
},
},
{
resource: &Resource{
MilliCPU: 4,
Memory: 2000,
NvidiaGPU: 1000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"hugepages-test": 2},
},
scalarName: "scalar2",
scalarQuantity: 200,
expected: &Resource{
MilliCPU: 4,
Memory: 2000,
NvidiaGPU: 1000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"hugepages-test": 2, "scalar2": 200},
},
},
}
for _, test := range tests {
test.resource.AddScalar(test.scalarName, test.scalarQuantity)
if !reflect.DeepEqual(test.expected, test.resource) {
t.Errorf("expected: %#v, got: %#v", test.expected, test.resource)
}
}
}
func TestNewNodeInfo(t *testing.T) {
nodeName := "test-node"
pods := []*v1.Pod{
makeBasePod(t, nodeName, "test-1", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
makeBasePod(t, nodeName, "test-2", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}),
}
expected := &NodeInfo{
requestedResource: &Resource{
MilliCPU: 300,
Memory: 1524,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
nonzeroRequest: &Resource{
MilliCPU: 300,
Memory: 1524,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
"127.0.0.1": map[util.ProtocolPort]struct{}{
{Protocol: "TCP", Port: 80}: {},
{Protocol: "TCP", Port: 8080}: {},
},
},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("500"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 80,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("1Ki"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 8080,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
},
}
ni := NewNodeInfo(pods...)
if !reflect.DeepEqual(expected, ni) {
t.Errorf("expected: %#v, got: %#v", expected, ni)
}
}
func TestNodeInfoClone(t *testing.T) {
nodeName := "test-node"
tests := []struct {
nodeInfo *NodeInfo
expected *NodeInfo
}{
{
nodeInfo: &NodeInfo{
requestedResource: &Resource{},
nonzeroRequest: &Resource{},
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
"127.0.0.1": map[util.ProtocolPort]struct{}{
{Protocol: "TCP", Port: 80}: {},
{Protocol: "TCP", Port: 8080}: {},
},
},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("500"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 80,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("1Ki"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 8080,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
},
},
expected: &NodeInfo{
requestedResource: &Resource{},
nonzeroRequest: &Resource{},
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
"127.0.0.1": map[util.ProtocolPort]struct{}{
{Protocol: "TCP", Port: 80}: {},
{Protocol: "TCP", Port: 8080}: {},
},
},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("500"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 80,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("1Ki"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 8080,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
},
},
},
}
for _, test := range tests {
ni := test.nodeInfo.Clone()
// Modify the field to check if the result is a clone of the origin one.
test.nodeInfo.generation += 10
if !reflect.DeepEqual(test.expected, ni) {
t.Errorf("expected: %#v, got: %#v", test.expected, ni)
}
}
}
func TestNodeInfoAddPod(t *testing.T) {
nodeName := "test-node"
pods := []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("500"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 80,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("1Ki"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 8080,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
}
expected := &NodeInfo{
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test-node",
},
},
requestedResource: &Resource{
MilliCPU: 300,
Memory: 1524,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
nonzeroRequest: &Resource{
MilliCPU: 300,
Memory: 1524,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
"127.0.0.1": map[util.ProtocolPort]struct{}{
{Protocol: "TCP", Port: 80}: {},
{Protocol: "TCP", Port: 8080}: {},
},
},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("500"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 80,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("1Ki"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 8080,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
},
}
ni := fakeNodeInfo()
for _, pod := range pods {
ni.AddPod(pod)
}
if !reflect.DeepEqual(expected, ni) {
t.Errorf("expected: %#v, got: %#v", expected, ni)
}
}
func TestNodeInfoRemovePod(t *testing.T) {
nodeName := "test-node"
pods := []*v1.Pod{
makeBasePod(t, nodeName, "test-1", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
makeBasePod(t, nodeName, "test-2", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}),
}
tests := []struct {
pod *v1.Pod
errExpected bool
expectedNodeInfo *NodeInfo
}{
{
pod: makeBasePod(t, nodeName, "non-exist", "0", "0", "", []v1.ContainerPort{{}}),
errExpected: true,
expectedNodeInfo: &NodeInfo{
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test-node",
},
},
requestedResource: &Resource{
MilliCPU: 300,
Memory: 1524,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
nonzeroRequest: &Resource{
MilliCPU: 300,
Memory: 1524,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
"127.0.0.1": map[util.ProtocolPort]struct{}{
{Protocol: "TCP", Port: 80}: {},
{Protocol: "TCP", Port: 8080}: {},
},
},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("500"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 80,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("1Ki"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 8080,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
},
},
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("500"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 80,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
errExpected: false,
expectedNodeInfo: &NodeInfo{
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test-node",
},
},
requestedResource: &Resource{
MilliCPU: 200,
Memory: 1024,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
nonzeroRequest: &Resource{
MilliCPU: 200,
Memory: 1024,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
allocatableResource: &Resource{},
generation: 3,
usedPorts: util.HostPortInfo{
"127.0.0.1": map[util.ProtocolPort]struct{}{
{Protocol: "TCP", Port: 8080}: {},
},
},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("1Ki"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 8080,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
},
},
},
}
for _, test := range tests {
ni := fakeNodeInfo(pods...)
err := ni.RemovePod(test.pod)
if err != nil {
if test.errExpected {
expectedErrorMsg := fmt.Errorf("no corresponding pod %s in pods of node %s", test.pod.Name, ni.node.Name)
if expectedErrorMsg == err {
t.Errorf("expected error: %v, got: %v", expectedErrorMsg, err)
}
} else {
t.Errorf("expected no error, got: %v", err)
}
}
if !reflect.DeepEqual(test.expectedNodeInfo, ni) {
t.Errorf("expected: %#v, got: %#v", test.expectedNodeInfo, ni)
}
}
}
func fakeNodeInfo(pods ...*v1.Pod) *NodeInfo {
ni := NewNodeInfo(pods...)
ni.node = &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test-node",
},
}
return ni
}

View File

@ -0,0 +1,39 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
import "k8s.io/api/core/v1"
// CreateNodeNameToInfoMap obtains a list of pods and pivots that list into a map where the keys are node names
// and the values are the aggregated information for that node.
func CreateNodeNameToInfoMap(pods []*v1.Pod, nodes []*v1.Node) map[string]*NodeInfo {
nodeNameToInfo := make(map[string]*NodeInfo)
for _, pod := range pods {
nodeName := pod.Spec.NodeName
if _, ok := nodeNameToInfo[nodeName]; !ok {
nodeNameToInfo[nodeName] = NewNodeInfo()
}
nodeNameToInfo[nodeName].AddPod(pod)
}
for _, node := range nodes {
if _, ok := nodeNameToInfo[node.Name]; !ok {
nodeNameToInfo[node.Name] = NewNodeInfo()
}
nodeNameToInfo[node.Name].SetNode(node)
}
return nodeNameToInfo
}

Some files were not shown because too many files have changed in this diff Show More