vendor update for E2E framework

Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
This commit is contained in:
Madhu Rajanna
2019-05-31 15:15:11 +05:30
parent 9bb23e4e32
commit d300da19b7
2149 changed files with 598692 additions and 14107 deletions

View File

@ -0,0 +1,19 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package algorithm contains a generic Scheduler interface and several
// implementations.
package algorithm // import "k8s.io/kubernetes/pkg/scheduler/algorithm"

View File

@ -0,0 +1,156 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/features"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
)
// CSIMaxVolumeLimitChecker defines predicate needed for counting CSI volumes
type CSIMaxVolumeLimitChecker struct {
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo
}
// NewCSIMaxVolumeLimitPredicate returns a predicate for counting CSI volumes
func NewCSIMaxVolumeLimitPredicate(
pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) FitPredicate {
c := &CSIMaxVolumeLimitChecker{
pvInfo: pvInfo,
pvcInfo: pvcInfo,
}
return c.attachableLimitPredicate
}
func (c *CSIMaxVolumeLimitChecker) attachableLimitPredicate(
pod *v1.Pod, meta PredicateMetadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
// if feature gate is disable we return
if !utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
return true, nil, nil
}
// If a pod doesn't have any volume attached to it, the predicate will always be true.
// Thus we make a fast path for it, to avoid unnecessary computations in this case.
if len(pod.Spec.Volumes) == 0 {
return true, nil, nil
}
nodeVolumeLimits := nodeInfo.VolumeLimits()
// if node does not have volume limits this predicate should exit
if len(nodeVolumeLimits) == 0 {
return true, nil, nil
}
// a map of unique volume name/csi volume handle and volume limit key
newVolumes := make(map[string]string)
if err := c.filterAttachableVolumes(pod.Spec.Volumes, pod.Namespace, newVolumes); err != nil {
return false, nil, err
}
if len(newVolumes) == 0 {
return true, nil, nil
}
// a map of unique volume name/csi volume handle and volume limit key
attachedVolumes := make(map[string]string)
for _, existingPod := range nodeInfo.Pods() {
if err := c.filterAttachableVolumes(existingPod.Spec.Volumes, existingPod.Namespace, attachedVolumes); err != nil {
return false, nil, err
}
}
newVolumeCount := map[string]int{}
attachedVolumeCount := map[string]int{}
for volumeName, volumeLimitKey := range attachedVolumes {
if _, ok := newVolumes[volumeName]; ok {
delete(newVolumes, volumeName)
}
attachedVolumeCount[volumeLimitKey]++
}
for _, volumeLimitKey := range newVolumes {
newVolumeCount[volumeLimitKey]++
}
for volumeLimitKey, count := range newVolumeCount {
maxVolumeLimit, ok := nodeVolumeLimits[v1.ResourceName(volumeLimitKey)]
if ok {
currentVolumeCount := attachedVolumeCount[volumeLimitKey]
if currentVolumeCount+count > int(maxVolumeLimit) {
return false, []PredicateFailureReason{ErrMaxVolumeCountExceeded}, nil
}
}
}
return true, nil, nil
}
func (c *CSIMaxVolumeLimitChecker) filterAttachableVolumes(
volumes []v1.Volume, namespace string, result map[string]string) error {
for _, vol := range volumes {
// CSI volumes can only be used as persistent volumes
if vol.PersistentVolumeClaim == nil {
continue
}
pvcName := vol.PersistentVolumeClaim.ClaimName
if pvcName == "" {
return fmt.Errorf("PersistentVolumeClaim had no name")
}
pvc, err := c.pvcInfo.GetPersistentVolumeClaimInfo(namespace, pvcName)
if err != nil {
klog.V(4).Infof("Unable to look up PVC info for %s/%s", namespace, pvcName)
continue
}
pvName := pvc.Spec.VolumeName
// TODO - the actual handling of unbound PVCs will be fixed by late binding design.
if pvName == "" {
klog.V(4).Infof("Persistent volume had no name for claim %s/%s", namespace, pvcName)
continue
}
pv, err := c.pvInfo.GetPersistentVolumeInfo(pvName)
if err != nil {
klog.V(4).Infof("Unable to look up PV info for PVC %s/%s and PV %s", namespace, pvcName, pvName)
continue
}
csiSource := pv.Spec.PersistentVolumeSource.CSI
if csiSource == nil {
klog.V(4).Infof("Not considering non-CSI volume %s/%s", namespace, pvcName)
continue
}
driverName := csiSource.Driver
volumeLimitKey := volumeutil.GetCSIAttachLimitKey(driverName)
result[csiSource.VolumeHandle] = volumeLimitKey
}
return nil
}

View File

@ -0,0 +1,155 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/api/core/v1"
)
var (
// The predicateName tries to be consistent as the predicate name used in DefaultAlgorithmProvider defined in
// defaults.go (which tend to be stable for backward compatibility)
// NOTE: If you add a new predicate failure error for a predicate that can never
// be made to pass by removing pods, or you change an existing predicate so that
// it can never be made to pass by removing pods, you need to add the predicate
// failure error in nodesWherePreemptionMightHelp() in scheduler/core/generic_scheduler.go
// ErrDiskConflict is used for NoDiskConflict predicate error.
ErrDiskConflict = newPredicateFailureError("NoDiskConflict", "node(s) had no available disk")
// ErrVolumeZoneConflict is used for NoVolumeZoneConflict predicate error.
ErrVolumeZoneConflict = newPredicateFailureError("NoVolumeZoneConflict", "node(s) had no available volume zone")
// ErrNodeSelectorNotMatch is used for MatchNodeSelector predicate error.
ErrNodeSelectorNotMatch = newPredicateFailureError("MatchNodeSelector", "node(s) didn't match node selector")
// ErrPodAffinityNotMatch is used for MatchInterPodAffinity predicate error.
ErrPodAffinityNotMatch = newPredicateFailureError("MatchInterPodAffinity", "node(s) didn't match pod affinity/anti-affinity")
// ErrPodAffinityRulesNotMatch is used for PodAffinityRulesNotMatch predicate error.
ErrPodAffinityRulesNotMatch = newPredicateFailureError("PodAffinityRulesNotMatch", "node(s) didn't match pod affinity rules")
// ErrPodAntiAffinityRulesNotMatch is used for PodAntiAffinityRulesNotMatch predicate error.
ErrPodAntiAffinityRulesNotMatch = newPredicateFailureError("PodAntiAffinityRulesNotMatch", "node(s) didn't match pod anti-affinity rules")
// ErrExistingPodsAntiAffinityRulesNotMatch is used for ExistingPodsAntiAffinityRulesNotMatch predicate error.
ErrExistingPodsAntiAffinityRulesNotMatch = newPredicateFailureError("ExistingPodsAntiAffinityRulesNotMatch", "node(s) didn't satisfy existing pods anti-affinity rules")
// ErrTaintsTolerationsNotMatch is used for PodToleratesNodeTaints predicate error.
ErrTaintsTolerationsNotMatch = newPredicateFailureError("PodToleratesNodeTaints", "node(s) had taints that the pod didn't tolerate")
// ErrPodNotMatchHostName is used for HostName predicate error.
ErrPodNotMatchHostName = newPredicateFailureError("HostName", "node(s) didn't match the requested hostname")
// ErrPodNotFitsHostPorts is used for PodFitsHostPorts predicate error.
ErrPodNotFitsHostPorts = newPredicateFailureError("PodFitsHostPorts", "node(s) didn't have free ports for the requested pod ports")
// ErrNodeLabelPresenceViolated is used for CheckNodeLabelPresence predicate error.
ErrNodeLabelPresenceViolated = newPredicateFailureError("CheckNodeLabelPresence", "node(s) didn't have the requested labels")
// ErrServiceAffinityViolated is used for CheckServiceAffinity predicate error.
ErrServiceAffinityViolated = newPredicateFailureError("CheckServiceAffinity", "node(s) didn't match service affinity")
// ErrMaxVolumeCountExceeded is used for MaxVolumeCount predicate error.
ErrMaxVolumeCountExceeded = newPredicateFailureError("MaxVolumeCount", "node(s) exceed max volume count")
// ErrNodeUnderMemoryPressure is used for NodeUnderMemoryPressure predicate error.
ErrNodeUnderMemoryPressure = newPredicateFailureError("NodeUnderMemoryPressure", "node(s) had memory pressure")
// ErrNodeUnderDiskPressure is used for NodeUnderDiskPressure predicate error.
ErrNodeUnderDiskPressure = newPredicateFailureError("NodeUnderDiskPressure", "node(s) had disk pressure")
// ErrNodeUnderPIDPressure is used for NodeUnderPIDPressure predicate error.
ErrNodeUnderPIDPressure = newPredicateFailureError("NodeUnderPIDPressure", "node(s) had pid pressure")
// ErrNodeNotReady is used for NodeNotReady predicate error.
ErrNodeNotReady = newPredicateFailureError("NodeNotReady", "node(s) were not ready")
// ErrNodeNetworkUnavailable is used for NodeNetworkUnavailable predicate error.
ErrNodeNetworkUnavailable = newPredicateFailureError("NodeNetworkUnavailable", "node(s) had unavailable network")
// ErrNodeUnschedulable is used for NodeUnschedulable predicate error.
ErrNodeUnschedulable = newPredicateFailureError("NodeUnschedulable", "node(s) were unschedulable")
// ErrNodeUnknownCondition is used for NodeUnknownCondition predicate error.
ErrNodeUnknownCondition = newPredicateFailureError("NodeUnknownCondition", "node(s) had unknown conditions")
// ErrVolumeNodeConflict is used for VolumeNodeAffinityConflict predicate error.
ErrVolumeNodeConflict = newPredicateFailureError("VolumeNodeAffinityConflict", "node(s) had volume node affinity conflict")
// ErrVolumeBindConflict is used for VolumeBindingNoMatch predicate error.
ErrVolumeBindConflict = newPredicateFailureError("VolumeBindingNoMatch", "node(s) didn't find available persistent volumes to bind")
// ErrFakePredicate is used for test only. The fake predicates returning false also returns error
// as ErrFakePredicate.
ErrFakePredicate = newPredicateFailureError("FakePredicateError", "Nodes failed the fake predicate")
)
// InsufficientResourceError is an error type that indicates what kind of resource limit is
// hit and caused the unfitting failure.
type InsufficientResourceError struct {
// resourceName is the name of the resource that is insufficient
ResourceName v1.ResourceName
requested int64
used int64
capacity int64
}
// NewInsufficientResourceError returns an InsufficientResourceError.
func NewInsufficientResourceError(resourceName v1.ResourceName, requested, used, capacity int64) *InsufficientResourceError {
return &InsufficientResourceError{
ResourceName: resourceName,
requested: requested,
used: used,
capacity: capacity,
}
}
func (e *InsufficientResourceError) Error() string {
return fmt.Sprintf("Node didn't have enough resource: %s, requested: %d, used: %d, capacity: %d",
e.ResourceName, e.requested, e.used, e.capacity)
}
// GetReason returns the reason of the InsufficientResourceError.
func (e *InsufficientResourceError) GetReason() string {
return fmt.Sprintf("Insufficient %v", e.ResourceName)
}
// GetInsufficientAmount returns the amount of the insufficient resource of the error.
func (e *InsufficientResourceError) GetInsufficientAmount() int64 {
return e.requested - (e.capacity - e.used)
}
// PredicateFailureError describes a failure error of predicate.
type PredicateFailureError struct {
PredicateName string
PredicateDesc string
}
func newPredicateFailureError(predicateName, predicateDesc string) *PredicateFailureError {
return &PredicateFailureError{PredicateName: predicateName, PredicateDesc: predicateDesc}
}
func (e *PredicateFailureError) Error() string {
return fmt.Sprintf("Predicate %s failed", e.PredicateName)
}
// GetReason returns the reason of the PredicateFailureError.
func (e *PredicateFailureError) GetReason() string {
return e.PredicateDesc
}
// PredicateFailureReason interface represents the failure reason of a predicate.
type PredicateFailureReason interface {
GetReason() string
}
// FailureReason describes a failure reason.
type FailureReason struct {
reason string
}
// NewFailureReason creates a FailureReason with message.
func NewFailureReason(msg string) *FailureReason {
return &FailureReason{reason: msg}
}
// GetReason returns the reason of the FailureReason.
func (e *FailureReason) GetReason() string {
return e.reason
}

View File

@ -0,0 +1,542 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"context"
"fmt"
"sync"
"k8s.io/klog"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/util/workqueue"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
// PredicateMetadata interface represents anything that can access a predicate metadata.
type PredicateMetadata interface {
ShallowCopy() PredicateMetadata
AddPod(addedPod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) error
RemovePod(deletedPod *v1.Pod) error
}
// PredicateMetadataProducer is a function that computes predicate metadata for a given pod.
type PredicateMetadataProducer func(pod *v1.Pod, nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo) PredicateMetadata
// PredicateMetadataFactory defines a factory of predicate metadata.
type PredicateMetadataFactory struct {
podLister algorithm.PodLister
}
// AntiAffinityTerm's topology key value used in predicate metadata
type topologyPair struct {
key string
value string
}
// Note that predicateMetadata and matchingPodAntiAffinityTerm need to be declared in the same file
// due to the way declarations are processed in predicate declaration unit tests.
type matchingPodAntiAffinityTerm struct {
term *v1.PodAffinityTerm
node *v1.Node
}
type podSet map[*v1.Pod]struct{}
type topologyPairSet map[topologyPair]struct{}
// topologyPairsMaps keeps topologyPairToAntiAffinityPods and antiAffinityPodToTopologyPairs in sync
// as they are the inverse of each others.
type topologyPairsMaps struct {
topologyPairToPods map[topologyPair]podSet
podToTopologyPairs map[string]topologyPairSet
}
// NOTE: When new fields are added/removed or logic is changed, please make sure that
// RemovePod, AddPod, and ShallowCopy functions are updated to work with the new changes.
type predicateMetadata struct {
pod *v1.Pod
podBestEffort bool
podRequest *schedulernodeinfo.Resource
podPorts []*v1.ContainerPort
topologyPairsAntiAffinityPodsMap *topologyPairsMaps
// A map of topology pairs to a list of Pods that can potentially match
// the affinity terms of the "pod" and its inverse.
topologyPairsPotentialAffinityPods *topologyPairsMaps
// A map of topology pairs to a list of Pods that can potentially match
// the anti-affinity terms of the "pod" and its inverse.
topologyPairsPotentialAntiAffinityPods *topologyPairsMaps
serviceAffinityInUse bool
serviceAffinityMatchingPodList []*v1.Pod
serviceAffinityMatchingPodServices []*v1.Service
// ignoredExtendedResources is a set of extended resource names that will
// be ignored in the PodFitsResources predicate.
//
// They can be scheduler extender managed resources, the consumption of
// which should be accounted only by the extenders. This set is synthesized
// from scheduler extender configuration and does not change per pod.
ignoredExtendedResources sets.String
}
// Ensure that predicateMetadata implements algorithm.PredicateMetadata.
var _ PredicateMetadata = &predicateMetadata{}
// predicateMetadataProducer function produces predicate metadata. It is stored in a global variable below
// and used to modify the return values of PredicateMetadataProducer
type predicateMetadataProducer func(pm *predicateMetadata)
var predicateMetaProducerRegisterLock sync.Mutex
var predicateMetadataProducers = make(map[string]predicateMetadataProducer)
// RegisterPredicateMetadataProducer registers a PredicateMetadataProducer.
func RegisterPredicateMetadataProducer(predicateName string, precomp predicateMetadataProducer) {
predicateMetaProducerRegisterLock.Lock()
defer predicateMetaProducerRegisterLock.Unlock()
predicateMetadataProducers[predicateName] = precomp
}
// EmptyPredicateMetadataProducer returns a no-op MetadataProducer type.
func EmptyPredicateMetadataProducer(pod *v1.Pod, nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo) PredicateMetadata {
return nil
}
// RegisterPredicateMetadataProducerWithExtendedResourceOptions registers a
// PredicateMetadataProducer that creates predicate metadata with the provided
// options for extended resources.
//
// See the comments in "predicateMetadata" for the explanation of the options.
func RegisterPredicateMetadataProducerWithExtendedResourceOptions(ignoredExtendedResources sets.String) {
RegisterPredicateMetadataProducer("PredicateWithExtendedResourceOptions", func(pm *predicateMetadata) {
pm.ignoredExtendedResources = ignoredExtendedResources
})
}
// NewPredicateMetadataFactory creates a PredicateMetadataFactory.
func NewPredicateMetadataFactory(podLister algorithm.PodLister) PredicateMetadataProducer {
factory := &PredicateMetadataFactory{
podLister,
}
return factory.GetMetadata
}
// GetMetadata returns the predicateMetadata used which will be used by various predicates.
func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInfoMap map[string]*schedulernodeinfo.NodeInfo) PredicateMetadata {
// If we cannot compute metadata, just return nil
if pod == nil {
return nil
}
// existingPodAntiAffinityMap will be used later for efficient check on existing pods' anti-affinity
existingPodAntiAffinityMap, err := getTPMapMatchingExistingAntiAffinity(pod, nodeNameToInfoMap)
if err != nil {
return nil
}
// incomingPodAffinityMap will be used later for efficient check on incoming pod's affinity
// incomingPodAntiAffinityMap will be used later for efficient check on incoming pod's anti-affinity
incomingPodAffinityMap, incomingPodAntiAffinityMap, err := getTPMapMatchingIncomingAffinityAntiAffinity(pod, nodeNameToInfoMap)
if err != nil {
klog.Errorf("[predicate meta data generation] error finding pods that match affinity terms: %v", err)
return nil
}
predicateMetadata := &predicateMetadata{
pod: pod,
podBestEffort: isPodBestEffort(pod),
podRequest: GetResourceRequest(pod),
podPorts: schedutil.GetContainerPorts(pod),
topologyPairsPotentialAffinityPods: incomingPodAffinityMap,
topologyPairsPotentialAntiAffinityPods: incomingPodAntiAffinityMap,
topologyPairsAntiAffinityPodsMap: existingPodAntiAffinityMap,
}
for predicateName, precomputeFunc := range predicateMetadataProducers {
klog.V(10).Infof("Precompute: %v", predicateName)
precomputeFunc(predicateMetadata)
}
return predicateMetadata
}
// returns a pointer to a new topologyPairsMaps
func newTopologyPairsMaps() *topologyPairsMaps {
return &topologyPairsMaps{topologyPairToPods: make(map[topologyPair]podSet),
podToTopologyPairs: make(map[string]topologyPairSet)}
}
func (topologyPairsMaps *topologyPairsMaps) addTopologyPair(pair topologyPair, pod *v1.Pod) {
podFullName := schedutil.GetPodFullName(pod)
if topologyPairsMaps.topologyPairToPods[pair] == nil {
topologyPairsMaps.topologyPairToPods[pair] = make(map[*v1.Pod]struct{})
}
topologyPairsMaps.topologyPairToPods[pair][pod] = struct{}{}
if topologyPairsMaps.podToTopologyPairs[podFullName] == nil {
topologyPairsMaps.podToTopologyPairs[podFullName] = make(map[topologyPair]struct{})
}
topologyPairsMaps.podToTopologyPairs[podFullName][pair] = struct{}{}
}
func (topologyPairsMaps *topologyPairsMaps) removePod(deletedPod *v1.Pod) {
deletedPodFullName := schedutil.GetPodFullName(deletedPod)
for pair := range topologyPairsMaps.podToTopologyPairs[deletedPodFullName] {
delete(topologyPairsMaps.topologyPairToPods[pair], deletedPod)
if len(topologyPairsMaps.topologyPairToPods[pair]) == 0 {
delete(topologyPairsMaps.topologyPairToPods, pair)
}
}
delete(topologyPairsMaps.podToTopologyPairs, deletedPodFullName)
}
func (topologyPairsMaps *topologyPairsMaps) appendMaps(toAppend *topologyPairsMaps) {
if toAppend == nil {
return
}
for pair := range toAppend.topologyPairToPods {
for pod := range toAppend.topologyPairToPods[pair] {
topologyPairsMaps.addTopologyPair(pair, pod)
}
}
}
// RemovePod changes predicateMetadata assuming that the given `deletedPod` is
// deleted from the system.
func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error {
deletedPodFullName := schedutil.GetPodFullName(deletedPod)
if deletedPodFullName == schedutil.GetPodFullName(meta.pod) {
return fmt.Errorf("deletedPod and meta.pod must not be the same")
}
meta.topologyPairsAntiAffinityPodsMap.removePod(deletedPod)
// Delete pod from the matching affinity or anti-affinity topology pairs maps.
meta.topologyPairsPotentialAffinityPods.removePod(deletedPod)
meta.topologyPairsPotentialAntiAffinityPods.removePod(deletedPod)
// All pods in the serviceAffinityMatchingPodList are in the same namespace.
// So, if the namespace of the first one is not the same as the namespace of the
// deletedPod, we don't need to check the list, as deletedPod isn't in the list.
if meta.serviceAffinityInUse &&
len(meta.serviceAffinityMatchingPodList) > 0 &&
deletedPod.Namespace == meta.serviceAffinityMatchingPodList[0].Namespace {
for i, pod := range meta.serviceAffinityMatchingPodList {
if schedutil.GetPodFullName(pod) == deletedPodFullName {
meta.serviceAffinityMatchingPodList = append(
meta.serviceAffinityMatchingPodList[:i],
meta.serviceAffinityMatchingPodList[i+1:]...)
break
}
}
}
return nil
}
// AddPod changes predicateMetadata assuming that `newPod` is added to the
// system.
func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) error {
addedPodFullName := schedutil.GetPodFullName(addedPod)
if addedPodFullName == schedutil.GetPodFullName(meta.pod) {
return fmt.Errorf("addedPod and meta.pod must not be the same")
}
if nodeInfo.Node() == nil {
return fmt.Errorf("invalid node in nodeInfo")
}
// Add matching anti-affinity terms of the addedPod to the map.
topologyPairsMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(meta.pod, addedPod, nodeInfo.Node())
if err != nil {
return err
}
meta.topologyPairsAntiAffinityPodsMap.appendMaps(topologyPairsMaps)
// Add the pod to nodeNameToMatchingAffinityPods and nodeNameToMatchingAntiAffinityPods if needed.
affinity := meta.pod.Spec.Affinity
podNodeName := addedPod.Spec.NodeName
if affinity != nil && len(podNodeName) > 0 {
podNode := nodeInfo.Node()
// It is assumed that when the added pod matches affinity of the meta.pod, all the terms must match,
// this should be changed when the implementation of targetPodMatchesAffinityOfPod/podMatchesAffinityTermProperties
// is changed
if targetPodMatchesAffinityOfPod(meta.pod, addedPod) {
affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
for _, term := range affinityTerms {
if topologyValue, ok := podNode.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
meta.topologyPairsPotentialAffinityPods.addTopologyPair(pair, addedPod)
}
}
}
if targetPodMatchesAntiAffinityOfPod(meta.pod, addedPod) {
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
for _, term := range antiAffinityTerms {
if topologyValue, ok := podNode.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
meta.topologyPairsPotentialAntiAffinityPods.addTopologyPair(pair, addedPod)
}
}
}
}
// If addedPod is in the same namespace as the meta.pod, update the list
// of matching pods if applicable.
if meta.serviceAffinityInUse && addedPod.Namespace == meta.pod.Namespace {
selector := CreateSelectorFromLabels(meta.pod.Labels)
if selector.Matches(labels.Set(addedPod.Labels)) {
meta.serviceAffinityMatchingPodList = append(meta.serviceAffinityMatchingPodList,
addedPod)
}
}
return nil
}
// ShallowCopy copies a metadata struct into a new struct and creates a copy of
// its maps and slices, but it does not copy the contents of pointer values.
func (meta *predicateMetadata) ShallowCopy() PredicateMetadata {
newPredMeta := &predicateMetadata{
pod: meta.pod,
podBestEffort: meta.podBestEffort,
podRequest: meta.podRequest,
serviceAffinityInUse: meta.serviceAffinityInUse,
ignoredExtendedResources: meta.ignoredExtendedResources,
}
newPredMeta.podPorts = append([]*v1.ContainerPort(nil), meta.podPorts...)
newPredMeta.topologyPairsPotentialAffinityPods = newTopologyPairsMaps()
newPredMeta.topologyPairsPotentialAffinityPods.appendMaps(meta.topologyPairsPotentialAffinityPods)
newPredMeta.topologyPairsPotentialAntiAffinityPods = newTopologyPairsMaps()
newPredMeta.topologyPairsPotentialAntiAffinityPods.appendMaps(meta.topologyPairsPotentialAntiAffinityPods)
newPredMeta.topologyPairsAntiAffinityPodsMap = newTopologyPairsMaps()
newPredMeta.topologyPairsAntiAffinityPodsMap.appendMaps(meta.topologyPairsAntiAffinityPodsMap)
newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil),
meta.serviceAffinityMatchingPodServices...)
newPredMeta.serviceAffinityMatchingPodList = append([]*v1.Pod(nil),
meta.serviceAffinityMatchingPodList...)
return (PredicateMetadata)(newPredMeta)
}
type affinityTermProperties struct {
namespaces sets.String
selector labels.Selector
}
// getAffinityTermProperties receives a Pod and affinity terms and returns the namespaces and
// selectors of the terms.
func getAffinityTermProperties(pod *v1.Pod, terms []v1.PodAffinityTerm) (properties []*affinityTermProperties, err error) {
if terms == nil {
return properties, nil
}
for _, term := range terms {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
return nil, err
}
properties = append(properties, &affinityTermProperties{namespaces: namespaces, selector: selector})
}
return properties, nil
}
// podMatchesAllAffinityTermProperties returns true IFF the given pod matches all the given properties.
func podMatchesAllAffinityTermProperties(pod *v1.Pod, properties []*affinityTermProperties) bool {
if len(properties) == 0 {
return false
}
for _, property := range properties {
if !priorityutil.PodMatchesTermsNamespaceAndSelector(pod, property.namespaces, property.selector) {
return false
}
}
return true
}
// podMatchesAnyAffinityTermProperties returns true if the given pod matches any given property.
func podMatchesAnyAffinityTermProperties(pod *v1.Pod, properties []*affinityTermProperties) bool {
if len(properties) == 0 {
return false
}
for _, property := range properties {
if priorityutil.PodMatchesTermsNamespaceAndSelector(pod, property.namespaces, property.selector) {
return true
}
}
return false
}
// getTPMapMatchingExistingAntiAffinity calculates the following for each existing pod on each node:
// (1) Whether it has PodAntiAffinity
// (2) Whether any AffinityTerm matches the incoming pod
func getTPMapMatchingExistingAntiAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulernodeinfo.NodeInfo) (*topologyPairsMaps, error) {
allNodeNames := make([]string, 0, len(nodeInfoMap))
for name := range nodeInfoMap {
allNodeNames = append(allNodeNames, name)
}
var lock sync.Mutex
var firstError error
topologyMaps := newTopologyPairsMaps()
appendTopologyPairsMaps := func(toAppend *topologyPairsMaps) {
lock.Lock()
defer lock.Unlock()
topologyMaps.appendMaps(toAppend)
}
catchError := func(err error) {
lock.Lock()
defer lock.Unlock()
if firstError == nil {
firstError = err
}
}
processNode := func(i int) {
nodeInfo := nodeInfoMap[allNodeNames[i]]
node := nodeInfo.Node()
if node == nil {
catchError(fmt.Errorf("node not found"))
return
}
for _, existingPod := range nodeInfo.PodsWithAffinity() {
existingPodTopologyMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(pod, existingPod, node)
if err != nil {
catchError(err)
return
}
appendTopologyPairsMaps(existingPodTopologyMaps)
}
}
workqueue.ParallelizeUntil(context.TODO(), 16, len(allNodeNames), processNode)
return topologyMaps, firstError
}
// getTPMapMatchingIncomingAffinityAntiAffinity finds existing Pods that match affinity terms of the given "pod".
// It returns a topologyPairsMaps that are checked later by the affinity
// predicate. With this topologyPairsMaps available, the affinity predicate does not
// need to check all the pods in the cluster.
func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulernodeinfo.NodeInfo) (topologyPairsAffinityPodsMaps *topologyPairsMaps, topologyPairsAntiAffinityPodsMaps *topologyPairsMaps, err error) {
affinity := pod.Spec.Affinity
if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
return newTopologyPairsMaps(), newTopologyPairsMaps(), nil
}
allNodeNames := make([]string, 0, len(nodeInfoMap))
for name := range nodeInfoMap {
allNodeNames = append(allNodeNames, name)
}
var lock sync.Mutex
var firstError error
topologyPairsAffinityPodsMaps = newTopologyPairsMaps()
topologyPairsAntiAffinityPodsMaps = newTopologyPairsMaps()
appendResult := func(nodeName string, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps *topologyPairsMaps) {
lock.Lock()
defer lock.Unlock()
if len(nodeTopologyPairsAffinityPodsMaps.topologyPairToPods) > 0 {
topologyPairsAffinityPodsMaps.appendMaps(nodeTopologyPairsAffinityPodsMaps)
}
if len(nodeTopologyPairsAntiAffinityPodsMaps.topologyPairToPods) > 0 {
topologyPairsAntiAffinityPodsMaps.appendMaps(nodeTopologyPairsAntiAffinityPodsMaps)
}
}
catchError := func(err error) {
lock.Lock()
defer lock.Unlock()
if firstError == nil {
firstError = err
}
}
affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
affinityProperties, err := getAffinityTermProperties(pod, affinityTerms)
if err != nil {
return nil, nil, err
}
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
processNode := func(i int) {
nodeInfo := nodeInfoMap[allNodeNames[i]]
node := nodeInfo.Node()
if node == nil {
catchError(fmt.Errorf("nodeInfo.Node is nil"))
return
}
nodeTopologyPairsAffinityPodsMaps := newTopologyPairsMaps()
nodeTopologyPairsAntiAffinityPodsMaps := newTopologyPairsMaps()
for _, existingPod := range nodeInfo.Pods() {
// Check affinity properties.
if podMatchesAllAffinityTermProperties(existingPod, affinityProperties) {
for _, term := range affinityTerms {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
nodeTopologyPairsAffinityPodsMaps.addTopologyPair(pair, existingPod)
}
}
}
// Check anti-affinity properties.
for _, term := range antiAffinityTerms {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
catchError(err)
return
}
if priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
nodeTopologyPairsAntiAffinityPodsMaps.addTopologyPair(pair, existingPod)
}
}
}
}
if len(nodeTopologyPairsAffinityPodsMaps.topologyPairToPods) > 0 || len(nodeTopologyPairsAntiAffinityPodsMaps.topologyPairToPods) > 0 {
appendResult(node.Name, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps)
}
}
workqueue.ParallelizeUntil(context.TODO(), 16, len(allNodeNames), processNode)
return topologyPairsAffinityPodsMaps, topologyPairsAntiAffinityPodsMaps, firstError
}
// targetPodMatchesAffinityOfPod returns true if "targetPod" matches ALL affinity terms of
// "pod". This function does not check topology.
// So, whether the targetPod actually matches or not needs further checks for a specific
// node.
func targetPodMatchesAffinityOfPod(pod, targetPod *v1.Pod) bool {
affinity := pod.Spec.Affinity
if affinity == nil || affinity.PodAffinity == nil {
return false
}
affinityProperties, err := getAffinityTermProperties(pod, GetPodAffinityTerms(affinity.PodAffinity))
if err != nil {
klog.Errorf("error in getting affinity properties of Pod %v", pod.Name)
return false
}
return podMatchesAllAffinityTermProperties(targetPod, affinityProperties)
}
// targetPodMatchesAntiAffinityOfPod returns true if "targetPod" matches ANY anti-affinity
// term of "pod". This function does not check topology.
// So, whether the targetPod actually matches or not needs further checks for a specific
// node.
func targetPodMatchesAntiAffinityOfPod(pod, targetPod *v1.Pod) bool {
affinity := pod.Spec.Affinity
if affinity == nil || affinity.PodAntiAffinity == nil {
return false
}
properties, err := getAffinityTermProperties(pod, GetPodAntiAffinityTerms(affinity.PodAntiAffinity))
if err != nil {
klog.Errorf("error in getting anti-affinity properties of Pod %v", pod.Name)
return false
}
return podMatchesAnyAffinityTermProperties(targetPod, properties)
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,85 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
)
// FakePersistentVolumeClaimInfo declares a []v1.PersistentVolumeClaim type for testing.
type FakePersistentVolumeClaimInfo []v1.PersistentVolumeClaim
// GetPersistentVolumeClaimInfo gets PVC matching the namespace and PVC ID.
func (pvcs FakePersistentVolumeClaimInfo) GetPersistentVolumeClaimInfo(namespace string, pvcID string) (*v1.PersistentVolumeClaim, error) {
for _, pvc := range pvcs {
if pvc.Name == pvcID && pvc.Namespace == namespace {
return &pvc, nil
}
}
return nil, fmt.Errorf("Unable to find persistent volume claim: %s/%s", namespace, pvcID)
}
// FakeNodeInfo declares a v1.Node type for testing.
type FakeNodeInfo v1.Node
// GetNodeInfo return a fake node info object.
func (n FakeNodeInfo) GetNodeInfo(nodeName string) (*v1.Node, error) {
node := v1.Node(n)
return &node, nil
}
// FakeNodeListInfo declares a []v1.Node type for testing.
type FakeNodeListInfo []v1.Node
// GetNodeInfo returns a fake node object in the fake nodes.
func (nodes FakeNodeListInfo) GetNodeInfo(nodeName string) (*v1.Node, error) {
for _, node := range nodes {
if node.Name == nodeName {
return &node, nil
}
}
return nil, fmt.Errorf("Unable to find node: %s", nodeName)
}
// FakePersistentVolumeInfo declares a []v1.PersistentVolume type for testing.
type FakePersistentVolumeInfo []v1.PersistentVolume
// GetPersistentVolumeInfo returns a fake PV object in the fake PVs by PV ID.
func (pvs FakePersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*v1.PersistentVolume, error) {
for _, pv := range pvs {
if pv.Name == pvID {
return &pv, nil
}
}
return nil, fmt.Errorf("Unable to find persistent volume: %s", pvID)
}
// FakeStorageClassInfo declares a []storagev1.StorageClass type for testing.
type FakeStorageClassInfo []storagev1.StorageClass
// GetStorageClassInfo returns a fake storage class object in the fake storage classes by name.
func (classes FakeStorageClassInfo) GetStorageClassInfo(name string) (*storagev1.StorageClass, error) {
for _, sc := range classes {
if sc.Name == name {
return &sc, nil
}
}
return nil, fmt.Errorf("Unable to find storage class: %s", name)
}

View File

@ -0,0 +1,79 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// FindLabelsInSet gets as many key/value pairs as possible out of a label set.
func FindLabelsInSet(labelsToKeep []string, selector labels.Set) map[string]string {
aL := make(map[string]string)
for _, l := range labelsToKeep {
if selector.Has(l) {
aL[l] = selector.Get(l)
}
}
return aL
}
// AddUnsetLabelsToMap backfills missing values with values we find in a map.
func AddUnsetLabelsToMap(aL map[string]string, labelsToAdd []string, labelSet labels.Set) {
for _, l := range labelsToAdd {
// if the label is already there, dont overwrite it.
if _, exists := aL[l]; exists {
continue
}
// otherwise, backfill this label.
if labelSet.Has(l) {
aL[l] = labelSet.Get(l)
}
}
}
// FilterPodsByNamespace filters pods outside a namespace from the given list.
func FilterPodsByNamespace(pods []*v1.Pod, ns string) []*v1.Pod {
filtered := []*v1.Pod{}
for _, nsPod := range pods {
if nsPod.Namespace == ns {
filtered = append(filtered, nsPod)
}
}
return filtered
}
// CreateSelectorFromLabels is used to define a selector that corresponds to the keys in a map.
func CreateSelectorFromLabels(aL map[string]string) labels.Selector {
if aL == nil || len(aL) == 0 {
return labels.Everything()
}
return labels.Set(aL).AsSelector()
}
// portsConflict check whether existingPorts and wantPorts conflict with each other
// return true if we have a conflict
func portsConflict(existingPorts schedulernodeinfo.HostPortInfo, wantPorts []*v1.ContainerPort) bool {
for _, cp := range wantPorts {
if existingPorts.CheckConflict(cp.HostIP, string(cp.Protocol), cp.HostPort) {
return true
}
}
return false
}

View File

@ -0,0 +1,52 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import "k8s.io/api/core/v1"
// For each of these resources, a pod that doesn't request the resource explicitly
// will be treated as having requested the amount indicated below, for the purpose
// of computing priority only. This ensures that when scheduling zero-request pods, such
// pods will not all be scheduled to the machine with the smallest in-use request,
// and that when scheduling regular pods, such pods will not see zero-request pods as
// consuming no resources whatsoever. We chose these values to be similar to the
// resources that we give to cluster addon pods (#10653). But they are pretty arbitrary.
// As described in #11713, we use request instead of limit to deal with resource requirements.
// DefaultMilliCPURequest defines default milli cpu request number.
const DefaultMilliCPURequest int64 = 100 // 0.1 core
// DefaultMemoryRequest defines default memory request size.
const DefaultMemoryRequest int64 = 200 * 1024 * 1024 // 200 MB
// GetNonzeroRequests returns the default resource request if none is found or
// what is provided on the request.
func GetNonzeroRequests(requests *v1.ResourceList) (int64, int64) {
var outMilliCPU, outMemory int64
// Override if un-set, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceCPU]; !found {
outMilliCPU = DefaultMilliCPURequest
} else {
outMilliCPU = requests.Cpu().MilliValue()
}
// Override if un-set, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceMemory]; !found {
outMemory = DefaultMemoryRequest
} else {
outMemory = requests.Memory().Value()
}
return outMilliCPU, outMemory
}

View File

@ -0,0 +1,81 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
)
// GetNamespacesFromPodAffinityTerm returns a set of names
// according to the namespaces indicated in podAffinityTerm.
// If namespaces is empty it considers the given pod's namespace.
func GetNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.String {
names := sets.String{}
if len(podAffinityTerm.Namespaces) == 0 {
names.Insert(pod.Namespace)
} else {
names.Insert(podAffinityTerm.Namespaces...)
}
return names
}
// PodMatchesTermsNamespaceAndSelector returns true if the given <pod>
// matches the namespace and selector defined by <affinityPod>`s <term>.
func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.String, selector labels.Selector) bool {
if !namespaces.Has(pod.Namespace) {
return false
}
if !selector.Matches(labels.Set(pod.Labels)) {
return false
}
return true
}
// NodesHaveSameTopologyKey checks if nodeA and nodeB have same label value with given topologyKey as label key.
// Returns false if topologyKey is empty.
func NodesHaveSameTopologyKey(nodeA, nodeB *v1.Node, topologyKey string) bool {
if len(topologyKey) == 0 {
return false
}
if nodeA.Labels == nil || nodeB.Labels == nil {
return false
}
nodeALabel, okA := nodeA.Labels[topologyKey]
nodeBLabel, okB := nodeB.Labels[topologyKey]
// If found label in both nodes, check the label
if okB && okA {
return nodeALabel == nodeBLabel
}
return false
}
// Topologies contains topologies information of nodes.
type Topologies struct {
DefaultKeys []string
}
// NodesHaveSameTopologyKey checks if nodeA and nodeB have same label value with given topologyKey as label key.
func (tps *Topologies) NodesHaveSameTopologyKey(nodeA, nodeB *v1.Node, topologyKey string) bool {
return NodesHaveSameTopologyKey(nodeA, nodeB, topologyKey)
}

View File

@ -0,0 +1,74 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"k8s.io/api/core/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// SchedulerExtender is an interface for external processes to influence scheduling
// decisions made by Kubernetes. This is typically needed for resources not directly
// managed by Kubernetes.
type SchedulerExtender interface {
// Name returns a unique name that identifies the extender.
Name() string
// Filter based on extender-implemented predicate functions. The filtered list is
// expected to be a subset of the supplied list. failedNodesMap optionally contains
// the list of failed nodes and failure reasons.
Filter(pod *v1.Pod,
nodes []*v1.Node, nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo,
) (filteredNodes []*v1.Node, failedNodesMap schedulerapi.FailedNodesMap, err error)
// Prioritize based on extender-implemented priority functions. The returned scores & weight
// are used to compute the weighted score for an extender. The weighted scores are added to
// the scores computed by Kubernetes scheduler. The total scores are used to do the host selection.
Prioritize(pod *v1.Pod, nodes []*v1.Node) (hostPriorities *schedulerapi.HostPriorityList, weight int, err error)
// Bind delegates the action of binding a pod to a node to the extender.
Bind(binding *v1.Binding) error
// IsBinder returns whether this extender is configured for the Bind method.
IsBinder() bool
// IsInterested returns true if at least one extended resource requested by
// this pod is managed by this extender.
IsInterested(pod *v1.Pod) bool
// ProcessPreemption returns nodes with their victim pods processed by extender based on
// given:
// 1. Pod to schedule
// 2. Candidate nodes and victim pods (nodeToVictims) generated by previous scheduling process.
// 3. nodeNameToInfo to restore v1.Node from node name if extender cache is enabled.
// The possible changes made by extender may include:
// 1. Subset of given candidate nodes after preemption phase of extender.
// 2. A different set of victim pod for every given candidate node after preemption phase of extender.
ProcessPreemption(
pod *v1.Pod,
nodeToVictims map[*v1.Node]*schedulerapi.Victims,
nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo,
) (map[*v1.Node]*schedulerapi.Victims, error)
// SupportsPreemption returns if the scheduler extender support preemption or not.
SupportsPreemption() bool
// IsIgnorable returns true indicates scheduling should not fail when this extender
// is unavailable. This gives scheduler ability to fail fast and tolerate non-critical extenders as well.
IsIgnorable() bool
}

View File

@ -0,0 +1,120 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
apps "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
policyv1beta1 "k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/labels"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
)
// NodeFieldSelectorKeys is a map that: the key are node field selector keys; the values are
// the functions to get the value of the node field.
var NodeFieldSelectorKeys = map[string]func(*v1.Node) string{
schedulerapi.NodeFieldSelectorKeyNodeName: func(n *v1.Node) string { return n.Name },
}
// NodeLister interface represents anything that can list nodes for a scheduler.
type NodeLister interface {
// We explicitly return []*v1.Node, instead of v1.NodeList, to avoid
// performing expensive copies that are unneeded.
List() ([]*v1.Node, error)
}
// PodFilter is a function to filter a pod. If pod passed return true else return false.
type PodFilter func(*v1.Pod) bool
// PodLister interface represents anything that can list pods for a scheduler.
type PodLister interface {
// We explicitly return []*v1.Pod, instead of v1.PodList, to avoid
// performing expensive copies that are unneeded.
List(labels.Selector) ([]*v1.Pod, error)
// This is similar to "List()", but the returned slice does not
// contain pods that don't pass `podFilter`.
FilteredList(podFilter PodFilter, selector labels.Selector) ([]*v1.Pod, error)
}
// ServiceLister interface represents anything that can produce a list of services; the list is consumed by a scheduler.
type ServiceLister interface {
// Lists all the services
List(labels.Selector) ([]*v1.Service, error)
// Gets the services for the given pod
GetPodServices(*v1.Pod) ([]*v1.Service, error)
}
// ControllerLister interface represents anything that can produce a list of ReplicationController; the list is consumed by a scheduler.
type ControllerLister interface {
// Lists all the replication controllers
List(labels.Selector) ([]*v1.ReplicationController, error)
// Gets the services for the given pod
GetPodControllers(*v1.Pod) ([]*v1.ReplicationController, error)
}
// ReplicaSetLister interface represents anything that can produce a list of ReplicaSet; the list is consumed by a scheduler.
type ReplicaSetLister interface {
// Gets the replicasets for the given pod
GetPodReplicaSets(*v1.Pod) ([]*apps.ReplicaSet, error)
}
// PDBLister interface represents anything that can list PodDisruptionBudget objects.
type PDBLister interface {
// List() returns a list of PodDisruptionBudgets matching the selector.
List(labels.Selector) ([]*policyv1beta1.PodDisruptionBudget, error)
}
var _ ControllerLister = &EmptyControllerLister{}
// EmptyControllerLister implements ControllerLister on []v1.ReplicationController returning empty data
type EmptyControllerLister struct{}
// List returns nil
func (f EmptyControllerLister) List(labels.Selector) ([]*v1.ReplicationController, error) {
return nil, nil
}
// GetPodControllers returns nil
func (f EmptyControllerLister) GetPodControllers(pod *v1.Pod) (controllers []*v1.ReplicationController, err error) {
return nil, nil
}
var _ ReplicaSetLister = &EmptyReplicaSetLister{}
// EmptyReplicaSetLister implements ReplicaSetLister on []extensions.ReplicaSet returning empty data
type EmptyReplicaSetLister struct{}
// GetPodReplicaSets returns nil
func (f EmptyReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*apps.ReplicaSet, err error) {
return nil, nil
}
// StatefulSetLister interface represents anything that can produce a list of StatefulSet; the list is consumed by a scheduler.
type StatefulSetLister interface {
// Gets the StatefulSet for the given pod.
GetPodStatefulSets(*v1.Pod) ([]*apps.StatefulSet, error)
}
var _ StatefulSetLister = &EmptyStatefulSetLister{}
// EmptyStatefulSetLister implements StatefulSetLister on []apps.StatefulSet returning empty data.
type EmptyStatefulSetLister struct{}
// GetPodStatefulSets of EmptyStatefulSetLister returns nil.
func (f EmptyStatefulSetLister) GetPodStatefulSets(pod *v1.Pod) (sss []*apps.StatefulSet, err error) {
return nil, nil
}

20
vendor/k8s.io/kubernetes/pkg/scheduler/api/doc.go generated vendored Normal file
View File

@ -0,0 +1,20 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// +k8s:deepcopy-gen=package
// Package api contains scheduler API objects.
package api // import "k8s.io/kubernetes/pkg/scheduler/api"

55
vendor/k8s.io/kubernetes/pkg/scheduler/api/register.go generated vendored Normal file
View File

@ -0,0 +1,55 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
)
// Scheme is the default instance of runtime.Scheme to which types in the Kubernetes API are already registered.
// TODO: remove this, scheduler should not have its own scheme.
var Scheme = runtime.NewScheme()
// SchemeGroupVersion is group version used to register these objects
// TODO this should be in the "scheduler" group
var SchemeGroupVersion = schema.GroupVersion{Group: "", Version: runtime.APIVersionInternal}
var (
// SchemeBuilder defines a SchemeBuilder object.
SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
// AddToScheme is used to add stored functions to scheme.
AddToScheme = SchemeBuilder.AddToScheme
)
func init() {
if err := addKnownTypes(Scheme); err != nil {
// Programmer error.
panic(err)
}
}
func addKnownTypes(scheme *runtime.Scheme) error {
if err := scheme.AddIgnoredConversionType(&metav1.TypeMeta{}, &metav1.TypeMeta{}); err != nil {
return err
}
scheme.AddKnownTypes(SchemeGroupVersion,
&Policy{},
)
return nil
}

354
vendor/k8s.io/kubernetes/pkg/scheduler/api/types.go generated vendored Normal file
View File

@ -0,0 +1,354 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api
import (
"time"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
)
const (
// MaxUint defines the max unsigned int value.
MaxUint = ^uint(0)
// MaxInt defines the max signed int value.
MaxInt = int(MaxUint >> 1)
// MaxTotalPriority defines the max total priority value.
MaxTotalPriority = MaxInt
// MaxPriority defines the max priority value.
MaxPriority = 10
// MaxWeight defines the max weight value.
MaxWeight = MaxInt / MaxPriority
// DefaultPercentageOfNodesToScore defines the percentage of nodes of all nodes
// that once found feasible, the scheduler stops looking for more nodes.
DefaultPercentageOfNodesToScore = 50
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// Policy describes a struct of a policy resource in api.
type Policy struct {
metav1.TypeMeta
// Holds the information to configure the fit predicate functions.
// If unspecified, the default predicate functions will be applied.
// If empty list, all predicates (except the mandatory ones) will be
// bypassed.
Predicates []PredicatePolicy
// Holds the information to configure the priority functions.
// If unspecified, the default priority functions will be applied.
// If empty list, all priority functions will be bypassed.
Priorities []PriorityPolicy
// Holds the information to communicate with the extender(s)
ExtenderConfigs []ExtenderConfig
// RequiredDuringScheduling affinity is not symmetric, but there is an implicit PreferredDuringScheduling affinity rule
// corresponding to every RequiredDuringScheduling affinity rule.
// HardPodAffinitySymmetricWeight represents the weight of implicit PreferredDuringScheduling affinity rule, in the range 1-100.
HardPodAffinitySymmetricWeight int32
// When AlwaysCheckAllPredicates is set to true, scheduler checks all
// the configured predicates even after one or more of them fails.
// When the flag is set to false, scheduler skips checking the rest
// of the predicates after it finds one predicate that failed.
AlwaysCheckAllPredicates bool
}
// PredicatePolicy describes a struct of a predicate policy.
type PredicatePolicy struct {
// Identifier of the predicate policy
// For a custom predicate, the name can be user-defined
// For the Kubernetes provided predicates, the name is the identifier of the pre-defined predicate
Name string
// Holds the parameters to configure the given predicate
Argument *PredicateArgument
}
// PriorityPolicy describes a struct of a priority policy.
type PriorityPolicy struct {
// Identifier of the priority policy
// For a custom priority, the name can be user-defined
// For the Kubernetes provided priority functions, the name is the identifier of the pre-defined priority function
Name string
// The numeric multiplier for the node scores that the priority function generates
// The weight should be a positive integer
Weight int
// Holds the parameters to configure the given priority function
Argument *PriorityArgument
}
// PredicateArgument represents the arguments to configure predicate functions in scheduler policy configuration.
// Only one of its members may be specified
type PredicateArgument struct {
// The predicate that provides affinity for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAffinity *ServiceAffinity
// The predicate that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelsPresence *LabelsPresence
}
// PriorityArgument represents the arguments to configure priority functions in scheduler policy configuration.
// Only one of its members may be specified
type PriorityArgument struct {
// The priority function that ensures a good spread (anti-affinity) for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAntiAffinity *ServiceAntiAffinity
// The priority function that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelPreference *LabelPreference
// The RequestedToCapacityRatio priority function is parametrized with function shape.
RequestedToCapacityRatioArguments *RequestedToCapacityRatioArguments
}
// ServiceAffinity holds the parameters that are used to configure the corresponding predicate in scheduler policy configuration.
type ServiceAffinity struct {
// The list of labels that identify node "groups"
// All of the labels should match for the node to be considered a fit for hosting the pod
Labels []string
}
// LabelsPresence holds the parameters that are used to configure the corresponding predicate in scheduler policy configuration.
type LabelsPresence struct {
// The list of labels that identify node "groups"
// All of the labels should be either present (or absent) for the node to be considered a fit for hosting the pod
Labels []string
// The boolean flag that indicates whether the labels should be present or absent from the node
Presence bool
}
// ServiceAntiAffinity holds the parameters that are used to configure the corresponding priority function
type ServiceAntiAffinity struct {
// Used to identify node "groups"
Label string
}
// LabelPreference holds the parameters that are used to configure the corresponding priority function
type LabelPreference struct {
// Used to identify node "groups"
Label string
// This is a boolean flag
// If true, higher priority is given to nodes that have the label
// If false, higher priority is given to nodes that do not have the label
Presence bool
}
// RequestedToCapacityRatioArguments holds arguments specific to RequestedToCapacityRatio priority function
type RequestedToCapacityRatioArguments struct {
// Array of point defining priority function shape
UtilizationShape []UtilizationShapePoint
}
// UtilizationShapePoint represents single point of priority function shape
type UtilizationShapePoint struct {
// Utilization (x axis). Valid values are 0 to 100. Fully utilized node maps to 100.
Utilization int
// Score assigned to given utilization (y axis). Valid values are 0 to 10.
Score int
}
// ExtenderManagedResource describes the arguments of extended resources
// managed by an extender.
type ExtenderManagedResource struct {
// Name is the extended resource name.
Name v1.ResourceName
// IgnoredByScheduler indicates whether kube-scheduler should ignore this
// resource when applying predicates.
IgnoredByScheduler bool
}
// ExtenderTLSConfig contains settings to enable TLS with extender
type ExtenderTLSConfig struct {
// Server should be accessed without verifying the TLS certificate. For testing only.
Insecure bool
// ServerName is passed to the server for SNI and is used in the client to check server
// certificates against. If ServerName is empty, the hostname used to contact the
// server is used.
ServerName string
// Server requires TLS client certificate authentication
CertFile string
// Server requires TLS client certificate authentication
KeyFile string
// Trusted root certificates for server
CAFile string
// CertData holds PEM-encoded bytes (typically read from a client certificate file).
// CertData takes precedence over CertFile
CertData []byte
// KeyData holds PEM-encoded bytes (typically read from a client certificate key file).
// KeyData takes precedence over KeyFile
KeyData []byte
// CAData holds PEM-encoded bytes (typically read from a root certificates bundle).
// CAData takes precedence over CAFile
CAData []byte
}
// ExtenderConfig holds the parameters used to communicate with the extender. If a verb is unspecified/empty,
// it is assumed that the extender chose not to provide that extension.
type ExtenderConfig struct {
// URLPrefix at which the extender is available
URLPrefix string
// Verb for the filter call, empty if not supported. This verb is appended to the URLPrefix when issuing the filter call to extender.
FilterVerb string
// Verb for the preempt call, empty if not supported. This verb is appended to the URLPrefix when issuing the preempt call to extender.
PreemptVerb string
// Verb for the prioritize call, empty if not supported. This verb is appended to the URLPrefix when issuing the prioritize call to extender.
PrioritizeVerb string
// The numeric multiplier for the node scores that the prioritize call generates.
// The weight should be a positive integer
Weight int
// Verb for the bind call, empty if not supported. This verb is appended to the URLPrefix when issuing the bind call to extender.
// If this method is implemented by the extender, it is the extender's responsibility to bind the pod to apiserver. Only one extender
// can implement this function.
BindVerb string
// EnableHTTPS specifies whether https should be used to communicate with the extender
EnableHTTPS bool
// TLSConfig specifies the transport layer security config
TLSConfig *ExtenderTLSConfig
// HTTPTimeout specifies the timeout duration for a call to the extender. Filter timeout fails the scheduling of the pod. Prioritize
// timeout is ignored, k8s/other extenders priorities are used to select the node.
HTTPTimeout time.Duration
// NodeCacheCapable specifies that the extender is capable of caching node information,
// so the scheduler should only send minimal information about the eligible nodes
// assuming that the extender already cached full details of all nodes in the cluster
NodeCacheCapable bool
// ManagedResources is a list of extended resources that are managed by
// this extender.
// - A pod will be sent to the extender on the Filter, Prioritize and Bind
// (if the extender is the binder) phases iff the pod requests at least
// one of the extended resources in this list. If empty or unspecified,
// all pods will be sent to this extender.
// - If IgnoredByScheduler is set to true for a resource, kube-scheduler
// will skip checking the resource in predicates.
// +optional
ManagedResources []ExtenderManagedResource
// Ignorable specifies if the extender is ignorable, i.e. scheduling should not
// fail when the extender returns an error or is not reachable.
Ignorable bool
}
// ExtenderPreemptionResult represents the result returned by preemption phase of extender.
type ExtenderPreemptionResult struct {
NodeNameToMetaVictims map[string]*MetaVictims
}
// ExtenderPreemptionArgs represents the arguments needed by the extender to preempt pods on nodes.
type ExtenderPreemptionArgs struct {
// Pod being scheduled
Pod *v1.Pod
// Victims map generated by scheduler preemption phase
// Only set NodeNameToMetaVictims if ExtenderConfig.NodeCacheCapable == true. Otherwise, only set NodeNameToVictims.
NodeNameToVictims map[string]*Victims
NodeNameToMetaVictims map[string]*MetaVictims
}
// Victims represents:
// pods: a group of pods expected to be preempted.
// numPDBViolations: the count of violations of PodDisruptionBudget
type Victims struct {
Pods []*v1.Pod
NumPDBViolations int
}
// MetaPod represent identifier for a v1.Pod
type MetaPod struct {
UID string
}
// MetaVictims represents:
// pods: a group of pods expected to be preempted.
// Only Pod identifiers will be sent and user are expect to get v1.Pod in their own way.
// numPDBViolations: the count of violations of PodDisruptionBudget
type MetaVictims struct {
Pods []*MetaPod
NumPDBViolations int
}
// ExtenderArgs represents the arguments needed by the extender to filter/prioritize
// nodes for a pod.
type ExtenderArgs struct {
// Pod being scheduled
Pod *v1.Pod
// List of candidate nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == false
Nodes *v1.NodeList
// List of candidate node names where the pod can be scheduled; to be
// populated only if ExtenderConfig.NodeCacheCapable == true
NodeNames *[]string
}
// FailedNodesMap represents the filtered out nodes, with node names and failure messages
type FailedNodesMap map[string]string
// ExtenderFilterResult represents the results of a filter call to an extender
type ExtenderFilterResult struct {
// Filtered set of nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == false
Nodes *v1.NodeList
// Filtered set of nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == true
NodeNames *[]string
// Filtered out nodes where the pod can't be scheduled and the failure messages
FailedNodes FailedNodesMap
// Error message indicating failure
Error string
}
// ExtenderBindingArgs represents the arguments to an extender for binding a pod to a node.
type ExtenderBindingArgs struct {
// PodName is the name of the pod being bound
PodName string
// PodNamespace is the namespace of the pod being bound
PodNamespace string
// PodUID is the UID of the pod being bound
PodUID types.UID
// Node selected by the scheduler
Node string
}
// ExtenderBindingResult represents the result of binding of a pod to a node from an extender.
type ExtenderBindingResult struct {
// Error message indicating failure
Error string
}
// HostPriority represents the priority of scheduling to a particular host, higher priority is better.
type HostPriority struct {
// Name of the host
Host string
// Score associated with the host
Score int
}
// HostPriorityList declares a []HostPriority type.
type HostPriorityList []HostPriority
func (h HostPriorityList) Len() int {
return len(h)
}
func (h HostPriorityList) Less(i, j int) bool {
if h[i].Score == h[j].Score {
return h[i].Host < h[j].Host
}
return h[i].Score < h[j].Score
}
func (h HostPriorityList) Swap(i, j int) {
h[i], h[j] = h[j], h[i]
}

View File

@ -0,0 +1,72 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api
import (
api "k8s.io/kubernetes/pkg/apis/core"
)
const (
// TaintNodeNotReady will be added when node is not ready
// and feature-gate for TaintBasedEvictions flag is enabled,
// and removed when node becomes ready.
TaintNodeNotReady = "node.kubernetes.io/not-ready"
// TaintNodeUnreachable will be added when node becomes unreachable
// (corresponding to NodeReady status ConditionUnknown)
// and feature-gate for TaintBasedEvictions flag is enabled,
// and removed when node becomes reachable (NodeReady status ConditionTrue).
TaintNodeUnreachable = "node.kubernetes.io/unreachable"
// TaintNodeUnschedulable will be added when node becomes unschedulable
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node becomes scheduable.
TaintNodeUnschedulable = "node.kubernetes.io/unschedulable"
// TaintNodeMemoryPressure will be added when node has memory pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough memory.
TaintNodeMemoryPressure = "node.kubernetes.io/memory-pressure"
// TaintNodeDiskPressure will be added when node has disk pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
TaintNodeDiskPressure = "node.kubernetes.io/disk-pressure"
// TaintNodeNetworkUnavailable will be added when node's network is unavailable
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when network becomes ready.
TaintNodeNetworkUnavailable = "node.kubernetes.io/network-unavailable"
// TaintNodePIDPressure will be added when node has pid pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
TaintNodePIDPressure = "node.kubernetes.io/pid-pressure"
// TaintExternalCloudProvider sets this taint on a node to mark it as unusable,
// when kubelet is started with the "external" cloud provider, until a controller
// from the cloud-controller-manager intitializes this node, and then removes
// the taint
TaintExternalCloudProvider = "node.cloudprovider.kubernetes.io/uninitialized"
// TaintNodeShutdown when node is shutdown in external cloud provider
TaintNodeShutdown = "node.cloudprovider.kubernetes.io/shutdown"
// NodeFieldSelectorKeyNodeName ('metadata.name') uses this as node field selector key
// when selecting node by node's name.
NodeFieldSelectorKeyNodeName = api.ObjectNameField
)

View File

@ -0,0 +1,669 @@
// +build !ignore_autogenerated
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by deepcopy-gen. DO NOT EDIT.
package api
import (
v1 "k8s.io/api/core/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderArgs) DeepCopyInto(out *ExtenderArgs) {
*out = *in
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
*out = new(v1.Pod)
(*in).DeepCopyInto(*out)
}
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
*out = new(v1.NodeList)
(*in).DeepCopyInto(*out)
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderArgs.
func (in *ExtenderArgs) DeepCopy() *ExtenderArgs {
if in == nil {
return nil
}
out := new(ExtenderArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderBindingArgs) DeepCopyInto(out *ExtenderBindingArgs) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderBindingArgs.
func (in *ExtenderBindingArgs) DeepCopy() *ExtenderBindingArgs {
if in == nil {
return nil
}
out := new(ExtenderBindingArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderBindingResult) DeepCopyInto(out *ExtenderBindingResult) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderBindingResult.
func (in *ExtenderBindingResult) DeepCopy() *ExtenderBindingResult {
if in == nil {
return nil
}
out := new(ExtenderBindingResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderConfig) DeepCopyInto(out *ExtenderConfig) {
*out = *in
if in.TLSConfig != nil {
in, out := &in.TLSConfig, &out.TLSConfig
*out = new(ExtenderTLSConfig)
(*in).DeepCopyInto(*out)
}
if in.ManagedResources != nil {
in, out := &in.ManagedResources, &out.ManagedResources
*out = make([]ExtenderManagedResource, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderConfig.
func (in *ExtenderConfig) DeepCopy() *ExtenderConfig {
if in == nil {
return nil
}
out := new(ExtenderConfig)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderFilterResult) DeepCopyInto(out *ExtenderFilterResult) {
*out = *in
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
*out = new(v1.NodeList)
(*in).DeepCopyInto(*out)
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
if in.FailedNodes != nil {
in, out := &in.FailedNodes, &out.FailedNodes
*out = make(FailedNodesMap, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderFilterResult.
func (in *ExtenderFilterResult) DeepCopy() *ExtenderFilterResult {
if in == nil {
return nil
}
out := new(ExtenderFilterResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderManagedResource) DeepCopyInto(out *ExtenderManagedResource) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderManagedResource.
func (in *ExtenderManagedResource) DeepCopy() *ExtenderManagedResource {
if in == nil {
return nil
}
out := new(ExtenderManagedResource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderPreemptionArgs) DeepCopyInto(out *ExtenderPreemptionArgs) {
*out = *in
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
*out = new(v1.Pod)
(*in).DeepCopyInto(*out)
}
if in.NodeNameToVictims != nil {
in, out := &in.NodeNameToVictims, &out.NodeNameToVictims
*out = make(map[string]*Victims, len(*in))
for key, val := range *in {
var outVal *Victims
if val == nil {
(*out)[key] = nil
} else {
in, out := &val, &outVal
*out = new(Victims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
if in.NodeNameToMetaVictims != nil {
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
var outVal *MetaVictims
if val == nil {
(*out)[key] = nil
} else {
in, out := &val, &outVal
*out = new(MetaVictims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderPreemptionArgs.
func (in *ExtenderPreemptionArgs) DeepCopy() *ExtenderPreemptionArgs {
if in == nil {
return nil
}
out := new(ExtenderPreemptionArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderPreemptionResult) DeepCopyInto(out *ExtenderPreemptionResult) {
*out = *in
if in.NodeNameToMetaVictims != nil {
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
var outVal *MetaVictims
if val == nil {
(*out)[key] = nil
} else {
in, out := &val, &outVal
*out = new(MetaVictims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderPreemptionResult.
func (in *ExtenderPreemptionResult) DeepCopy() *ExtenderPreemptionResult {
if in == nil {
return nil
}
out := new(ExtenderPreemptionResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderTLSConfig) DeepCopyInto(out *ExtenderTLSConfig) {
*out = *in
if in.CertData != nil {
in, out := &in.CertData, &out.CertData
*out = make([]byte, len(*in))
copy(*out, *in)
}
if in.KeyData != nil {
in, out := &in.KeyData, &out.KeyData
*out = make([]byte, len(*in))
copy(*out, *in)
}
if in.CAData != nil {
in, out := &in.CAData, &out.CAData
*out = make([]byte, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderTLSConfig.
func (in *ExtenderTLSConfig) DeepCopy() *ExtenderTLSConfig {
if in == nil {
return nil
}
out := new(ExtenderTLSConfig)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in FailedNodesMap) DeepCopyInto(out *FailedNodesMap) {
{
in := &in
*out = make(FailedNodesMap, len(*in))
for key, val := range *in {
(*out)[key] = val
}
return
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedNodesMap.
func (in FailedNodesMap) DeepCopy() FailedNodesMap {
if in == nil {
return nil
}
out := new(FailedNodesMap)
in.DeepCopyInto(out)
return *out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HostPriority) DeepCopyInto(out *HostPriority) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostPriority.
func (in *HostPriority) DeepCopy() *HostPriority {
if in == nil {
return nil
}
out := new(HostPriority)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in HostPriorityList) DeepCopyInto(out *HostPriorityList) {
{
in := &in
*out = make(HostPriorityList, len(*in))
copy(*out, *in)
return
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostPriorityList.
func (in HostPriorityList) DeepCopy() HostPriorityList {
if in == nil {
return nil
}
out := new(HostPriorityList)
in.DeepCopyInto(out)
return *out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LabelPreference) DeepCopyInto(out *LabelPreference) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelPreference.
func (in *LabelPreference) DeepCopy() *LabelPreference {
if in == nil {
return nil
}
out := new(LabelPreference)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LabelsPresence) DeepCopyInto(out *LabelsPresence) {
*out = *in
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelsPresence.
func (in *LabelsPresence) DeepCopy() *LabelsPresence {
if in == nil {
return nil
}
out := new(LabelsPresence)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetaPod) DeepCopyInto(out *MetaPod) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaPod.
func (in *MetaPod) DeepCopy() *MetaPod {
if in == nil {
return nil
}
out := new(MetaPod)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetaVictims) DeepCopyInto(out *MetaVictims) {
*out = *in
if in.Pods != nil {
in, out := &in.Pods, &out.Pods
*out = make([]*MetaPod, len(*in))
for i := range *in {
if (*in)[i] != nil {
in, out := &(*in)[i], &(*out)[i]
*out = new(MetaPod)
**out = **in
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaVictims.
func (in *MetaVictims) DeepCopy() *MetaVictims {
if in == nil {
return nil
}
out := new(MetaVictims)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Policy) DeepCopyInto(out *Policy) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Predicates != nil {
in, out := &in.Predicates, &out.Predicates
*out = make([]PredicatePolicy, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Priorities != nil {
in, out := &in.Priorities, &out.Priorities
*out = make([]PriorityPolicy, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.ExtenderConfigs != nil {
in, out := &in.ExtenderConfigs, &out.ExtenderConfigs
*out = make([]ExtenderConfig, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Policy.
func (in *Policy) DeepCopy() *Policy {
if in == nil {
return nil
}
out := new(Policy)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *Policy) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PredicateArgument) DeepCopyInto(out *PredicateArgument) {
*out = *in
if in.ServiceAffinity != nil {
in, out := &in.ServiceAffinity, &out.ServiceAffinity
*out = new(ServiceAffinity)
(*in).DeepCopyInto(*out)
}
if in.LabelsPresence != nil {
in, out := &in.LabelsPresence, &out.LabelsPresence
*out = new(LabelsPresence)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredicateArgument.
func (in *PredicateArgument) DeepCopy() *PredicateArgument {
if in == nil {
return nil
}
out := new(PredicateArgument)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PredicatePolicy) DeepCopyInto(out *PredicatePolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
*out = new(PredicateArgument)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredicatePolicy.
func (in *PredicatePolicy) DeepCopy() *PredicatePolicy {
if in == nil {
return nil
}
out := new(PredicatePolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PriorityArgument) DeepCopyInto(out *PriorityArgument) {
*out = *in
if in.ServiceAntiAffinity != nil {
in, out := &in.ServiceAntiAffinity, &out.ServiceAntiAffinity
*out = new(ServiceAntiAffinity)
**out = **in
}
if in.LabelPreference != nil {
in, out := &in.LabelPreference, &out.LabelPreference
*out = new(LabelPreference)
**out = **in
}
if in.RequestedToCapacityRatioArguments != nil {
in, out := &in.RequestedToCapacityRatioArguments, &out.RequestedToCapacityRatioArguments
*out = new(RequestedToCapacityRatioArguments)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PriorityArgument.
func (in *PriorityArgument) DeepCopy() *PriorityArgument {
if in == nil {
return nil
}
out := new(PriorityArgument)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PriorityPolicy) DeepCopyInto(out *PriorityPolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
*out = new(PriorityArgument)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PriorityPolicy.
func (in *PriorityPolicy) DeepCopy() *PriorityPolicy {
if in == nil {
return nil
}
out := new(PriorityPolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RequestedToCapacityRatioArguments) DeepCopyInto(out *RequestedToCapacityRatioArguments) {
*out = *in
if in.UtilizationShape != nil {
in, out := &in.UtilizationShape, &out.UtilizationShape
*out = make([]UtilizationShapePoint, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RequestedToCapacityRatioArguments.
func (in *RequestedToCapacityRatioArguments) DeepCopy() *RequestedToCapacityRatioArguments {
if in == nil {
return nil
}
out := new(RequestedToCapacityRatioArguments)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServiceAffinity) DeepCopyInto(out *ServiceAffinity) {
*out = *in
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceAffinity.
func (in *ServiceAffinity) DeepCopy() *ServiceAffinity {
if in == nil {
return nil
}
out := new(ServiceAffinity)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServiceAntiAffinity) DeepCopyInto(out *ServiceAntiAffinity) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceAntiAffinity.
func (in *ServiceAntiAffinity) DeepCopy() *ServiceAntiAffinity {
if in == nil {
return nil
}
out := new(ServiceAntiAffinity)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *UtilizationShapePoint) DeepCopyInto(out *UtilizationShapePoint) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UtilizationShapePoint.
func (in *UtilizationShapePoint) DeepCopy() *UtilizationShapePoint {
if in == nil {
return nil
}
out := new(UtilizationShapePoint)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Victims) DeepCopyInto(out *Victims) {
*out = *in
if in.Pods != nil {
in, out := &in.Pods, &out.Pods
*out = make([]*v1.Pod, len(*in))
for i := range *in {
if (*in)[i] != nil {
in, out := &(*in)[i], &(*out)[i]
*out = new(v1.Pod)
(*in).DeepCopyInto(*out)
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Victims.
func (in *Victims) DeepCopy() *Victims {
if in == nil {
return nil
}
out := new(Victims)
in.DeepCopyInto(out)
return out
}

View File

@ -0,0 +1,244 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
)
const (
// SchedulerSubsystem - subsystem name used by scheduler
SchedulerSubsystem = "scheduler"
// SchedulingLatencyName - scheduler latency metric name
SchedulingLatencyName = "scheduling_duration_seconds"
// DeprecatedSchedulingLatencyName - scheduler latency metric name which is deprecated
DeprecatedSchedulingLatencyName = "scheduling_latency_seconds"
// OperationLabel - operation label name
OperationLabel = "operation"
// Below are possible values for the operation label. Each represents a substep of e2e scheduling:
// PredicateEvaluation - predicate evaluation operation label value
PredicateEvaluation = "predicate_evaluation"
// PriorityEvaluation - priority evaluation operation label value
PriorityEvaluation = "priority_evaluation"
// PreemptionEvaluation - preemption evaluation operation label value (occurs in case of scheduling fitError).
PreemptionEvaluation = "preemption_evaluation"
// Binding - binding operation label value
Binding = "binding"
// E2eScheduling - e2e scheduling operation label value
)
// All the histogram based metrics have 1ms as size for the smallest bucket.
var (
scheduleAttempts = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: SchedulerSubsystem,
Name: "schedule_attempts_total",
Help: "Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem.",
}, []string{"result"})
// PodScheduleSuccesses counts how many pods were scheduled.
PodScheduleSuccesses = scheduleAttempts.With(prometheus.Labels{"result": "scheduled"})
// PodScheduleFailures counts how many pods could not be scheduled.
PodScheduleFailures = scheduleAttempts.With(prometheus.Labels{"result": "unschedulable"})
// PodScheduleErrors counts how many pods could not be scheduled due to a scheduler error.
PodScheduleErrors = scheduleAttempts.With(prometheus.Labels{"result": "error"})
SchedulingLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: SchedulerSubsystem,
Name: SchedulingLatencyName,
Help: "Scheduling latency in seconds split by sub-parts of the scheduling operation",
// Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term).
MaxAge: 5 * time.Hour,
},
[]string{OperationLabel},
)
DeprecatedSchedulingLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: SchedulerSubsystem,
Name: DeprecatedSchedulingLatencyName,
Help: "(Deprecated) Scheduling latency in seconds split by sub-parts of the scheduling operation",
// Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term).
MaxAge: 5 * time.Hour,
},
[]string{OperationLabel},
)
E2eSchedulingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_duration_seconds",
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedE2eSchedulingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_latency_microseconds",
Help: "(Deprecated) E2e scheduling latency in microseconds (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_duration_seconds",
Help: "Scheduling algorithm latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedSchedulingAlgorithmLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_latency_microseconds",
Help: "(Deprecated) Scheduling algorithm latency in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_predicate_evaluation_seconds",
Help: "Scheduling algorithm predicate evaluation duration in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedSchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_predicate_evaluation",
Help: "(Deprecated) Scheduling algorithm predicate evaluation duration in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_priority_evaluation_seconds",
Help: "Scheduling algorithm priority evaluation duration in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedSchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_priority_evaluation",
Help: "(Deprecated) Scheduling algorithm priority evaluation duration in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_preemption_evaluation_seconds",
Help: "Scheduling algorithm preemption evaluation duration in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedSchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_preemption_evaluation",
Help: "(Deprecated) Scheduling algorithm preemption evaluation duration in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
BindingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "binding_duration_seconds",
Help: "Binding latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedBindingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "binding_latency_microseconds",
Help: "(Deprecated) Binding latency in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
PreemptionVictims = prometheus.NewGauge(
prometheus.GaugeOpts{
Subsystem: SchedulerSubsystem,
Name: "pod_preemption_victims",
Help: "Number of selected preemption victims",
})
PreemptionAttempts = prometheus.NewCounter(
prometheus.CounterOpts{
Subsystem: SchedulerSubsystem,
Name: "total_preemption_attempts",
Help: "Total preemption attempts in the cluster till now",
})
metricsList = []prometheus.Collector{
scheduleAttempts,
SchedulingLatency,
DeprecatedSchedulingLatency,
E2eSchedulingLatency,
DeprecatedE2eSchedulingLatency,
SchedulingAlgorithmLatency,
DeprecatedSchedulingAlgorithmLatency,
BindingLatency,
DeprecatedBindingLatency,
SchedulingAlgorithmPredicateEvaluationDuration,
DeprecatedSchedulingAlgorithmPredicateEvaluationDuration,
SchedulingAlgorithmPriorityEvaluationDuration,
DeprecatedSchedulingAlgorithmPriorityEvaluationDuration,
SchedulingAlgorithmPremptionEvaluationDuration,
DeprecatedSchedulingAlgorithmPremptionEvaluationDuration,
PreemptionVictims,
PreemptionAttempts,
}
)
var registerMetrics sync.Once
// Register all metrics.
func Register() {
// Register the metrics.
registerMetrics.Do(func() {
for _, metric := range metricsList {
prometheus.MustRegister(metric)
}
persistentvolume.RegisterVolumeSchedulingMetrics()
})
}
// Reset resets metrics
func Reset() {
SchedulingLatency.Reset()
DeprecatedSchedulingLatency.Reset()
}
// SinceInMicroseconds gets the time since the specified start in microseconds.
func SinceInMicroseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
}
// SinceInSeconds gets the time since the specified start in seconds.
func SinceInSeconds(start time.Time) float64 {
return time.Since(start).Seconds()
}

View File

@ -0,0 +1,135 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeinfo
import (
"k8s.io/api/core/v1"
)
// DefaultBindAllHostIP defines the default ip address used to bind to all host.
const DefaultBindAllHostIP = "0.0.0.0"
// ProtocolPort represents a protocol port pair, e.g. tcp:80.
type ProtocolPort struct {
Protocol string
Port int32
}
// NewProtocolPort creates a ProtocolPort instance.
func NewProtocolPort(protocol string, port int32) *ProtocolPort {
pp := &ProtocolPort{
Protocol: protocol,
Port: port,
}
if len(pp.Protocol) == 0 {
pp.Protocol = string(v1.ProtocolTCP)
}
return pp
}
// HostPortInfo stores mapping from ip to a set of ProtocolPort
type HostPortInfo map[string]map[ProtocolPort]struct{}
// Add adds (ip, protocol, port) to HostPortInfo
func (h HostPortInfo) Add(ip, protocol string, port int32) {
if port <= 0 {
return
}
h.sanitize(&ip, &protocol)
pp := NewProtocolPort(protocol, port)
if _, ok := h[ip]; !ok {
h[ip] = map[ProtocolPort]struct{}{
*pp: {},
}
return
}
h[ip][*pp] = struct{}{}
}
// Remove removes (ip, protocol, port) from HostPortInfo
func (h HostPortInfo) Remove(ip, protocol string, port int32) {
if port <= 0 {
return
}
h.sanitize(&ip, &protocol)
pp := NewProtocolPort(protocol, port)
if m, ok := h[ip]; ok {
delete(m, *pp)
if len(h[ip]) == 0 {
delete(h, ip)
}
}
}
// Len returns the total number of (ip, protocol, port) tuple in HostPortInfo
func (h HostPortInfo) Len() int {
length := 0
for _, m := range h {
length += len(m)
}
return length
}
// CheckConflict checks if the input (ip, protocol, port) conflicts with the existing
// ones in HostPortInfo.
func (h HostPortInfo) CheckConflict(ip, protocol string, port int32) bool {
if port <= 0 {
return false
}
h.sanitize(&ip, &protocol)
pp := NewProtocolPort(protocol, port)
// If ip is 0.0.0.0 check all IP's (protocol, port) pair
if ip == DefaultBindAllHostIP {
for _, m := range h {
if _, ok := m[*pp]; ok {
return true
}
}
return false
}
// If ip isn't 0.0.0.0, only check IP and 0.0.0.0's (protocol, port) pair
for _, key := range []string{DefaultBindAllHostIP, ip} {
if m, ok := h[key]; ok {
if _, ok2 := m[*pp]; ok2 {
return true
}
}
}
return false
}
// sanitize the parameters
func (h HostPortInfo) sanitize(ip, protocol *string) {
if len(*ip) == 0 {
*ip = DefaultBindAllHostIP
}
if len(*protocol) == 0 {
*protocol = string(v1.ProtocolTCP)
}
}

View File

@ -0,0 +1,702 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeinfo
import (
"errors"
"fmt"
"sync"
"sync/atomic"
"k8s.io/klog"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
)
var (
emptyResource = Resource{}
generation int64
)
// ImageStateSummary provides summarized information about the state of an image.
type ImageStateSummary struct {
// Size of the image
Size int64
// Used to track how many nodes have this image
NumNodes int
}
// NodeInfo is node level aggregated information.
type NodeInfo struct {
// Overall node information.
node *v1.Node
pods []*v1.Pod
podsWithAffinity []*v1.Pod
usedPorts HostPortInfo
// Total requested resource of all pods on this node.
// It includes assumed pods which scheduler sends binding to apiserver but
// didn't get it as scheduled yet.
requestedResource *Resource
nonzeroRequest *Resource
// We store allocatedResources (which is Node.Status.Allocatable.*) explicitly
// as int64, to avoid conversions and accessing map.
allocatableResource *Resource
// Cached taints of the node for faster lookup.
taints []v1.Taint
taintsErr error
// imageStates holds the entry of an image if and only if this image is on the node. The entry can be used for
// checking an image's existence and advanced usage (e.g., image locality scheduling policy) based on the image
// state information.
imageStates map[string]*ImageStateSummary
// TransientInfo holds the information pertaining to a scheduling cycle. This will be destructed at the end of
// scheduling cycle.
// TODO: @ravig. Remove this once we have a clear approach for message passing across predicates and priorities.
TransientInfo *TransientSchedulerInfo
// Cached conditions of node for faster lookup.
memoryPressureCondition v1.ConditionStatus
diskPressureCondition v1.ConditionStatus
pidPressureCondition v1.ConditionStatus
// Whenever NodeInfo changes, generation is bumped.
// This is used to avoid cloning it if the object didn't change.
generation int64
}
//initializeNodeTransientInfo initializes transient information pertaining to node.
func initializeNodeTransientInfo() nodeTransientInfo {
return nodeTransientInfo{AllocatableVolumesCount: 0, RequestedVolumes: 0}
}
// nextGeneration: Let's make sure history never forgets the name...
// Increments the generation number monotonically ensuring that generation numbers never collide.
// Collision of the generation numbers would be particularly problematic if a node was deleted and
// added back with the same name. See issue#63262.
func nextGeneration() int64 {
return atomic.AddInt64(&generation, 1)
}
// nodeTransientInfo contains transient node information while scheduling.
type nodeTransientInfo struct {
// AllocatableVolumesCount contains number of volumes that could be attached to node.
AllocatableVolumesCount int
// Requested number of volumes on a particular node.
RequestedVolumes int
}
// TransientSchedulerInfo is a transient structure which is destructed at the end of each scheduling cycle.
// It consists of items that are valid for a scheduling cycle and is used for message passing across predicates and
// priorities. Some examples which could be used as fields are number of volumes being used on node, current utilization
// on node etc.
// IMPORTANT NOTE: Make sure that each field in this structure is documented along with usage. Expand this structure
// only when absolutely needed as this data structure will be created and destroyed during every scheduling cycle.
type TransientSchedulerInfo struct {
TransientLock sync.Mutex
// NodeTransInfo holds the information related to nodeTransientInformation. NodeName is the key here.
TransNodeInfo nodeTransientInfo
}
// NewTransientSchedulerInfo returns a new scheduler transient structure with initialized values.
func NewTransientSchedulerInfo() *TransientSchedulerInfo {
tsi := &TransientSchedulerInfo{
TransNodeInfo: initializeNodeTransientInfo(),
}
return tsi
}
// ResetTransientSchedulerInfo resets the TransientSchedulerInfo.
func (transientSchedInfo *TransientSchedulerInfo) ResetTransientSchedulerInfo() {
transientSchedInfo.TransientLock.Lock()
defer transientSchedInfo.TransientLock.Unlock()
// Reset TransientNodeInfo.
transientSchedInfo.TransNodeInfo.AllocatableVolumesCount = 0
transientSchedInfo.TransNodeInfo.RequestedVolumes = 0
}
// Resource is a collection of compute resource.
type Resource struct {
MilliCPU int64
Memory int64
EphemeralStorage int64
// We store allowedPodNumber (which is Node.Status.Allocatable.Pods().Value())
// explicitly as int, to avoid conversions and improve performance.
AllowedPodNumber int
// ScalarResources
ScalarResources map[v1.ResourceName]int64
}
// NewResource creates a Resource from ResourceList
func NewResource(rl v1.ResourceList) *Resource {
r := &Resource{}
r.Add(rl)
return r
}
// Add adds ResourceList into Resource.
func (r *Resource) Add(rl v1.ResourceList) {
if r == nil {
return
}
for rName, rQuant := range rl {
switch rName {
case v1.ResourceCPU:
r.MilliCPU += rQuant.MilliValue()
case v1.ResourceMemory:
r.Memory += rQuant.Value()
case v1.ResourcePods:
r.AllowedPodNumber += int(rQuant.Value())
case v1.ResourceEphemeralStorage:
r.EphemeralStorage += rQuant.Value()
default:
if v1helper.IsScalarResourceName(rName) {
r.AddScalar(rName, rQuant.Value())
}
}
}
}
// ResourceList returns a resource list of this resource.
func (r *Resource) ResourceList() v1.ResourceList {
result := v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(r.MilliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(r.Memory, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(int64(r.AllowedPodNumber), resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(r.EphemeralStorage, resource.BinarySI),
}
for rName, rQuant := range r.ScalarResources {
if v1helper.IsHugePageResourceName(rName) {
result[rName] = *resource.NewQuantity(rQuant, resource.BinarySI)
} else {
result[rName] = *resource.NewQuantity(rQuant, resource.DecimalSI)
}
}
return result
}
// Clone returns a copy of this resource.
func (r *Resource) Clone() *Resource {
res := &Resource{
MilliCPU: r.MilliCPU,
Memory: r.Memory,
AllowedPodNumber: r.AllowedPodNumber,
EphemeralStorage: r.EphemeralStorage,
}
if r.ScalarResources != nil {
res.ScalarResources = make(map[v1.ResourceName]int64)
for k, v := range r.ScalarResources {
res.ScalarResources[k] = v
}
}
return res
}
// AddScalar adds a resource by a scalar value of this resource.
func (r *Resource) AddScalar(name v1.ResourceName, quantity int64) {
r.SetScalar(name, r.ScalarResources[name]+quantity)
}
// SetScalar sets a resource by a scalar value of this resource.
func (r *Resource) SetScalar(name v1.ResourceName, quantity int64) {
// Lazily allocate scalar resource map.
if r.ScalarResources == nil {
r.ScalarResources = map[v1.ResourceName]int64{}
}
r.ScalarResources[name] = quantity
}
// SetMaxResource compares with ResourceList and takes max value for each Resource.
func (r *Resource) SetMaxResource(rl v1.ResourceList) {
if r == nil {
return
}
for rName, rQuantity := range rl {
switch rName {
case v1.ResourceMemory:
if mem := rQuantity.Value(); mem > r.Memory {
r.Memory = mem
}
case v1.ResourceCPU:
if cpu := rQuantity.MilliValue(); cpu > r.MilliCPU {
r.MilliCPU = cpu
}
case v1.ResourceEphemeralStorage:
if ephemeralStorage := rQuantity.Value(); ephemeralStorage > r.EphemeralStorage {
r.EphemeralStorage = ephemeralStorage
}
default:
if v1helper.IsScalarResourceName(rName) {
value := rQuantity.Value()
if value > r.ScalarResources[rName] {
r.SetScalar(rName, value)
}
}
}
}
}
// NewNodeInfo returns a ready to use empty NodeInfo object.
// If any pods are given in arguments, their information will be aggregated in
// the returned object.
func NewNodeInfo(pods ...*v1.Pod) *NodeInfo {
ni := &NodeInfo{
requestedResource: &Resource{},
nonzeroRequest: &Resource{},
allocatableResource: &Resource{},
TransientInfo: NewTransientSchedulerInfo(),
generation: nextGeneration(),
usedPorts: make(HostPortInfo),
imageStates: make(map[string]*ImageStateSummary),
}
for _, pod := range pods {
ni.AddPod(pod)
}
return ni
}
// Node returns overall information about this node.
func (n *NodeInfo) Node() *v1.Node {
if n == nil {
return nil
}
return n.node
}
// Pods return all pods scheduled (including assumed to be) on this node.
func (n *NodeInfo) Pods() []*v1.Pod {
if n == nil {
return nil
}
return n.pods
}
// SetPods sets all pods scheduled (including assumed to be) on this node.
func (n *NodeInfo) SetPods(pods []*v1.Pod) {
n.pods = pods
}
// UsedPorts returns used ports on this node.
func (n *NodeInfo) UsedPorts() HostPortInfo {
if n == nil {
return nil
}
return n.usedPorts
}
// SetUsedPorts sets the used ports on this node.
func (n *NodeInfo) SetUsedPorts(newUsedPorts HostPortInfo) {
n.usedPorts = newUsedPorts
}
// ImageStates returns the state information of all images.
func (n *NodeInfo) ImageStates() map[string]*ImageStateSummary {
if n == nil {
return nil
}
return n.imageStates
}
// SetImageStates sets the state information of all images.
func (n *NodeInfo) SetImageStates(newImageStates map[string]*ImageStateSummary) {
n.imageStates = newImageStates
}
// PodsWithAffinity return all pods with (anti)affinity constraints on this node.
func (n *NodeInfo) PodsWithAffinity() []*v1.Pod {
if n == nil {
return nil
}
return n.podsWithAffinity
}
// AllowedPodNumber returns the number of the allowed pods on this node.
func (n *NodeInfo) AllowedPodNumber() int {
if n == nil || n.allocatableResource == nil {
return 0
}
return n.allocatableResource.AllowedPodNumber
}
// Taints returns the taints list on this node.
func (n *NodeInfo) Taints() ([]v1.Taint, error) {
if n == nil {
return nil, nil
}
return n.taints, n.taintsErr
}
// SetTaints sets the taints list on this node.
func (n *NodeInfo) SetTaints(newTaints []v1.Taint) {
n.taints = newTaints
}
// MemoryPressureCondition returns the memory pressure condition status on this node.
func (n *NodeInfo) MemoryPressureCondition() v1.ConditionStatus {
if n == nil {
return v1.ConditionUnknown
}
return n.memoryPressureCondition
}
// DiskPressureCondition returns the disk pressure condition status on this node.
func (n *NodeInfo) DiskPressureCondition() v1.ConditionStatus {
if n == nil {
return v1.ConditionUnknown
}
return n.diskPressureCondition
}
// PIDPressureCondition returns the pid pressure condition status on this node.
func (n *NodeInfo) PIDPressureCondition() v1.ConditionStatus {
if n == nil {
return v1.ConditionUnknown
}
return n.pidPressureCondition
}
// RequestedResource returns aggregated resource request of pods on this node.
func (n *NodeInfo) RequestedResource() Resource {
if n == nil {
return emptyResource
}
return *n.requestedResource
}
// SetRequestedResource sets the aggregated resource request of pods on this node.
func (n *NodeInfo) SetRequestedResource(newResource *Resource) {
n.requestedResource = newResource
}
// NonZeroRequest returns aggregated nonzero resource request of pods on this node.
func (n *NodeInfo) NonZeroRequest() Resource {
if n == nil {
return emptyResource
}
return *n.nonzeroRequest
}
// SetNonZeroRequest sets the aggregated nonzero resource request of pods on this node.
func (n *NodeInfo) SetNonZeroRequest(newResource *Resource) {
n.nonzeroRequest = newResource
}
// AllocatableResource returns allocatable resources on a given node.
func (n *NodeInfo) AllocatableResource() Resource {
if n == nil {
return emptyResource
}
return *n.allocatableResource
}
// SetAllocatableResource sets the allocatableResource information of given node.
func (n *NodeInfo) SetAllocatableResource(allocatableResource *Resource) {
n.allocatableResource = allocatableResource
n.generation = nextGeneration()
}
// GetGeneration returns the generation on this node.
func (n *NodeInfo) GetGeneration() int64 {
if n == nil {
return 0
}
return n.generation
}
// SetGeneration sets the generation on this node. This is for testing only.
func (n *NodeInfo) SetGeneration(newGeneration int64) {
n.generation = newGeneration
}
// Clone returns a copy of this node.
func (n *NodeInfo) Clone() *NodeInfo {
clone := &NodeInfo{
node: n.node,
requestedResource: n.requestedResource.Clone(),
nonzeroRequest: n.nonzeroRequest.Clone(),
allocatableResource: n.allocatableResource.Clone(),
taintsErr: n.taintsErr,
TransientInfo: n.TransientInfo,
memoryPressureCondition: n.memoryPressureCondition,
diskPressureCondition: n.diskPressureCondition,
pidPressureCondition: n.pidPressureCondition,
usedPorts: make(HostPortInfo),
imageStates: n.imageStates,
generation: n.generation,
}
if len(n.pods) > 0 {
clone.pods = append([]*v1.Pod(nil), n.pods...)
}
if len(n.usedPorts) > 0 {
// HostPortInfo is a map-in-map struct
// make sure it's deep copied
for ip, portMap := range n.usedPorts {
clone.usedPorts[ip] = make(map[ProtocolPort]struct{})
for protocolPort, v := range portMap {
clone.usedPorts[ip][protocolPort] = v
}
}
}
if len(n.podsWithAffinity) > 0 {
clone.podsWithAffinity = append([]*v1.Pod(nil), n.podsWithAffinity...)
}
if len(n.taints) > 0 {
clone.taints = append([]v1.Taint(nil), n.taints...)
}
return clone
}
// VolumeLimits returns volume limits associated with the node
func (n *NodeInfo) VolumeLimits() map[v1.ResourceName]int64 {
volumeLimits := map[v1.ResourceName]int64{}
for k, v := range n.AllocatableResource().ScalarResources {
if v1helper.IsAttachableVolumeResourceName(k) {
volumeLimits[k] = v
}
}
return volumeLimits
}
// String returns representation of human readable format of this NodeInfo.
func (n *NodeInfo) String() string {
podKeys := make([]string, len(n.pods))
for i, pod := range n.pods {
podKeys[i] = pod.Name
}
return fmt.Sprintf("&NodeInfo{Pods:%v, RequestedResource:%#v, NonZeroRequest: %#v, UsedPort: %#v, AllocatableResource:%#v}",
podKeys, n.requestedResource, n.nonzeroRequest, n.usedPorts, n.allocatableResource)
}
func hasPodAffinityConstraints(pod *v1.Pod) bool {
affinity := pod.Spec.Affinity
return affinity != nil && (affinity.PodAffinity != nil || affinity.PodAntiAffinity != nil)
}
// AddPod adds pod information to this NodeInfo.
func (n *NodeInfo) AddPod(pod *v1.Pod) {
res, non0CPU, non0Mem := calculateResource(pod)
n.requestedResource.MilliCPU += res.MilliCPU
n.requestedResource.Memory += res.Memory
n.requestedResource.EphemeralStorage += res.EphemeralStorage
if n.requestedResource.ScalarResources == nil && len(res.ScalarResources) > 0 {
n.requestedResource.ScalarResources = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.ScalarResources {
n.requestedResource.ScalarResources[rName] += rQuant
}
n.nonzeroRequest.MilliCPU += non0CPU
n.nonzeroRequest.Memory += non0Mem
n.pods = append(n.pods, pod)
if hasPodAffinityConstraints(pod) {
n.podsWithAffinity = append(n.podsWithAffinity, pod)
}
// Consume ports when pods added.
n.UpdateUsedPorts(pod, true)
n.generation = nextGeneration()
}
// RemovePod subtracts pod information from this NodeInfo.
func (n *NodeInfo) RemovePod(pod *v1.Pod) error {
k1, err := GetPodKey(pod)
if err != nil {
return err
}
for i := range n.podsWithAffinity {
k2, err := GetPodKey(n.podsWithAffinity[i])
if err != nil {
klog.Errorf("Cannot get pod key, err: %v", err)
continue
}
if k1 == k2 {
// delete the element
n.podsWithAffinity[i] = n.podsWithAffinity[len(n.podsWithAffinity)-1]
n.podsWithAffinity = n.podsWithAffinity[:len(n.podsWithAffinity)-1]
break
}
}
for i := range n.pods {
k2, err := GetPodKey(n.pods[i])
if err != nil {
klog.Errorf("Cannot get pod key, err: %v", err)
continue
}
if k1 == k2 {
// delete the element
n.pods[i] = n.pods[len(n.pods)-1]
n.pods = n.pods[:len(n.pods)-1]
// reduce the resource data
res, non0CPU, non0Mem := calculateResource(pod)
n.requestedResource.MilliCPU -= res.MilliCPU
n.requestedResource.Memory -= res.Memory
n.requestedResource.EphemeralStorage -= res.EphemeralStorage
if len(res.ScalarResources) > 0 && n.requestedResource.ScalarResources == nil {
n.requestedResource.ScalarResources = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.ScalarResources {
n.requestedResource.ScalarResources[rName] -= rQuant
}
n.nonzeroRequest.MilliCPU -= non0CPU
n.nonzeroRequest.Memory -= non0Mem
// Release ports when remove Pods.
n.UpdateUsedPorts(pod, false)
n.generation = nextGeneration()
return nil
}
}
return fmt.Errorf("no corresponding pod %s in pods of node %s", pod.Name, n.node.Name)
}
func calculateResource(pod *v1.Pod) (res Resource, non0CPU int64, non0Mem int64) {
resPtr := &res
for _, c := range pod.Spec.Containers {
resPtr.Add(c.Resources.Requests)
non0CPUReq, non0MemReq := priorityutil.GetNonzeroRequests(&c.Resources.Requests)
non0CPU += non0CPUReq
non0Mem += non0MemReq
// No non-zero resources for GPUs or opaque resources.
}
return
}
// UpdateUsedPorts updates the UsedPorts of NodeInfo.
func (n *NodeInfo) UpdateUsedPorts(pod *v1.Pod, add bool) {
for j := range pod.Spec.Containers {
container := &pod.Spec.Containers[j]
for k := range container.Ports {
podPort := &container.Ports[k]
if add {
n.usedPorts.Add(podPort.HostIP, string(podPort.Protocol), podPort.HostPort)
} else {
n.usedPorts.Remove(podPort.HostIP, string(podPort.Protocol), podPort.HostPort)
}
}
}
}
// SetNode sets the overall node information.
func (n *NodeInfo) SetNode(node *v1.Node) error {
n.node = node
n.allocatableResource = NewResource(node.Status.Allocatable)
n.taints = node.Spec.Taints
for i := range node.Status.Conditions {
cond := &node.Status.Conditions[i]
switch cond.Type {
case v1.NodeMemoryPressure:
n.memoryPressureCondition = cond.Status
case v1.NodeDiskPressure:
n.diskPressureCondition = cond.Status
case v1.NodePIDPressure:
n.pidPressureCondition = cond.Status
default:
// We ignore other conditions.
}
}
n.TransientInfo = NewTransientSchedulerInfo()
n.generation = nextGeneration()
return nil
}
// RemoveNode removes the overall information about the node.
func (n *NodeInfo) RemoveNode(node *v1.Node) error {
// We don't remove NodeInfo for because there can still be some pods on this node -
// this is because notifications about pods are delivered in a different watch,
// and thus can potentially be observed later, even though they happened before
// node removal. This is handled correctly in cache.go file.
n.node = nil
n.allocatableResource = &Resource{}
n.taints, n.taintsErr = nil, nil
n.memoryPressureCondition = v1.ConditionUnknown
n.diskPressureCondition = v1.ConditionUnknown
n.pidPressureCondition = v1.ConditionUnknown
n.imageStates = make(map[string]*ImageStateSummary)
n.generation = nextGeneration()
return nil
}
// FilterOutPods receives a list of pods and filters out those whose node names
// are equal to the node of this NodeInfo, but are not found in the pods of this NodeInfo.
//
// Preemption logic simulates removal of pods on a node by removing them from the
// corresponding NodeInfo. In order for the simulation to work, we call this method
// on the pods returned from SchedulerCache, so that predicate functions see
// only the pods that are not removed from the NodeInfo.
func (n *NodeInfo) FilterOutPods(pods []*v1.Pod) []*v1.Pod {
node := n.Node()
if node == nil {
return pods
}
filtered := make([]*v1.Pod, 0, len(pods))
for _, p := range pods {
if p.Spec.NodeName != node.Name {
filtered = append(filtered, p)
continue
}
// If pod is on the given node, add it to 'filtered' only if it is present in nodeInfo.
podKey, _ := GetPodKey(p)
for _, np := range n.Pods() {
npodkey, _ := GetPodKey(np)
if npodkey == podKey {
filtered = append(filtered, p)
break
}
}
}
return filtered
}
// GetPodKey returns the string key of a pod.
func GetPodKey(pod *v1.Pod) (string, error) {
uid := string(pod.UID)
if len(uid) == 0 {
return "", errors.New("Cannot get cache key for pod with empty UID")
}
return uid, nil
}
// Filter implements PodFilter interface. It returns false only if the pod node name
// matches NodeInfo.node and the pod is not found in the pods list. Otherwise,
// returns true.
func (n *NodeInfo) Filter(pod *v1.Pod) bool {
if pod.Spec.NodeName != n.node.Name {
return true
}
for _, p := range n.pods {
if p.Name == pod.Name && p.Namespace == pod.Namespace {
return true
}
}
return false
}

View File

@ -0,0 +1,78 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeinfo
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
)
// CreateNodeNameToInfoMap obtains a list of pods and pivots that list into a map where the keys are node names
// and the values are the aggregated information for that node.
func CreateNodeNameToInfoMap(pods []*v1.Pod, nodes []*v1.Node) map[string]*NodeInfo {
nodeNameToInfo := make(map[string]*NodeInfo)
for _, pod := range pods {
nodeName := pod.Spec.NodeName
if _, ok := nodeNameToInfo[nodeName]; !ok {
nodeNameToInfo[nodeName] = NewNodeInfo()
}
nodeNameToInfo[nodeName].AddPod(pod)
}
imageExistenceMap := createImageExistenceMap(nodes)
for _, node := range nodes {
if _, ok := nodeNameToInfo[node.Name]; !ok {
nodeNameToInfo[node.Name] = NewNodeInfo()
}
nodeInfo := nodeNameToInfo[node.Name]
nodeInfo.SetNode(node)
nodeInfo.imageStates = getNodeImageStates(node, imageExistenceMap)
}
return nodeNameToInfo
}
// getNodeImageStates returns the given node's image states based on the given imageExistence map.
func getNodeImageStates(node *v1.Node, imageExistenceMap map[string]sets.String) map[string]*ImageStateSummary {
imageStates := make(map[string]*ImageStateSummary)
for _, image := range node.Status.Images {
for _, name := range image.Names {
imageStates[name] = &ImageStateSummary{
Size: image.SizeBytes,
NumNodes: len(imageExistenceMap[name]),
}
}
}
return imageStates
}
// createImageExistenceMap returns a map recording on which nodes the images exist, keyed by the images' names.
func createImageExistenceMap(nodes []*v1.Node) map[string]sets.String {
imageExistenceMap := make(map[string]sets.String)
for _, node := range nodes {
for _, image := range node.Status.Images {
for _, name := range image.Names {
if _, ok := imageExistenceMap[name]; !ok {
imageExistenceMap[name] = sets.NewString(node.Name)
} else {
imageExistenceMap[name].Insert(node.Name)
}
}
}
}
return imageExistenceMap
}

View File

@ -0,0 +1,220 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"fmt"
"sync"
"sync/atomic"
"time"
ktypes "k8s.io/apimachinery/pkg/types"
"k8s.io/klog"
)
type clock interface {
Now() time.Time
}
type realClock struct{}
func (realClock) Now() time.Time {
return time.Now()
}
// backoffEntry is single threaded. in particular, it only allows a single action to be waiting on backoff at a time.
// It is also not safe to copy this object.
type backoffEntry struct {
initialized bool
podName ktypes.NamespacedName
backoff time.Duration
lastUpdate time.Time
reqInFlight int32
}
// tryLock attempts to acquire a lock via atomic compare and swap.
// returns true if the lock was acquired, false otherwise
func (b *backoffEntry) tryLock() bool {
return atomic.CompareAndSwapInt32(&b.reqInFlight, 0, 1)
}
// unlock returns the lock. panics if the lock isn't held
func (b *backoffEntry) unlock() {
if !atomic.CompareAndSwapInt32(&b.reqInFlight, 1, 0) {
panic(fmt.Sprintf("unexpected state on unlocking: %+v", b))
}
}
// backoffTime returns the Time when a backoffEntry completes backoff
func (b *backoffEntry) backoffTime() time.Time {
return b.lastUpdate.Add(b.backoff)
}
// getBackoff returns the duration until this entry completes backoff
func (b *backoffEntry) getBackoff(maxDuration time.Duration) time.Duration {
if !b.initialized {
b.initialized = true
return b.backoff
}
newDuration := b.backoff * 2
if newDuration > maxDuration {
newDuration = maxDuration
}
b.backoff = newDuration
klog.V(4).Infof("Backing off %s", newDuration.String())
return newDuration
}
// PodBackoff is used to restart a pod with back-off delay.
type PodBackoff struct {
// expiryQ stores backoffEntry orderedy by lastUpdate until they reach maxDuration and are GC'd
expiryQ *Heap
lock sync.Mutex
clock clock
defaultDuration time.Duration
maxDuration time.Duration
}
// MaxDuration returns the max time duration of the back-off.
func (p *PodBackoff) MaxDuration() time.Duration {
return p.maxDuration
}
// CreateDefaultPodBackoff creates a default pod back-off object.
func CreateDefaultPodBackoff() *PodBackoff {
return CreatePodBackoff(1*time.Second, 60*time.Second)
}
// CreatePodBackoff creates a pod back-off object by default duration and max duration.
func CreatePodBackoff(defaultDuration, maxDuration time.Duration) *PodBackoff {
return CreatePodBackoffWithClock(defaultDuration, maxDuration, realClock{})
}
// CreatePodBackoffWithClock creates a pod back-off object by default duration, max duration and clock.
func CreatePodBackoffWithClock(defaultDuration, maxDuration time.Duration, clock clock) *PodBackoff {
return &PodBackoff{
expiryQ: NewHeap(backoffEntryKeyFunc, backoffEntryCompareUpdate),
clock: clock,
defaultDuration: defaultDuration,
maxDuration: maxDuration,
}
}
// getEntry returns the backoffEntry for a given podID
func (p *PodBackoff) getEntry(podID ktypes.NamespacedName) *backoffEntry {
entry, exists, _ := p.expiryQ.GetByKey(podID.String())
var be *backoffEntry
if !exists {
be = &backoffEntry{
initialized: false,
podName: podID,
backoff: p.defaultDuration,
}
p.expiryQ.Update(be)
} else {
be = entry.(*backoffEntry)
}
return be
}
// BackoffPod updates the backoff for a podId and returns the duration until backoff completion
func (p *PodBackoff) BackoffPod(podID ktypes.NamespacedName) time.Duration {
p.lock.Lock()
defer p.lock.Unlock()
entry := p.getEntry(podID)
entry.lastUpdate = p.clock.Now()
p.expiryQ.Update(entry)
return entry.getBackoff(p.maxDuration)
}
// TryBackoffAndWait tries to acquire the backoff lock
func (p *PodBackoff) TryBackoffAndWait(podID ktypes.NamespacedName, stop <-chan struct{}) bool {
p.lock.Lock()
entry := p.getEntry(podID)
if !entry.tryLock() {
p.lock.Unlock()
return false
}
defer entry.unlock()
duration := entry.getBackoff(p.maxDuration)
p.lock.Unlock()
select {
case <-time.After(duration):
return true
case <-stop:
return false
}
}
// Gc execute garbage collection on the pod back-off.
func (p *PodBackoff) Gc() {
p.lock.Lock()
defer p.lock.Unlock()
now := p.clock.Now()
var be *backoffEntry
for {
entry := p.expiryQ.Peek()
if entry == nil {
break
}
be = entry.(*backoffEntry)
if now.Sub(be.lastUpdate) > p.maxDuration {
p.expiryQ.Pop()
} else {
break
}
}
}
// GetBackoffTime returns the time that podID completes backoff
func (p *PodBackoff) GetBackoffTime(podID ktypes.NamespacedName) (time.Time, bool) {
p.lock.Lock()
defer p.lock.Unlock()
rawBe, exists, _ := p.expiryQ.GetByKey(podID.String())
if !exists {
return time.Time{}, false
}
be := rawBe.(*backoffEntry)
return be.lastUpdate.Add(be.backoff), true
}
// ClearPodBackoff removes all tracking information for podID (clears expiry)
func (p *PodBackoff) ClearPodBackoff(podID ktypes.NamespacedName) bool {
p.lock.Lock()
defer p.lock.Unlock()
entry, exists, _ := p.expiryQ.GetByKey(podID.String())
if exists {
err := p.expiryQ.Delete(entry)
return err == nil
}
return false
}
// backoffEntryKeyFunc is the keying function used for mapping a backoffEntry to string for heap
func backoffEntryKeyFunc(b interface{}) (string, error) {
be := b.(*backoffEntry)
return be.podName.String(), nil
}
// backoffEntryCompareUpdate returns true when b1's backoff time is before b2's
func backoffEntryCompareUpdate(b1, b2 interface{}) bool {
be1 := b1.(*backoffEntry)
be2 := b2.(*backoffEntry)
return be1.lastUpdate.Before(be2.lastUpdate)
}

34
vendor/k8s.io/kubernetes/pkg/scheduler/util/clock.go generated vendored Normal file
View File

@ -0,0 +1,34 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"time"
)
// Clock provides an interface for getting the current time
type Clock interface {
Now() time.Time
}
// RealClock implements a clock using time
type RealClock struct{}
// Now returns the current time with time.Now
func (RealClock) Now() time.Time {
return time.Now()
}

236
vendor/k8s.io/kubernetes/pkg/scheduler/util/heap.go generated vendored Normal file
View File

@ -0,0 +1,236 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Below is the implementation of the a heap. The logic is pretty much the same
// as cache.heap, however, this heap does not perform synchronization. It leaves
// synchronization to the SchedulingQueue.
package util
import (
"container/heap"
"fmt"
"k8s.io/client-go/tools/cache"
)
// KeyFunc is a function type to get the key from an object.
type KeyFunc func(obj interface{}) (string, error)
type heapItem struct {
obj interface{} // The object which is stored in the heap.
index int // The index of the object's key in the Heap.queue.
}
type itemKeyValue struct {
key string
obj interface{}
}
// heapData is an internal struct that implements the standard heap interface
// and keeps the data stored in the heap.
type heapData struct {
// items is a map from key of the objects to the objects and their index.
// We depend on the property that items in the map are in the queue and vice versa.
items map[string]*heapItem
// queue implements a heap data structure and keeps the order of elements
// according to the heap invariant. The queue keeps the keys of objects stored
// in "items".
queue []string
// keyFunc is used to make the key used for queued item insertion and retrieval, and
// should be deterministic.
keyFunc KeyFunc
// lessFunc is used to compare two objects in the heap.
lessFunc LessFunc
}
var (
_ = heap.Interface(&heapData{}) // heapData is a standard heap
)
// Less compares two objects and returns true if the first one should go
// in front of the second one in the heap.
func (h *heapData) Less(i, j int) bool {
if i > len(h.queue) || j > len(h.queue) {
return false
}
itemi, ok := h.items[h.queue[i]]
if !ok {
return false
}
itemj, ok := h.items[h.queue[j]]
if !ok {
return false
}
return h.lessFunc(itemi.obj, itemj.obj)
}
// Len returns the number of items in the Heap.
func (h *heapData) Len() int { return len(h.queue) }
// Swap implements swapping of two elements in the heap. This is a part of standard
// heap interface and should never be called directly.
func (h *heapData) Swap(i, j int) {
h.queue[i], h.queue[j] = h.queue[j], h.queue[i]
item := h.items[h.queue[i]]
item.index = i
item = h.items[h.queue[j]]
item.index = j
}
// Push is supposed to be called by heap.Push only.
func (h *heapData) Push(kv interface{}) {
keyValue := kv.(*itemKeyValue)
n := len(h.queue)
h.items[keyValue.key] = &heapItem{keyValue.obj, n}
h.queue = append(h.queue, keyValue.key)
}
// Pop is supposed to be called by heap.Pop only.
func (h *heapData) Pop() interface{} {
key := h.queue[len(h.queue)-1]
h.queue = h.queue[0 : len(h.queue)-1]
item, ok := h.items[key]
if !ok {
// This is an error
return nil
}
delete(h.items, key)
return item.obj
}
// Peek is supposed to be called by heap.Peek only.
func (h *heapData) Peek() interface{} {
if len(h.queue) > 0 {
return h.items[h.queue[0]].obj
}
return nil
}
// Heap is a producer/consumer queue that implements a heap data structure.
// It can be used to implement priority queues and similar data structures.
type Heap struct {
// data stores objects and has a queue that keeps their ordering according
// to the heap invariant.
data *heapData
}
// Add inserts an item, and puts it in the queue. The item is updated if it
// already exists.
func (h *Heap) Add(obj interface{}) error {
key, err := h.data.keyFunc(obj)
if err != nil {
return cache.KeyError{Obj: obj, Err: err}
}
if _, exists := h.data.items[key]; exists {
h.data.items[key].obj = obj
heap.Fix(h.data, h.data.items[key].index)
} else {
heap.Push(h.data, &itemKeyValue{key, obj})
}
return nil
}
// AddIfNotPresent inserts an item, and puts it in the queue. If an item with
// the key is present in the map, no changes is made to the item.
func (h *Heap) AddIfNotPresent(obj interface{}) error {
key, err := h.data.keyFunc(obj)
if err != nil {
return cache.KeyError{Obj: obj, Err: err}
}
if _, exists := h.data.items[key]; !exists {
heap.Push(h.data, &itemKeyValue{key, obj})
}
return nil
}
// Update is the same as Add in this implementation. When the item does not
// exist, it is added.
func (h *Heap) Update(obj interface{}) error {
return h.Add(obj)
}
// Delete removes an item.
func (h *Heap) Delete(obj interface{}) error {
key, err := h.data.keyFunc(obj)
if err != nil {
return cache.KeyError{Obj: obj, Err: err}
}
if item, ok := h.data.items[key]; ok {
heap.Remove(h.data, item.index)
return nil
}
return fmt.Errorf("object not found")
}
// Peek returns the head of the heap without removing it.
func (h *Heap) Peek() interface{} {
return h.data.Peek()
}
// Pop returns the head of the heap and removes it.
func (h *Heap) Pop() (interface{}, error) {
obj := heap.Pop(h.data)
if obj != nil {
return obj, nil
}
return nil, fmt.Errorf("object was removed from heap data")
}
// Get returns the requested item, or sets exists=false.
func (h *Heap) Get(obj interface{}) (interface{}, bool, error) {
key, err := h.data.keyFunc(obj)
if err != nil {
return nil, false, cache.KeyError{Obj: obj, Err: err}
}
return h.GetByKey(key)
}
// GetByKey returns the requested item, or sets exists=false.
func (h *Heap) GetByKey(key string) (interface{}, bool, error) {
item, exists := h.data.items[key]
if !exists {
return nil, false, nil
}
return item.obj, true, nil
}
// List returns a list of all the items.
func (h *Heap) List() []interface{} {
list := make([]interface{}, 0, len(h.data.items))
for _, item := range h.data.items {
list = append(list, item.obj)
}
return list
}
// Len returns the number of items in the heap.
func (h *Heap) Len() int {
return len(h.data.queue)
}
// NewHeap returns a Heap which can be used to queue up items to process.
func NewHeap(keyFn KeyFunc, lessFn LessFunc) *Heap {
return &Heap{
data: &heapData{
items: map[string]*heapItem{},
queue: []string{},
keyFunc: keyFn,
lessFunc: lessFn,
},
}
}

99
vendor/k8s.io/kubernetes/pkg/scheduler/util/utils.go generated vendored Normal file
View File

@ -0,0 +1,99 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"sort"
"k8s.io/api/core/v1"
"k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/apis/scheduling"
"k8s.io/kubernetes/pkg/features"
)
// GetContainerPorts returns the used host ports of Pods: if 'port' was used, a 'port:true' pair
// will be in the result; but it does not resolve port conflict.
func GetContainerPorts(pods ...*v1.Pod) []*v1.ContainerPort {
var ports []*v1.ContainerPort
for _, pod := range pods {
for j := range pod.Spec.Containers {
container := &pod.Spec.Containers[j]
for k := range container.Ports {
ports = append(ports, &container.Ports[k])
}
}
}
return ports
}
// PodPriorityEnabled indicates whether pod priority feature is enabled.
func PodPriorityEnabled() bool {
return feature.DefaultFeatureGate.Enabled(features.PodPriority)
}
// GetPodFullName returns a name that uniquely identifies a pod.
func GetPodFullName(pod *v1.Pod) string {
// Use underscore as the delimiter because it is not allowed in pod name
// (DNS subdomain format).
return pod.Name + "_" + pod.Namespace
}
// GetPodPriority return priority of the given pod.
func GetPodPriority(pod *v1.Pod) int32 {
if pod.Spec.Priority != nil {
return *pod.Spec.Priority
}
// When priority of a running pod is nil, it means it was created at a time
// that there was no global default priority class and the priority class
// name of the pod was empty. So, we resolve to the static default priority.
return scheduling.DefaultPriorityWhenNoDefaultClassExists
}
// SortableList is a list that implements sort.Interface.
type SortableList struct {
Items []interface{}
CompFunc LessFunc
}
// LessFunc is a function that receives two items and returns true if the first
// item should be placed before the second one when the list is sorted.
type LessFunc func(item1, item2 interface{}) bool
var _ = sort.Interface(&SortableList{})
func (l *SortableList) Len() int { return len(l.Items) }
func (l *SortableList) Less(i, j int) bool {
return l.CompFunc(l.Items[i], l.Items[j])
}
func (l *SortableList) Swap(i, j int) {
l.Items[i], l.Items[j] = l.Items[j], l.Items[i]
}
// Sort sorts the items in the list using the given CompFunc. Item1 is placed
// before Item2 when CompFunc(Item1, Item2) returns true.
func (l *SortableList) Sort() {
sort.Sort(l)
}
// HigherPriorityPod return true when priority of the first pod is higher than
// the second one. It takes arguments of the type "interface{}" to be used with
// SortableList, but expects those arguments to be *v1.Pod.
func HigherPriorityPod(pod1, pod2 interface{}) bool {
return GetPodPriority(pod1.(*v1.Pod)) > GetPodPriority(pod2.(*v1.Pod))
}

View File

@ -0,0 +1,61 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package volumebinder
import (
"time"
"k8s.io/api/core/v1"
coreinformers "k8s.io/client-go/informers/core/v1"
storageinformers "k8s.io/client-go/informers/storage/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
)
// VolumeBinder sets up the volume binding library
type VolumeBinder struct {
Binder persistentvolume.SchedulerVolumeBinder
}
// NewVolumeBinder sets up the volume binding library and binding queue
func NewVolumeBinder(
client clientset.Interface,
nodeInformer coreinformers.NodeInformer,
pvcInformer coreinformers.PersistentVolumeClaimInformer,
pvInformer coreinformers.PersistentVolumeInformer,
storageClassInformer storageinformers.StorageClassInformer,
bindTimeout time.Duration) *VolumeBinder {
return &VolumeBinder{
Binder: persistentvolume.NewVolumeBinder(client, nodeInformer, pvcInformer, pvInformer, storageClassInformer, bindTimeout),
}
}
// NewFakeVolumeBinder sets up a fake volume binder and binding queue
func NewFakeVolumeBinder(config *persistentvolume.FakeVolumeBinderConfig) *VolumeBinder {
return &VolumeBinder{
Binder: persistentvolume.NewFakeVolumeBinder(config),
}
}
// DeletePodBindings will delete the cached volume bindings for the given pod.
func (b *VolumeBinder) DeletePodBindings(pod *v1.Pod) {
cache := b.Binder.GetBindingsCache()
if cache != nil && pod != nil {
cache.DeleteBindings(pod)
}
}