Update to kube v1.17

Signed-off-by: Humble Chirammal <hchiramm@redhat.com>
This commit is contained in:
Humble Chirammal
2020-01-14 16:08:55 +05:30
committed by mergify[bot]
parent 327fcd1b1b
commit 3af1e26d7c
1710 changed files with 289562 additions and 168638 deletions

View File

@ -19,83 +19,122 @@ package predicates
import (
"fmt"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/util/rand"
utilfeature "k8s.io/apiserver/pkg/util/feature"
corelisters "k8s.io/client-go/listers/core/v1"
storagelisters "k8s.io/client-go/listers/storage/v1"
csitrans "k8s.io/csi-translation-lib"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/features"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
)
// InTreeToCSITranslator contains methods required to check migratable status
// and perform translations from InTree PV's to CSI
type InTreeToCSITranslator interface {
IsPVMigratable(pv *v1.PersistentVolume) bool
IsMigratableIntreePluginByName(inTreePluginName string) bool
GetInTreePluginNameFromSpec(pv *v1.PersistentVolume, vol *v1.Volume) (string, error)
GetCSINameFromInTreeName(pluginName string) (string, error)
TranslateInTreePVToCSI(pv *v1.PersistentVolume) (*v1.PersistentVolume, error)
}
// CSIMaxVolumeLimitChecker defines predicate needed for counting CSI volumes
type CSIMaxVolumeLimitChecker struct {
csiNodeLister storagelisters.CSINodeLister
pvLister corelisters.PersistentVolumeLister
pvcLister corelisters.PersistentVolumeClaimLister
scLister storagelisters.StorageClassLister
randomVolumeIDPrefix string
translator InTreeToCSITranslator
}
// NewCSIMaxVolumeLimitPredicate returns a predicate for counting CSI volumes
func NewCSIMaxVolumeLimitPredicate(
csiNodeLister storagelisters.CSINodeLister, pvLister corelisters.PersistentVolumeLister, pvcLister corelisters.PersistentVolumeClaimLister, scLister storagelisters.StorageClassLister) FitPredicate {
c := &CSIMaxVolumeLimitChecker{
csiNodeLister: csiNodeLister,
pvLister: pvLister,
pvcLister: pvcLister,
scLister: scLister,
randomVolumeIDPrefix: rand.String(32),
translator: csitrans.New(),
}
return c.attachableLimitPredicate
}
func getVolumeLimits(nodeInfo *schedulernodeinfo.NodeInfo, csiNode *storagev1.CSINode) map[v1.ResourceName]int64 {
// TODO: stop getting values from Node object in v1.18
nodeVolumeLimits := nodeInfo.VolumeLimits()
if csiNode != nil {
for i := range csiNode.Spec.Drivers {
d := csiNode.Spec.Drivers[i]
if d.Allocatable != nil && d.Allocatable.Count != nil {
// TODO: drop GetCSIAttachLimitKey once we don't get values from Node object (v1.18)
k := v1.ResourceName(volumeutil.GetCSIAttachLimitKey(d.Name))
nodeVolumeLimits[k] = int64(*d.Allocatable.Count)
}
}
}
return nodeVolumeLimits
}
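For intuition, a standalone sketch of the merge above; the driver name and counts are invented. A CSINode driver entry with Allocatable.Count set overrides whatever limit the Node object advertised under the same key:

package main

import "fmt"

func main() {
	// Limit map seeded from the Node object (the legacy source, dropped in v1.18).
	limits := map[string]int64{"attachable-volumes-csi-ebs.csi.aws.com": 39}
	// A CSINode driver entry with Allocatable.Count set overrides the Node-derived value.
	var count int32 = 25
	limits["attachable-volumes-csi-ebs.csi.aws.com"] = int64(count)
	fmt.Println(limits["attachable-volumes-csi-ebs.csi.aws.com"]) // 25
}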
func (c *CSIMaxVolumeLimitChecker) attachableLimitPredicate(
pod *v1.Pod, meta Metadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
// If the new pod doesn't have any volume attached to it, the predicate will always be true
if len(pod.Spec.Volumes) == 0 {
return true, nil, nil
}
node := nodeInfo.Node()
if node == nil {
return false, nil, fmt.Errorf("node not found")
}
// If CSINode doesn't exist, the predicate may read the limits from Node object
csiNode, err := c.csiNodeLister.Get(node.Name)
if err != nil {
// TODO: return the error once CSINode is created by default (2 releases)
klog.V(5).Infof("Could not get a CSINode object for the node: %v", err)
}
// a map of unique volume name/csi volume handle and volume limit key
newVolumes := make(map[string]string)
if err := c.filterAttachableVolumes(csiNode, pod.Spec.Volumes, pod.Namespace, newVolumes); err != nil {
return false, nil, err
}
// If the pod doesn't have any new CSI volumes, the predicate will always be true
if len(newVolumes) == 0 {
return true, nil, nil
}
// If the node doesn't have volume limits, the predicate will always be true
nodeVolumeLimits := getVolumeLimits(nodeInfo, csiNode)
if len(nodeVolumeLimits) == 0 {
return true, nil, nil
}
attachedVolumes := make(map[string]string)
for _, existingPod := range nodeInfo.Pods() {
if err := c.filterAttachableVolumes(csiNode, existingPod.Spec.Volumes, existingPod.Namespace, attachedVolumes); err != nil {
return false, nil, err
}
}
attachedVolumeCount := map[string]int{}
for volumeUniqueName, volumeLimitKey := range attachedVolumes {
if _, ok := newVolumes[volumeUniqueName]; ok {
// Don't count single volume used in multiple pods more than once
delete(newVolumes, volumeUniqueName)
}
attachedVolumeCount[volumeLimitKey]++
}
newVolumeCount := map[string]int{}
for _, volumeLimitKey := range newVolumes {
newVolumeCount[volumeLimitKey]++
}
@ -114,8 +153,7 @@ func (c *CSIMaxVolumeLimitChecker) attachableLimitPredicate(
}
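The hunk elided above compares these per-key counts against the node's limits. A toy, self-contained version of the dedup-and-count step (volume names and limit keys invented):

package main

import "fmt"

func main() {
	// Unique volume name -> limit key, as built by filterAttachableVolumes.
	attachedVolumes := map[string]string{
		"ebs.csi.aws.com/vol-1": "attachable-volumes-csi-ebs.csi.aws.com",
		"ebs.csi.aws.com/vol-2": "attachable-volumes-csi-ebs.csi.aws.com",
	}
	newVolumes := map[string]string{
		"ebs.csi.aws.com/vol-2": "attachable-volumes-csi-ebs.csi.aws.com", // shared with an existing pod
		"ebs.csi.aws.com/vol-3": "attachable-volumes-csi-ebs.csi.aws.com",
	}

	attachedVolumeCount := map[string]int{}
	for name, key := range attachedVolumes {
		delete(newVolumes, name) // a volume shared with an existing pod is counted once
		attachedVolumeCount[key]++
	}
	newVolumeCount := map[string]int{}
	for _, key := range newVolumes {
		newVolumeCount[key]++
	}
	// The node fits if attached + new stays within the limit for each key.
	fmt.Println(attachedVolumeCount, newVolumeCount) // counts 2 attached, 1 new
}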
func (c *CSIMaxVolumeLimitChecker) filterAttachableVolumes(
csiNode *storagev1.CSINode, volumes []v1.Volume, namespace string, result map[string]string) error {
for _, vol := range volumes {
// CSI volumes can only be used as persistent volumes
if vol.PersistentVolumeClaim == nil {
@ -127,77 +165,121 @@ func (c *CSIMaxVolumeLimitChecker) filterAttachableVolumes(
return fmt.Errorf("PersistentVolumeClaim had no name")
}
pvc, err := c.pvcLister.PersistentVolumeClaims(namespace).Get(pvcName)
if err != nil {
klog.V(4).Infof("Unable to look up PVC info for %s/%s", namespace, pvcName)
klog.V(5).Infof("Unable to look up PVC info for %s/%s", namespace, pvcName)
continue
}
driverName, volumeHandle := c.getCSIDriverInfo(csiNode, pvc)
if driverName == "" || volumeHandle == "" {
klog.V(5).Infof("Could not find a CSI driver name or volume handle, not counting volume")
continue
}
volumeUniqueName := fmt.Sprintf("%s/%s", driverName, volumeHandle)
volumeLimitKey := volumeutil.GetCSIAttachLimitKey(driverName)
result[volumeUniqueName] = volumeLimitKey
}
return nil
}
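Keying results by "driver/volumeHandle" rather than by the raw handle keeps identical handles from different drivers distinct. A standalone illustration (the limit-key shape below only loosely mirrors GetCSIAttachLimitKey):

package main

import "fmt"

func main() {
	handles := [][2]string{
		{"ebs.csi.aws.com", "vol-1"},
		{"pd.csi.storage.gke.io", "vol-1"},
	}
	result := map[string]string{}
	for _, h := range handles {
		uniqueName := fmt.Sprintf("%s/%s", h[0], h[1])
		result[uniqueName] = "attachable-volumes-csi-" + h[0] // illustrative limit key
	}
	fmt.Println(len(result)) // 2: same raw handle, different drivers, counted separately
}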
// getCSIDriverInfo returns the CSI driver name and volume ID of a given PVC.
// If the PVC is from a migrated in-tree plugin, this function will return
// the information of the CSI driver that the plugin has been migrated to.
func (c *CSIMaxVolumeLimitChecker) getCSIDriverInfo(csiNode *storagev1.CSINode, pvc *v1.PersistentVolumeClaim) (string, string) {
pvName := pvc.Spec.VolumeName
namespace := pvc.Namespace
pvcName := pvc.Name
placeHolderCSIDriver := ""
placeHolderHandle := ""
if pvName == "" {
klog.V(5).Infof("Persistent volume had no name for claim %s/%s", namespace, pvcName)
return c.getCSIDriverInfoFromSC(csiNode, pvc)
}
pv, err := c.pvLister.Get(pvName)
if err != nil {
klog.V(4).Infof("Unable to look up PV info for PVC %s/%s and PV %s", namespace, pvcName, pvName)
klog.V(5).Infof("Unable to look up PV info for PVC %s/%s and PV %s", namespace, pvcName, pvName)
// If we can't fetch the PV associated with the PVC, maybe it was deleted
// or the PVC was prebound to a PV that hasn't been created yet.
// fallback to using StorageClass for volume counting
return c.getCSIDriverInfoFromSC(csiNode, pvc)
}
csiSource := pv.Spec.PersistentVolumeSource.CSI
if csiSource == nil {
klog.V(5).Infof("Not considering non-CSI volume %s/%s", namespace, pvcName)
return placeHolderCSIDriver, placeHolderHandle
// We make a fast path for non-CSI volumes that aren't migratable
if !c.translator.IsPVMigratable(pv) {
return "", ""
}
pluginName, err := c.translator.GetInTreePluginNameFromSpec(pv, nil)
if err != nil {
klog.V(5).Infof("Unable to look up plugin name from PV spec: %v", err)
return "", ""
}
if !isCSIMigrationOn(csiNode, pluginName) {
klog.V(5).Infof("CSI Migration of plugin %s is not enabled", pluginName)
return "", ""
}
csiPV, err := c.translator.TranslateInTreePVToCSI(pv)
if err != nil {
klog.V(5).Infof("Unable to translate in-tree volume to CSI: %v", err)
return "", ""
}
if csiPV.Spec.PersistentVolumeSource.CSI == nil {
klog.V(5).Infof("Unable to get a valid volume source for translated PV %s", pvName)
return "", ""
}
csiSource = csiPV.Spec.PersistentVolumeSource.CSI
}
return csiSource.Driver, csiSource.VolumeHandle
}
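A compressed, standalone restatement of the decision ladder above, with a stub in place of the csi-translation-lib calls; the stub's behavior is an assumption for illustration, not the library's actual output:

package main

import "fmt"

type fakePV struct{ csiDriver, handle, inTreePlugin string }

func driverInfo(pv fakePV, migrationOn func(plugin string) bool) (string, string) {
	if pv.csiDriver != "" {
		return pv.csiDriver, pv.handle // native CSI source: use it directly
	}
	if pv.inTreePlugin == "" || !migrationOn(pv.inTreePlugin) {
		return "", "" // non-migratable or migration off: not counted by this predicate
	}
	// Translation rewrites the in-tree source into its CSI equivalent.
	return "csi-for-" + pv.inTreePlugin, pv.handle
}

func main() {
	on := func(string) bool { return true }
	fmt.Println(driverInfo(fakePV{inTreePlugin: "kubernetes.io/aws-ebs", handle: "vol-1"}, on))
}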
// getCSIDriverInfoFromSC returns the CSI driver name and a random volume ID of a given PVC's StorageClass.
func (c *CSIMaxVolumeLimitChecker) getCSIDriverInfoFromSC(csiNode *storagev1.CSINode, pvc *v1.PersistentVolumeClaim) (string, string) {
namespace := pvc.Namespace
pvcName := pvc.Name
scName := v1helper.GetPersistentVolumeClaimClass(pvc)
placeHolderCSIDriver := ""
placeHolderHandle := ""
if scName == nil {
// if StorageClass is not set or found, then PVC must be using immediate binding mode
// and hence it must be bound before scheduling. So it is safe to not count it.
klog.V(5).Infof("pvc %s/%s has no storageClass", namespace, pvcName)
return placeHolderCSIDriver, placeHolderHandle
// If StorageClass is not set or not found, then PVC must be using immediate binding mode
// and hence it must be bound before scheduling. So it is safe to not count it.
if scName == "" {
klog.V(5).Infof("PVC %s/%s has no StorageClass", namespace, pvcName)
return "", ""
}
storageClass, err := c.scLister.Get(scName)
if err != nil {
klog.V(5).Infof("no storage %s found for pvc %s/%s", *scName, namespace, pvcName)
return placeHolderCSIDriver, placeHolderHandle
klog.V(5).Infof("Could not get StorageClass for PVC %s/%s: %v", namespace, pvcName, err)
return "", ""
}
// We use a random prefix to avoid conflicts with volume IDs. If the PVC is bound during
// the execution of the predicate and another pod on the same node uses the same volume,
// we will overcount the volume and treat the two as different.
volumeHandle := fmt.Sprintf("%s-%s/%s", c.randomVolumeIDPrefix, namespace, pvcName)
provisioner := storageClass.Provisioner
if c.translator.IsMigratableIntreePluginByName(provisioner) {
if !isCSIMigrationOn(csiNode, provisioner) {
klog.V(5).Infof("CSI Migration of plugin %s is not enabled", provisioner)
return "", ""
}
driverName, err := c.translator.GetCSINameFromInTreeName(provisioner)
if err != nil {
klog.V(5).Infof("Unable to look up driver name from plugin name: %v", err)
return "", ""
}
return driverName, volumeHandle
}
return provisioner, volumeHandle
}
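A minimal sketch of the synthetic handle built above; the prefix value is invented (the checker uses rand.String(32)):

package main

import "fmt"

func main() {
	// An unbound PVC has no volume handle yet, so a per-checker random prefix
	// plus namespace/name stands in for one; the prefix keeps the synthetic
	// handle from colliding with a real volume ID.
	randomVolumeIDPrefix := "hypothetical32charrandomprefixXX"
	volumeHandle := fmt.Sprintf("%s-%s/%s", randomVolumeIDPrefix, "default", "my-pvc")
	fmt.Println(volumeHandle)
}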

View File

@ -75,11 +75,45 @@ var (
ErrVolumeNodeConflict = newPredicateFailureError("VolumeNodeAffinityConflict", "node(s) had volume node affinity conflict")
// ErrVolumeBindConflict is used for VolumeBindingNoMatch predicate error.
ErrVolumeBindConflict = newPredicateFailureError("VolumeBindingNoMatch", "node(s) didn't find available persistent volumes to bind")
// ErrTopologySpreadConstraintsNotMatch is used for EvenPodsSpread predicate error.
ErrTopologySpreadConstraintsNotMatch = newPredicateFailureError("EvenPodsSpreadNotMatch", "node(s) didn't match pod topology spread constraints")
// ErrFakePredicate is used in tests only. A fake predicate that returns false
// also returns ErrFakePredicate as the error.
ErrFakePredicate = newPredicateFailureError("FakePredicateError", "Nodes failed the fake predicate")
)
var unresolvablePredicateFailureErrors = map[PredicateFailureReason]struct{}{
ErrNodeSelectorNotMatch: {},
ErrPodAffinityRulesNotMatch: {},
ErrPodNotMatchHostName: {},
ErrTaintsTolerationsNotMatch: {},
ErrNodeLabelPresenceViolated: {},
// Node conditions won't change when scheduler simulates removal of preemption victims.
// So, it is pointless to try nodes that have not been able to host the pod due to node
// conditions. These include ErrNodeNotReady, ErrNodeUnderPIDPressure, ErrNodeUnderMemoryPressure, ....
ErrNodeNotReady: {},
ErrNodeNetworkUnavailable: {},
ErrNodeUnderDiskPressure: {},
ErrNodeUnderPIDPressure: {},
ErrNodeUnderMemoryPressure: {},
ErrNodeUnschedulable: {},
ErrNodeUnknownCondition: {},
ErrVolumeZoneConflict: {},
ErrVolumeNodeConflict: {},
ErrVolumeBindConflict: {},
}
// UnresolvablePredicateExists checks whether at least one unresolvable predicate failure
// reason exists and, if so, returns the first one in the list; otherwise it returns nil.
func UnresolvablePredicateExists(reasons []PredicateFailureReason) PredicateFailureReason {
for _, r := range reasons {
if _, ok := unresolvablePredicateFailureErrors[r]; ok {
return r
}
}
return nil
}
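A hypothetical call site, rebuilt with local stand-in types since the real PredicateFailureReason values live in this package: preemption can skip a node as soon as one failure reason is unresolvable.

package main

import "fmt"

type reason string

var unresolvable = map[reason]struct{}{
	"NodeUnschedulable": {}, "VolumeZoneConflict": {},
}

func firstUnresolvable(reasons []reason) (reason, bool) {
	for _, r := range reasons {
		if _, ok := unresolvable[r]; ok {
			return r, true
		}
	}
	return "", false
}

func main() {
	reasons := []reason{"InsufficientCPU", "NodeUnschedulable"}
	if r, ok := firstUnresolvable(reasons); ok {
		// Evicting victims cannot fix this reason, so skip the node entirely.
		fmt.Println("skipping node during preemption:", r)
	}
}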
// InsufficientResourceError is an error type that indicates what kind of resource limit is
// hit and caused the unfitting failure.
type InsufficientResourceError struct {

View File

@ -19,35 +19,31 @@ package predicates
import (
"context"
"fmt"
"math"
"sync"
"k8s.io/klog"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/util/workqueue"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerlisters "k8s.io/kubernetes/pkg/scheduler/listers"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
// Metadata interface represents anything that can access a predicate metadata.
type Metadata interface {
ShallowCopy() Metadata
AddPod(addedPod *v1.Pod, node *v1.Node) error
RemovePod(deletedPod *v1.Pod, node *v1.Node) error
}
// MetadataProducer is a function that computes predicate metadata for a given pod.
type MetadataProducer func(pod *v1.Pod, sharedLister schedulerlisters.SharedLister) Metadata
// AntiAffinityTerm's topology key value used in predicate metadata
type topologyPair struct {
@ -66,14 +62,130 @@ type topologyPairsMaps struct {
podToTopologyPairs map[string]topologyPairSet
}
type criticalPath struct {
// topologyValue denotes the topology value mapping to topology key.
topologyValue string
// matchNum denotes the number of matching pods.
matchNum int32
}
// CAVEAT: the reason that `[2]criticalPath` works rests on the implementation of the
// current preemption algorithm, in particular the following 2 facts:
// Fact 1: we only preempt pods on the same node, instead of pods on multiple nodes.
// Fact 2: each node is evaluated on a separate copy of the metadata during its preemption cycle.
// If we plan to move to a more complex algorithm like "arbitrary pods on multiple nodes",
// this structure needs to be revisited.
type criticalPaths [2]criticalPath
func newCriticalPaths() *criticalPaths {
return &criticalPaths{{matchNum: math.MaxInt32}, {matchNum: math.MaxInt32}}
}
func (paths *criticalPaths) update(tpVal string, num int32) {
// first verify if `tpVal` exists or not
i := -1
if tpVal == paths[0].topologyValue {
i = 0
} else if tpVal == paths[1].topologyValue {
i = 1
}
if i >= 0 {
// `tpVal` exists
paths[i].matchNum = num
if paths[0].matchNum > paths[1].matchNum {
// swap paths[0] and paths[1]
paths[0], paths[1] = paths[1], paths[0]
}
} else {
// `tpVal` doesn't exist
if num < paths[0].matchNum {
// update paths[1] with paths[0]
paths[1] = paths[0]
// update paths[0]
paths[0].topologyValue, paths[0].matchNum = tpVal, num
} else if num < paths[1].matchNum {
// update paths[1]
paths[1].topologyValue, paths[1].matchNum = tpVal, num
}
}
}
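A standalone trace of the two-slot invariant, with the type and update method copied from above: paths[0] always ends up holding the minimum match number.

package main

import (
	"fmt"
	"math"
)

type criticalPath struct {
	topologyValue string
	matchNum      int32
}

type criticalPaths [2]criticalPath

func newCriticalPaths() *criticalPaths {
	return &criticalPaths{{matchNum: math.MaxInt32}, {matchNum: math.MaxInt32}}
}

func (paths *criticalPaths) update(tpVal string, num int32) {
	i := -1
	if tpVal == paths[0].topologyValue {
		i = 0
	} else if tpVal == paths[1].topologyValue {
		i = 1
	}
	if i >= 0 {
		paths[i].matchNum = num
		if paths[0].matchNum > paths[1].matchNum {
			paths[0], paths[1] = paths[1], paths[0]
		}
	} else if num < paths[0].matchNum {
		paths[1] = paths[0]
		paths[0].topologyValue, paths[0].matchNum = tpVal, num
	} else if num < paths[1].matchNum {
		paths[1].topologyValue, paths[1].matchNum = tpVal, num
	}
}

func main() {
	p := newCriticalPaths()
	p.update("zone-a", 3) // [{zone-a 3} {"" MaxInt32}]
	p.update("zone-b", 1) // [{zone-b 1} {zone-a 3}]
	p.update("zone-a", 0) // existing entry updated, then swapped back to slot 0
	fmt.Printf("%v\n", *p) // [{zone-a 0} {zone-b 1}]
}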
// evenPodsSpreadMetadata combines tpKeyToCriticalPaths and tpPairToMatchNum
// to represent:
// (1) critical paths where the least pods are matched on each spread constraint.
// (2) number of pods matched on each spread constraint.
type evenPodsSpreadMetadata struct {
constraints []topologySpreadConstraint
// We record 2 critical paths instead of all critical paths here.
// criticalPaths[0].matchNum always holds the minimum matching number.
// criticalPaths[1].matchNum is always greater than or equal to criticalPaths[0].matchNum, but
// it's not guaranteed to be the 2nd minimum match number.
tpKeyToCriticalPaths map[string]*criticalPaths
// tpPairToMatchNum is keyed with topologyPair, and valued with the number of matching pods.
tpPairToMatchNum map[topologyPair]int32
}
// topologySpreadConstraint is an internal version of a hard (WhenUnsatisfiable ==
// DoNotSchedule) v1.TopologySpreadConstraint, with the label selector already parsed.
type topologySpreadConstraint struct {
maxSkew int32
topologyKey string
selector labels.Selector
}
type serviceAffinityMetadata struct {
matchingPodList []*v1.Pod
matchingPodServices []*v1.Service
}
func (m *serviceAffinityMetadata) addPod(addedPod *v1.Pod, pod *v1.Pod, node *v1.Node) {
// If addedPod is in the same namespace as the pod, update the list
// of matching pods if applicable.
if m == nil || addedPod.Namespace != pod.Namespace {
return
}
selector := CreateSelectorFromLabels(pod.Labels)
if selector.Matches(labels.Set(addedPod.Labels)) {
m.matchingPodList = append(m.matchingPodList, addedPod)
}
}
func (m *serviceAffinityMetadata) removePod(deletedPod *v1.Pod, node *v1.Node) {
deletedPodFullName := schedutil.GetPodFullName(deletedPod)
if m == nil ||
len(m.matchingPodList) == 0 ||
deletedPod.Namespace != m.matchingPodList[0].Namespace {
return
}
for i, pod := range m.matchingPodList {
if schedutil.GetPodFullName(pod) == deletedPodFullName {
m.matchingPodList = append(m.matchingPodList[:i], m.matchingPodList[i+1:]...)
break
}
}
}
func (m *serviceAffinityMetadata) clone() *serviceAffinityMetadata {
if m == nil {
return nil
}
copy := serviceAffinityMetadata{}
copy.matchingPodServices = append([]*v1.Service(nil),
m.matchingPodServices...)
copy.matchingPodList = append([]*v1.Pod(nil),
m.matchingPodList...)
return &copy
}
type podAffinityMetadata struct {
topologyPairsAntiAffinityPodsMap *topologyPairsMaps
// A map of topology pairs to a list of Pods that can potentially match
// the affinity terms of the "pod" and its inverse.
@ -81,9 +193,70 @@ type predicateMetadata struct {
// A map of topology pairs to a list of Pods that can potentially match
// the anti-affinity terms of the "pod" and its inverse.
topologyPairsPotentialAntiAffinityPods *topologyPairsMaps
}
func (m *podAffinityMetadata) addPod(addedPod *v1.Pod, pod *v1.Pod, node *v1.Node) error {
// Add matching anti-affinity terms of the addedPod to the map.
topologyPairsMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(pod, addedPod, node)
if err != nil {
return err
}
m.topologyPairsAntiAffinityPodsMap.appendMaps(topologyPairsMaps)
// Add the pod to nodeNameToMatchingAffinityPods and nodeNameToMatchingAntiAffinityPods if needed.
affinity := pod.Spec.Affinity
podNodeName := addedPod.Spec.NodeName
if affinity != nil && len(podNodeName) > 0 {
// It is assumed that when the added pod matches affinity of the pod, all the terms must match,
// this should be changed when the implementation of targetPodMatchesAffinityOfPod/podMatchesAffinityTermProperties
// is changed
if targetPodMatchesAffinityOfPod(pod, addedPod) {
affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
for _, term := range affinityTerms {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
m.topologyPairsPotentialAffinityPods.addTopologyPair(pair, addedPod)
}
}
}
if targetPodMatchesAntiAffinityOfPod(pod, addedPod) {
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
for _, term := range antiAffinityTerms {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
m.topologyPairsPotentialAntiAffinityPods.addTopologyPair(pair, addedPod)
}
}
}
}
return nil
}
func (m *podAffinityMetadata) removePod(deletedPod *v1.Pod) {
if m == nil {
return
}
m.topologyPairsAntiAffinityPodsMap.removePod(deletedPod)
// Delete pod from the matching affinity or anti-affinity topology pairs maps.
m.topologyPairsPotentialAffinityPods.removePod(deletedPod)
m.topologyPairsPotentialAntiAffinityPods.removePod(deletedPod)
}
func (m *podAffinityMetadata) clone() *podAffinityMetadata {
if m == nil {
return nil
}
copy := podAffinityMetadata{}
copy.topologyPairsPotentialAffinityPods = m.topologyPairsPotentialAffinityPods.clone()
copy.topologyPairsPotentialAntiAffinityPods = m.topologyPairsPotentialAntiAffinityPods.clone()
copy.topologyPairsAntiAffinityPodsMap = m.topologyPairsAntiAffinityPodsMap.clone()
return &copy
}
type podFitsResourcesMetadata struct {
// ignoredExtendedResources is a set of extended resource names that will
// be ignored in the PodFitsResources predicate.
//
@ -91,72 +264,129 @@ type predicateMetadata struct {
// which should be accounted only by the extenders. This set is synthesized
// from scheduler extender configuration and does not change per pod.
ignoredExtendedResources sets.String
podRequest *schedulernodeinfo.Resource
}
func (m *podFitsResourcesMetadata) clone() *podFitsResourcesMetadata {
if m == nil {
return nil
}
copy := podFitsResourcesMetadata{}
copy.ignoredExtendedResources = m.ignoredExtendedResources
copy.podRequest = m.podRequest
return &copy
}
type podFitsHostPortsMetadata struct {
podPorts []*v1.ContainerPort
}
func (m *podFitsHostPortsMetadata) clone() *podFitsHostPortsMetadata {
if m == nil {
return nil
}
copy := podFitsHostPortsMetadata{}
copy.podPorts = append([]*v1.ContainerPort(nil), m.podPorts...)
return &copy
}
// NOTE: When new fields are added/removed or logic is changed, please make sure that
// RemovePod, AddPod, and ShallowCopy functions are updated to work with the new changes.
type predicateMetadata struct {
pod *v1.Pod
podBestEffort bool
// evenPodsSpreadMetadata holds info of the minimum match number on each topology spread constraint,
// and the match number of all valid topology pairs.
evenPodsSpreadMetadata *evenPodsSpreadMetadata
serviceAffinityMetadata *serviceAffinityMetadata
podAffinityMetadata *podAffinityMetadata
podFitsResourcesMetadata *podFitsResourcesMetadata
podFitsHostPortsMetadata *podFitsHostPortsMetadata
}
// Ensure that predicateMetadata implements algorithm.Metadata.
var _ Metadata = &predicateMetadata{}
// predicateMetadataProducer function produces predicate metadata. It is stored in a global variable below
// and used to modify the return values of MetadataProducer
type predicateMetadataProducer func(pm *predicateMetadata)
var predicateMetadataProducers = make(map[string]predicateMetadataProducer)
// RegisterPredicateMetadataProducer registers a MetadataProducer.
func RegisterPredicateMetadataProducer(predicateName string, precomp predicateMetadataProducer) {
predicateMetadataProducers[predicateName] = precomp
}
// EmptyMetadataProducer returns a no-op MetadataProducer type.
func EmptyMetadataProducer(pod *v1.Pod, sharedLister schedulerlisters.SharedLister) Metadata {
return nil
}
// RegisterPredicateMetadataProducerWithExtendedResourceOptions registers a
// MetadataProducer that creates predicate metadata with the provided
// options for extended resources.
//
// See the comments in "predicateMetadata" for the explanation of the options.
func RegisterPredicateMetadataProducerWithExtendedResourceOptions(ignoredExtendedResources sets.String) {
RegisterPredicateMetadataProducer("PredicateWithExtendedResourceOptions", func(pm *predicateMetadata) {
pm.podFitsResourcesMetadata.ignoredExtendedResources = ignoredExtendedResources
})
}
// MetadataProducerFactory is a factory to produce Metadata.
type MetadataProducerFactory struct{}
// GetPredicateMetadata returns the predicateMetadata which will be used by various predicates.
func (f *MetadataProducerFactory) GetPredicateMetadata(pod *v1.Pod, sharedLister schedulerlisters.SharedLister) Metadata {
// If we cannot compute metadata, just return nil
if pod == nil {
return nil
}
var allNodes []*schedulernodeinfo.NodeInfo
var havePodsWithAffinityNodes []*schedulernodeinfo.NodeInfo
if sharedLister != nil {
var err error
allNodes, err = sharedLister.NodeInfos().List()
if err != nil {
klog.Errorf("failed to list NodeInfos: %v", err)
return nil
}
havePodsWithAffinityNodes, err = sharedLister.NodeInfos().HavePodsWithAffinityList()
if err != nil {
klog.Errorf("failed to list NodeInfos: %v", err)
return nil
}
}
// evenPodsSpreadMetadata represents how existing pods match "pod"
// on its spread constraints
evenPodsSpreadMetadata, err := getEvenPodsSpreadMetadata(pod, allNodes)
if err != nil {
klog.Errorf("Error calculating spreadConstraintsMap: %v", err)
return nil
}
podAffinityMetadata, err := getPodAffinityMetadata(pod, allNodes, havePodsWithAffinityNodes)
if err != nil {
klog.Errorf("[predicate meta data generation] error finding pods that match affinity terms: %v", err)
klog.Errorf("Error calculating podAffinityMetadata: %v", err)
return nil
}
predicateMetadata := &predicateMetadata{
pod: pod,
evenPodsSpreadMetadata: evenPodsSpreadMetadata,
podAffinityMetadata: podAffinityMetadata,
podFitsResourcesMetadata: getPodFitsResourcesMetadata(pod),
podFitsHostPortsMetadata: getPodFitsHostPortsMetadata(pod),
}
for predicateName, precomputeFunc := range predicateMetadataProducers {
klog.V(10).Infof("Precompute: %v", predicateName)
@ -165,152 +395,287 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf
return predicateMetadata
}
func getPodFitsHostPortsMetadata(pod *v1.Pod) *podFitsHostPortsMetadata {
return &podFitsHostPortsMetadata{
podPorts: schedutil.GetContainerPorts(pod),
}
}
func getPodFitsResourcesMetadata(pod *v1.Pod) *podFitsResourcesMetadata {
return &podFitsResourcesMetadata{
podRequest: GetResourceRequest(pod),
}
}
func getPodAffinityMetadata(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo, havePodsWithAffinityNodes []*schedulernodeinfo.NodeInfo) (*podAffinityMetadata, error) {
// existingPodAntiAffinityMap will be used later for efficient check on existing pods' anti-affinity
existingPodAntiAffinityMap, err := getTPMapMatchingExistingAntiAffinity(pod, havePodsWithAffinityNodes)
if err != nil {
return nil, err
}
// incomingPodAffinityMap will be used later for efficient check on incoming pod's affinity
// incomingPodAntiAffinityMap will be used later for efficient check on incoming pod's anti-affinity
incomingPodAffinityMap, incomingPodAntiAffinityMap, err := getTPMapMatchingIncomingAffinityAntiAffinity(pod, allNodes)
if err != nil {
return nil, err
}
return &podAffinityMetadata{
topologyPairsPotentialAffinityPods: incomingPodAffinityMap,
topologyPairsPotentialAntiAffinityPods: incomingPodAntiAffinityMap,
topologyPairsAntiAffinityPodsMap: existingPodAntiAffinityMap,
}, nil
}
func getEvenPodsSpreadMetadata(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo) (*evenPodsSpreadMetadata, error) {
// We have feature gating in the API server to strip the spec,
// so we don't need to re-check the feature gate here; just check the length of constraints.
constraints, err := filterHardTopologySpreadConstraints(pod.Spec.TopologySpreadConstraints)
if err != nil {
return nil, err
}
if len(constraints) == 0 {
return nil, nil
}
var lock sync.Mutex
// TODO(Huang-Wei): It might be possible to use "make(map[topologyPair]*int32)".
// In that case, need to consider how to init each tpPairToCount[pair] in an atomic fashion.
m := evenPodsSpreadMetadata{
constraints: constraints,
tpKeyToCriticalPaths: make(map[string]*criticalPaths, len(constraints)),
tpPairToMatchNum: make(map[topologyPair]int32),
}
addTopologyPairMatchNum := func(pair topologyPair, num int32) {
lock.Lock()
m.tpPairToMatchNum[pair] += num
lock.Unlock()
}
processNode := func(i int) {
nodeInfo := allNodes[i]
node := nodeInfo.Node()
if node == nil {
klog.Error("node not found")
return
}
// In accordance with the design, if NodeAffinity or NodeSelector is defined,
// spreading is applied to nodes that pass those filters.
if !PodMatchesNodeSelectorAndAffinityTerms(pod, node) {
return
}
// Ensure current node's labels contains all topologyKeys in 'constraints'.
if !NodeLabelsMatchSpreadConstraints(node.Labels, constraints) {
return
}
for _, constraint := range constraints {
matchTotal := int32(0)
// nodeInfo.Pods() can be empty; or all pods don't fit
for _, existingPod := range nodeInfo.Pods() {
if existingPod.Namespace != pod.Namespace {
continue
}
if constraint.selector.Matches(labels.Set(existingPod.Labels)) {
matchTotal++
}
}
pair := topologyPair{key: constraint.topologyKey, value: node.Labels[constraint.topologyKey]}
addTopologyPairMatchNum(pair, matchTotal)
}
}
workqueue.ParallelizeUntil(context.Background(), 16, len(allNodes), processNode)
// calculate min match for each topology pair
for i := 0; i < len(constraints); i++ {
key := constraints[i].topologyKey
m.tpKeyToCriticalPaths[key] = newCriticalPaths()
}
for pair, num := range m.tpPairToMatchNum {
m.tpKeyToCriticalPaths[pair.key].update(pair.value, num)
}
return &m, nil
}
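For intuition, a simplified restatement of how the predicate later consumes this metadata (not the exact predicate code): a topology value fits a constraint roughly when its match count, plus the incoming pod's own match, minus the global minimum, stays within maxSkew.

package main

import "fmt"

func main() {
	// Per-zone match counts for one DoNotSchedule constraint, as computed above.
	tpPairToMatchNum := map[string]int32{"zone-a": 3, "zone-b": 1}
	minMatch := int32(1) // criticalPaths[0].matchNum after the update loop
	maxSkew := int32(1)
	selfMatch := int32(1) // the incoming pod matches its own selector
	for zone, num := range tpPairToMatchNum {
		skew := num + selfMatch - minMatch
		fmt.Printf("%s: skew=%d, schedulable=%v\n", zone, skew, skew <= maxSkew)
	}
}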
func filterHardTopologySpreadConstraints(constraints []v1.TopologySpreadConstraint) ([]topologySpreadConstraint, error) {
var result []topologySpreadConstraint
for _, c := range constraints {
if c.WhenUnsatisfiable == v1.DoNotSchedule {
selector, err := metav1.LabelSelectorAsSelector(c.LabelSelector)
if err != nil {
return nil, err
}
result = append(result, topologySpreadConstraint{
maxSkew: c.MaxSkew,
topologyKey: c.TopologyKey,
selector: selector,
})
}
}
return result, nil
}
// NodeLabelsMatchSpreadConstraints checks if ALL topology keys in spread constraints are present in node labels.
func NodeLabelsMatchSpreadConstraints(nodeLabels map[string]string, constraints []topologySpreadConstraint) bool {
for _, c := range constraints {
if _, ok := nodeLabels[c.topologyKey]; !ok {
return false
}
}
return true
}
// returns a pointer to a new topologyPairsMaps
func newTopologyPairsMaps() *topologyPairsMaps {
return &topologyPairsMaps{
topologyPairToPods: make(map[topologyPair]podSet),
podToTopologyPairs: make(map[string]topologyPairSet),
}
}
func (m *topologyPairsMaps) addTopologyPair(pair topologyPair, pod *v1.Pod) {
podFullName := schedutil.GetPodFullName(pod)
if m.topologyPairToPods[pair] == nil {
m.topologyPairToPods[pair] = make(map[*v1.Pod]struct{})
}
m.topologyPairToPods[pair][pod] = struct{}{}
if m.podToTopologyPairs[podFullName] == nil {
m.podToTopologyPairs[podFullName] = make(map[topologyPair]struct{})
}
m.podToTopologyPairs[podFullName][pair] = struct{}{}
}
func (m *topologyPairsMaps) removePod(deletedPod *v1.Pod) {
deletedPodFullName := schedutil.GetPodFullName(deletedPod)
for pair := range m.podToTopologyPairs[deletedPodFullName] {
delete(m.topologyPairToPods[pair], deletedPod)
if len(m.topologyPairToPods[pair]) == 0 {
delete(m.topologyPairToPods, pair)
}
}
delete(m.podToTopologyPairs, deletedPodFullName)
}
func (m *topologyPairsMaps) appendMaps(toAppend *topologyPairsMaps) {
if toAppend == nil {
return
}
for pair := range toAppend.topologyPairToPods {
for pod := range toAppend.topologyPairToPods[pair] {
m.addTopologyPair(pair, pod)
}
}
}
func (m *topologyPairsMaps) clone() *topologyPairsMaps {
copy := newTopologyPairsMaps()
copy.appendMaps(m)
return copy
}
func (m *evenPodsSpreadMetadata) addPod(addedPod, preemptorPod *v1.Pod, node *v1.Node) {
m.updatePod(addedPod, preemptorPod, node, 1)
}
func (m *evenPodsSpreadMetadata) removePod(deletedPod, preemptorPod *v1.Pod, node *v1.Node) {
m.updatePod(deletedPod, preemptorPod, node, -1)
}
func (m *evenPodsSpreadMetadata) updatePod(updatedPod, preemptorPod *v1.Pod, node *v1.Node, delta int32) {
if m == nil || updatedPod.Namespace != preemptorPod.Namespace || node == nil {
return
}
if !NodeLabelsMatchSpreadConstraints(node.Labels, m.constraints) {
return
}
podLabelSet := labels.Set(updatedPod.Labels)
for _, constraint := range m.constraints {
if !constraint.selector.Matches(podLabelSet) {
continue
}
k, v := constraint.topologyKey, node.Labels[constraint.topologyKey]
pair := topologyPair{key: k, value: v}
m.tpPairToMatchNum[pair] = m.tpPairToMatchNum[pair] + delta
m.tpKeyToCriticalPaths[k].update(v, m.tpPairToMatchNum[pair])
}
}
func (m *evenPodsSpreadMetadata) clone() *evenPodsSpreadMetadata {
// m could be nil when the EvenPodsSpread feature is disabled
if m == nil {
return nil
}
cp := evenPodsSpreadMetadata{
// constraints are shared because they don't change.
constraints: m.constraints,
tpKeyToCriticalPaths: make(map[string]*criticalPaths, len(m.tpKeyToCriticalPaths)),
tpPairToMatchNum: make(map[topologyPair]int32, len(m.tpPairToMatchNum)),
}
for tpKey, paths := range m.tpKeyToCriticalPaths {
cp.tpKeyToCriticalPaths[tpKey] = &criticalPaths{paths[0], paths[1]}
}
for tpPair, matchNum := range m.tpPairToMatchNum {
copyPair := topologyPair{key: tpPair.key, value: tpPair.value}
cp.tpPairToMatchNum[copyPair] = matchNum
}
return &cp
}
// RemovePod changes predicateMetadata assuming that the given `deletedPod` is
// deleted from the system.
func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod, node *v1.Node) error {
deletedPodFullName := schedutil.GetPodFullName(deletedPod)
if deletedPodFullName == schedutil.GetPodFullName(meta.pod) {
return fmt.Errorf("deletedPod and meta.pod must not be the same")
}
meta.podAffinityMetadata.removePod(deletedPod)
meta.evenPodsSpreadMetadata.removePod(deletedPod, meta.pod, node)
meta.serviceAffinityMetadata.removePod(deletedPod, node)
return nil
}
// AddPod changes predicateMetadata assuming that the given `addedPod` is added to the
// system.
func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, node *v1.Node) error {
addedPodFullName := schedutil.GetPodFullName(addedPod)
if addedPodFullName == schedutil.GetPodFullName(meta.pod) {
return fmt.Errorf("addedPod and meta.pod must not be the same")
}
if node == nil {
return fmt.Errorf("node not found")
}
if err := meta.podAffinityMetadata.addPod(addedPod, meta.pod, node); err != nil {
return err
}
// Update meta.evenPodsSpreadMetadata if meta.pod has hard spread constraints
// and addedPod matches them
meta.evenPodsSpreadMetadata.addPod(addedPod, meta.pod, node)
meta.serviceAffinityMetadata.addPod(addedPod, meta.pod, node)
return nil
}
// ShallowCopy copies a metadata struct into a new struct and creates a copy of
// its maps and slices, but it does not copy the contents of pointer values.
func (meta *predicateMetadata) ShallowCopy() Metadata {
newPredMeta := &predicateMetadata{
pod: meta.pod,
podBestEffort: meta.podBestEffort,
}
newPredMeta.podFitsHostPortsMetadata = meta.podFitsHostPortsMetadata.clone()
newPredMeta.podAffinityMetadata = meta.podAffinityMetadata.clone()
newPredMeta.evenPodsSpreadMetadata = meta.evenPodsSpreadMetadata.clone()
newPredMeta.serviceAffinityMetadata = meta.serviceAffinityMetadata.clone()
newPredMeta.podFitsResourcesMetadata = meta.podFitsResourcesMetadata.clone()
return (Metadata)(newPredMeta)
}
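A toy analog of why per-node copies matter for Fact 2 in the CAVEAT above: during preemption simulation, each candidate node mutates its own clone (via AddPod/RemovePod) without leaking into the shared metadata.

package main

import "fmt"

type toyMeta struct{ matchNum map[string]int32 }

func (m *toyMeta) clone() *toyMeta {
	c := &toyMeta{matchNum: make(map[string]int32, len(m.matchNum))}
	for k, v := range m.matchNum {
		c.matchNum[k] = v
	}
	return c
}

func main() {
	shared := &toyMeta{matchNum: map[string]int32{"zone-a": 3}}
	perNode := shared.clone()
	perNode.matchNum["zone-a"]-- // simulate RemovePod(victim, node) on one candidate node
	fmt.Println(shared.matchNum["zone-a"], perNode.matchNum["zone-a"]) // 3 2
}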
type affinityTermProperties struct {
@ -365,15 +730,9 @@ func podMatchesAnyAffinityTermProperties(pod *v1.Pod, properties []*affinityTerm
// getTPMapMatchingExistingAntiAffinity calculates the following for each existing pod on each node:
// (1) Whether it has PodAntiAffinity
// (2) Whether any AffinityTerm matches the incoming pod
func getTPMapMatchingExistingAntiAffinity(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo) (*topologyPairsMaps, error) {
errCh := schedutil.NewErrorChannel()
var lock sync.Mutex
topologyMaps := newTopologyPairsMaps()
appendTopologyPairsMaps := func(toAppend *topologyPairsMaps) {
@ -381,54 +740,48 @@ func getTPMapMatchingExistingAntiAffinity(pod *v1.Pod, nodeInfoMap map[string]*s
defer lock.Unlock()
topologyMaps.appendMaps(toAppend)
}
ctx, cancel := context.WithCancel(context.Background())
processNode := func(i int) {
nodeInfo := allNodes[i]
node := nodeInfo.Node()
if node == nil {
catchError(fmt.Errorf("node not found"))
klog.Error("node not found")
return
}
for _, existingPod := range nodeInfo.PodsWithAffinity() {
existingPodTopologyMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(pod, existingPod, node)
if err != nil {
errCh.SendErrorWithCancel(err, cancel)
return
}
if existingPodTopologyMaps != nil {
appendTopologyPairsMaps(existingPodTopologyMaps)
}
}
}
workqueue.ParallelizeUntil(ctx, 16, len(allNodes), processNode)
if err := errCh.ReceiveError(); err != nil {
return nil, err
}
return topologyMaps, nil
}
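schedutil's ErrorChannel is not shown in this diff; the standalone analog below assumes its first-error-wins, cancel-on-send behavior to illustrate the parallel pattern used here.

package main

import (
	"context"
	"fmt"
	"sync"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	errCh := make(chan error, 1) // buffered: the first error wins, later ones are dropped

	var wg sync.WaitGroup
	for i := 0; i < 16; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			select {
			case <-ctx.Done():
				return // another worker already failed; skip remaining work
			default:
			}
			if i == 7 { // simulate a per-node failure
				select {
				case errCh <- fmt.Errorf("node %d: bad affinity term", i):
					cancel()
				default:
				}
			}
		}(i)
	}
	wg.Wait()
	select {
	case err := <-errCh:
		fmt.Println("first error:", err)
	default:
		fmt.Println("no errors")
	}
}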
// getTPMapMatchingIncomingAffinityAntiAffinity finds existing Pods that match affinity terms of the given "pod".
// It returns a topologyPairsMaps that are checked later by the affinity
// predicate. With this topologyPairsMaps available, the affinity predicate does not
// need to check all the pods in the cluster.
func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo) (topologyPairsAffinityPodsMaps *topologyPairsMaps, topologyPairsAntiAffinityPodsMaps *topologyPairsMaps, err error) {
affinity := pod.Spec.Affinity
if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
return newTopologyPairsMaps(), newTopologyPairsMaps(), nil
}
errCh := schedutil.NewErrorChannel()
var lock sync.Mutex
topologyPairsAffinityPodsMaps = newTopologyPairsMaps()
topologyPairsAntiAffinityPodsMaps = newTopologyPairsMaps()
appendResult := func(nodeName string, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps *topologyPairsMaps) {
@ -442,28 +795,21 @@ func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, nodeInfoMap map[s
}
}
affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
affinityProperties, err := getAffinityTermProperties(pod, affinityTerms)
if err != nil {
return nil, nil, err
}
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
ctx, cancel := context.WithCancel(context.Background())
processNode := func(i int) {
nodeInfo := allNodes[i]
node := nodeInfo.Node()
if node == nil {
catchError(fmt.Errorf("nodeInfo.Node is nil"))
klog.Error("node not found")
return
}
nodeTopologyPairsAffinityPodsMaps := newTopologyPairsMaps()
@ -483,8 +829,7 @@ func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, nodeInfoMap map[s
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
errCh.SendErrorWithCancel(err, cancel)
return
}
if priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
@ -495,12 +840,18 @@ func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, nodeInfoMap map[s
}
}
}
if len(nodeTopologyPairsAffinityPodsMaps.topologyPairToPods) > 0 || len(nodeTopologyPairsAntiAffinityPodsMaps.topologyPairToPods) > 0 {
appendResult(node.Name, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps)
}
}
workqueue.ParallelizeUntil(ctx, 16, len(allNodes), processNode)
if err := errCh.ReceiveError(); err != nil {
return nil, nil, err
}
return topologyPairsAffinityPodsMaps, topologyPairsAntiAffinityPodsMaps, nil
}
// targetPodMatchesAffinityOfPod returns true if "targetPod" matches ALL affinity terms of

File diff suppressed because it is too large

View File

@ -1,85 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
)
// FakePersistentVolumeClaimInfo declares a []v1.PersistentVolumeClaim type for testing.
type FakePersistentVolumeClaimInfo []v1.PersistentVolumeClaim
// GetPersistentVolumeClaimInfo gets PVC matching the namespace and PVC ID.
func (pvcs FakePersistentVolumeClaimInfo) GetPersistentVolumeClaimInfo(namespace string, pvcID string) (*v1.PersistentVolumeClaim, error) {
for _, pvc := range pvcs {
if pvc.Name == pvcID && pvc.Namespace == namespace {
return &pvc, nil
}
}
return nil, fmt.Errorf("Unable to find persistent volume claim: %s/%s", namespace, pvcID)
}
// FakeNodeInfo declares a v1.Node type for testing.
type FakeNodeInfo v1.Node
// GetNodeInfo return a fake node info object.
func (n FakeNodeInfo) GetNodeInfo(nodeName string) (*v1.Node, error) {
node := v1.Node(n)
return &node, nil
}
// FakeNodeListInfo declares a []v1.Node type for testing.
type FakeNodeListInfo []v1.Node
// GetNodeInfo returns a fake node object in the fake nodes.
func (nodes FakeNodeListInfo) GetNodeInfo(nodeName string) (*v1.Node, error) {
for _, node := range nodes {
if node.Name == nodeName {
return &node, nil
}
}
return nil, fmt.Errorf("Unable to find node: %s", nodeName)
}
// FakePersistentVolumeInfo declares a []v1.PersistentVolume type for testing.
type FakePersistentVolumeInfo []v1.PersistentVolume
// GetPersistentVolumeInfo returns a fake PV object in the fake PVs by PV ID.
func (pvs FakePersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*v1.PersistentVolume, error) {
for _, pv := range pvs {
if pv.Name == pvID {
return &pv, nil
}
}
return nil, fmt.Errorf("Unable to find persistent volume: %s", pvID)
}
// FakeStorageClassInfo declares a []storagev1.StorageClass type for testing.
type FakeStorageClassInfo []storagev1.StorageClass
// GetStorageClassInfo returns a fake storage class object in the fake storage classes by name.
func (classes FakeStorageClassInfo) GetStorageClassInfo(name string) (*storagev1.StorageClass, error) {
for _, sc := range classes {
if sc.Name == name {
return &sc, nil
}
}
return nil, fmt.Errorf("Unable to find storage class: %s", name)
}
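These hand-rolled fakes are deleted; presumably tests move to lister-backed fixtures instead. A sketch of the informer-backed equivalent using client-go's fake clientset (not the commit's actual test code):

package main

import (
	"fmt"

	storagev1 "k8s.io/api/storage/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"
)

func main() {
	// A fake clientset seeded with one StorageClass backs a real lister.
	client := fake.NewSimpleClientset(&storagev1.StorageClass{
		ObjectMeta: metav1.ObjectMeta{Name: "standard"},
	})
	factory := informers.NewSharedInformerFactory(client, 0)
	scLister := factory.Storage().V1().StorageClasses().Lister()

	stop := make(chan struct{})
	defer close(stop)
	factory.Start(stop)
	factory.WaitForCacheSync(stop)

	sc, err := scLister.Get("standard")
	fmt.Println(sc.Name, err) // standard <nil>
}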

View File

@ -17,8 +17,15 @@ limitations under the License.
package predicates
import (
"k8s.io/api/core/v1"
"strings"
v1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
csilibplugins "k8s.io/csi-translation-lib/plugins"
"k8s.io/kubernetes/pkg/features"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
@ -60,7 +67,7 @@ func FilterPodsByNamespace(pods []*v1.Pod, ns string) []*v1.Pod {
// CreateSelectorFromLabels is used to define a selector that corresponds to the keys in a map.
func CreateSelectorFromLabels(aL map[string]string) labels.Selector {
if len(aL) == 0 {
return labels.Everything()
}
return labels.Set(aL).AsSelector()
@ -87,3 +94,56 @@ func SetPredicatesOrderingDuringTest(value []string) func() {
predicatesOrdering = origVal
}
}
// isCSIMigrationOn returns a boolean value indicating whether
// the CSI migration has been enabled for a particular storage plugin.
func isCSIMigrationOn(csiNode *storagev1.CSINode, pluginName string) bool {
if csiNode == nil || len(pluginName) == 0 {
return false
}
// In-tree storage to CSI driver migration feature should be enabled,
// along with the plugin-specific one
if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigration) {
return false
}
switch pluginName {
case csilibplugins.AWSEBSInTreePluginName:
if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationAWS) {
return false
}
case csilibplugins.GCEPDInTreePluginName:
if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationGCE) {
return false
}
case csilibplugins.AzureDiskInTreePluginName:
if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationAzureDisk) {
return false
}
case csilibplugins.CinderInTreePluginName:
if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationOpenStack) {
return false
}
default:
return false
}
// The plugin name should be listed in the CSINode object annotation.
// This indicates that the plugin has been migrated to a CSI driver on the node.
csiNodeAnn := csiNode.GetAnnotations()
if csiNodeAnn == nil {
return false
}
var mpaSet sets.String
mpa := csiNodeAnn[v1.MigratedPluginsAnnotationKey]
if len(mpa) == 0 {
mpaSet = sets.NewString()
} else {
tok := strings.Split(mpa, ",")
mpaSet = sets.NewString(tok...)
}
return mpaSet.Has(pluginName)
}
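A standalone sketch of the annotation check at the end of the function; the annotation value is invented, and the key in question is v1.MigratedPluginsAnnotationKey:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Value of the migrated-plugins annotation on a CSINode, listing in-tree
	// plugins already served by CSI drivers on that node (contents invented).
	mpa := "kubernetes.io/aws-ebs,kubernetes.io/gce-pd"
	mpaSet := map[string]struct{}{}
	for _, tok := range strings.Split(mpa, ",") {
		mpaSet[tok] = struct{}{}
	}
	_, migrated := mpaSet["kubernetes.io/aws-ebs"]
	fmt.Println(migrated) // true: this plugin's volumes are counted via the CSI limits
}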

View File

@ -16,7 +16,10 @@ limitations under the License.
package util
import "k8s.io/api/core/v1"
import (
v1 "k8s.io/api/core/v1"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
)
// For each of these resources, a pod that doesn't request the resource explicitly
// will be treated as having requested the amount indicated below, for the purpose
@ -26,27 +29,50 @@ import "k8s.io/api/core/v1"
// consuming no resources whatsoever. We chose these values to be similar to the
// resources that we give to cluster addon pods (#10653). But they are pretty arbitrary.
// As described in #11713, we use request instead of limit to deal with resource requirements.
// DefaultMilliCPURequest defines default milli cpu request number.
const DefaultMilliCPURequest int64 = 100 // 0.1 core
// DefaultMemoryRequest defines default memory request size.
const DefaultMemoryRequest int64 = 200 * 1024 * 1024 // 200 MB
// GetNonzeroRequests returns the default cpu and memory resource request if none is found or
// what is provided on the request.
func GetNonzeroRequests(requests *v1.ResourceList) (int64, int64) {
var outMilliCPU, outMemory int64
// Override if un-set, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceCPU]; !found {
outMilliCPU = DefaultMilliCPURequest
} else {
outMilliCPU = requests.Cpu().MilliValue()
}
// Override if un-set, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceMemory]; !found {
outMemory = DefaultMemoryRequest
} else {
outMemory = requests.Memory().Value()
}
return outMilliCPU, outMemory
return GetNonzeroRequestForResource(v1.ResourceCPU, requests),
GetNonzeroRequestForResource(v1.ResourceMemory, requests)
}
// GetNonzeroRequestForResource returns the default resource request if none is found or
// what is provided on the request.
func GetNonzeroRequestForResource(resource v1.ResourceName, requests *v1.ResourceList) int64 {
switch resource {
case v1.ResourceCPU:
// Override if un-set, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceCPU]; !found {
return DefaultMilliCPURequest
}
return requests.Cpu().MilliValue()
case v1.ResourceMemory:
// Override if un-set, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceMemory]; !found {
return DefaultMemoryRequest
}
return requests.Memory().Value()
case v1.ResourceEphemeralStorage:
quantity, found := (*requests)[v1.ResourceEphemeralStorage]
if !found {
return 0
}
return quantity.Value()
default:
if v1helper.IsScalarResourceName(resource) {
quantity, found := (*requests)[resource]
if !found {
return 0
}
return quantity.Value()
}
}
return 0
}
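A short usage sketch (the scheduler util import path is assumed) showing that the defaults kick in only for unset resources, an explicit value is respected, and unset extended resources fall back to 0.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	schedutil "k8s.io/kubernetes/pkg/scheduler/util" // assumed import path
)

func main() {
	// CPU set explicitly (even an explicit zero would be respected);
	// memory left unset, so it falls back to DefaultMemoryRequest.
	requests := v1.ResourceList{
		v1.ResourceCPU: resource.MustParse("250m"),
	}
	cpu, mem := schedutil.GetNonzeroRequests(&requests)
	fmt.Println(cpu, mem) // 250 209715200

	// Unset scalar/extended resources default to 0.
	fmt.Println(schedutil.GetNonzeroRequestForResource("example.com/gpu", &requests)) // 0
}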

View File

@ -18,7 +18,7 @@ package algorithm
import (
"k8s.io/api/core/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
extenderv1 "k8s.io/kubernetes/pkg/scheduler/apis/extender/v1"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
@ -34,12 +34,12 @@ type SchedulerExtender interface {
// the list of failed nodes and failure reasons.
Filter(pod *v1.Pod,
nodes []*v1.Node, nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo,
) (filteredNodes []*v1.Node, failedNodesMap schedulerapi.FailedNodesMap, err error)
) (filteredNodes []*v1.Node, failedNodesMap extenderv1.FailedNodesMap, err error)
// Prioritize based on extender-implemented priority functions. The returned scores & weight
// are used to compute the weighted score for an extender. The weighted scores are added to
// the scores computed by Kubernetes scheduler. The total scores are used to do the host selection.
Prioritize(pod *v1.Pod, nodes []*v1.Node) (hostPriorities *schedulerapi.HostPriorityList, weight int, err error)
// the scores computed by Kubernetes scheduler. The total scores are used to do the host selection.
Prioritize(pod *v1.Pod, nodes []*v1.Node) (hostPriorities *extenderv1.HostPriorityList, weight int64, err error)
// Bind delegates the action of binding a pod to a node to the extender.
Bind(binding *v1.Binding) error
@ -61,9 +61,9 @@ type SchedulerExtender interface {
// 2. A different set of victim pod for every given candidate node after preemption phase of extender.
ProcessPreemption(
pod *v1.Pod,
nodeToVictims map[*v1.Node]*schedulerapi.Victims,
nodeToVictims map[*v1.Node]*extenderv1.Victims,
nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo,
) (map[*v1.Node]*schedulerapi.Victims, error)
) (map[*v1.Node]*extenderv1.Victims, error)
// SupportsPreemption returns if the scheduler extender support preemption or not.
SupportsPreemption() bool
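To make the signature change concrete, here is a hedged, no-op sketch (not from this commit) of a Prioritize-style hook written against the new extenderv1 types; note the result type moved packages and the weight is now int64.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	extenderv1 "k8s.io/kubernetes/pkg/scheduler/apis/extender/v1"
)

// prioritize is a hypothetical hook giving every node a zero score.
func prioritize(pod *v1.Pod, nodes []*v1.Node) (*extenderv1.HostPriorityList, int64, error) {
	result := make(extenderv1.HostPriorityList, 0, len(nodes))
	for _, n := range nodes {
		result = append(result, extenderv1.HostPriority{Host: n.Name, Score: 0})
	}
	return &result, 1, nil // weight is int64 after this change
}

func main() {
	nodes := []*v1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-a"}}}
	scores, weight, _ := prioritize(nil, nodes)
	fmt.Println((*scores)[0].Host, weight) // node-a 1
}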

View File

@ -17,68 +17,22 @@ limitations under the License.
package algorithm
import (
apps "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
policyv1beta1 "k8s.io/api/policy/v1beta1"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
appslisters "k8s.io/client-go/listers/apps/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/kubernetes/pkg/apis/apps"
api "k8s.io/kubernetes/pkg/apis/core"
)
// NodeFieldSelectorKeys is a map that: the key are node field selector keys; the values are
// NodeFieldSelectorKeys is a map that: the keys are node field selector keys; the values are
// the functions to get the value of the node field.
var NodeFieldSelectorKeys = map[string]func(*v1.Node) string{
schedulerapi.NodeFieldSelectorKeyNodeName: func(n *v1.Node) string { return n.Name },
api.ObjectNameField: func(n *v1.Node) string { return n.Name },
}
// NodeLister interface represents anything that can list nodes for a scheduler.
type NodeLister interface {
// We explicitly return []*v1.Node, instead of v1.NodeList, to avoid
// performing expensive copies that are unneeded.
List() ([]*v1.Node, error)
}
// PodFilter is a function to filter a pod. It returns true if the pod passes the filter and false otherwise.
type PodFilter func(*v1.Pod) bool
// PodLister interface represents anything that can list pods for a scheduler.
type PodLister interface {
// We explicitly return []*v1.Pod, instead of v1.PodList, to avoid
// performing expensive copies that are unneeded.
List(labels.Selector) ([]*v1.Pod, error)
// This is similar to "List()", but the returned slice does not
// contain pods that don't pass `podFilter`.
FilteredList(podFilter PodFilter, selector labels.Selector) ([]*v1.Pod, error)
}
// ServiceLister interface represents anything that can produce a list of services; the list is consumed by a scheduler.
type ServiceLister interface {
// Lists all the services
List(labels.Selector) ([]*v1.Service, error)
// Gets the services for the given pod
GetPodServices(*v1.Pod) ([]*v1.Service, error)
}
// ControllerLister interface represents anything that can produce a list of ReplicationController; the list is consumed by a scheduler.
type ControllerLister interface {
// Lists all the replication controllers
List(labels.Selector) ([]*v1.ReplicationController, error)
// Gets the services for the given pod
GetPodControllers(*v1.Pod) ([]*v1.ReplicationController, error)
}
// ReplicaSetLister interface represents anything that can produce a list of ReplicaSet; the list is consumed by a scheduler.
type ReplicaSetLister interface {
// Gets the replicasets for the given pod
GetPodReplicaSets(*v1.Pod) ([]*apps.ReplicaSet, error)
}
// PDBLister interface represents anything that can list PodDisruptionBudget objects.
type PDBLister interface {
// List() returns a list of PodDisruptionBudgets matching the selector.
List(labels.Selector) ([]*policyv1beta1.PodDisruptionBudget, error)
}
var _ ControllerLister = &EmptyControllerLister{}
var _ corelisters.ReplicationControllerLister = &EmptyControllerLister{}
// EmptyControllerLister implements ControllerLister on []v1.ReplicationController returning empty data
type EmptyControllerLister struct{}
@ -93,28 +47,53 @@ func (f EmptyControllerLister) GetPodControllers(pod *v1.Pod) (controllers []*v1
return nil, nil
}
var _ ReplicaSetLister = &EmptyReplicaSetLister{}
// ReplicationControllers returns nil
func (f EmptyControllerLister) ReplicationControllers(namespace string) corelisters.ReplicationControllerNamespaceLister {
return nil
}
var _ appslisters.ReplicaSetLister = &EmptyReplicaSetLister{}
// EmptyReplicaSetLister implements appslisters.ReplicaSetLister on []appsv1.ReplicaSet, returning empty data
type EmptyReplicaSetLister struct{}
// GetPodReplicaSets returns nil
func (f EmptyReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*apps.ReplicaSet, err error) {
// List returns nil
func (f EmptyReplicaSetLister) List(labels.Selector) ([]*appsv1.ReplicaSet, error) {
return nil, nil
}
// GetPodReplicaSets returns nil
func (f EmptyReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*appsv1.ReplicaSet, err error) {
return nil, nil
}
// ReplicaSets returns nil
func (f EmptyReplicaSetLister) ReplicaSets(namespace string) appslisters.ReplicaSetNamespaceLister {
return nil
}
// StatefulSetLister interface represents anything that can produce a list of StatefulSet; the list is consumed by a scheduler.
type StatefulSetLister interface {
// Gets the StatefulSet for the given pod.
GetPodStatefulSets(*v1.Pod) ([]*apps.StatefulSet, error)
}
var _ StatefulSetLister = &EmptyStatefulSetLister{}
var _ appslisters.StatefulSetLister = &EmptyStatefulSetLister{}
// EmptyStatefulSetLister implements appslisters.StatefulSetLister on []appsv1.StatefulSet, returning empty data.
type EmptyStatefulSetLister struct{}
// GetPodStatefulSets of EmptyStatefulSetLister returns nil.
func (f EmptyStatefulSetLister) GetPodStatefulSets(pod *v1.Pod) (sss []*apps.StatefulSet, err error) {
// List returns nil
func (f EmptyStatefulSetLister) List(labels.Selector) ([]*appsv1.StatefulSet, error) {
return nil, nil
}
// GetPodStatefulSets of EmptyStatefulSetLister returns nil.
func (f EmptyStatefulSetLister) GetPodStatefulSets(pod *v1.Pod) (sss []*appsv1.StatefulSet, err error) {
return nil, nil
}
// StatefulSets returns nil
func (f EmptyStatefulSetLister) StatefulSets(namespace string) appslisters.StatefulSetNamespaceLister {
return nil
}
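A small hedged sketch of where these empty listers are useful: as stand-ins in tests where no objects should be returned (the import path of this algorithm package is assumed).

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/scheduler/algorithm" // assumed import path
)

func main() {
	rs := algorithm.EmptyReplicaSetLister{}
	ss := algorithm.EmptyStatefulSetLister{}

	// Both satisfy the client-go lister interfaces but always return nil.
	rss, err := rs.GetPodReplicaSets(nil)
	fmt.Println(rss, err) // [] <nil>
	sss, err := ss.GetPodStatefulSets(nil)
	fmt.Println(sss, err) // [] <nil>
}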

View File

@ -1,55 +0,0 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
)
// Scheme is the default instance of runtime.Scheme to which types in the Kubernetes API are already registered.
// TODO: remove this, scheduler should not have its own scheme.
var Scheme = runtime.NewScheme()
// SchemeGroupVersion is group version used to register these objects
// TODO this should be in the "scheduler" group
var SchemeGroupVersion = schema.GroupVersion{Group: "", Version: runtime.APIVersionInternal}
var (
// SchemeBuilder defines a SchemeBuilder object.
SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
// AddToScheme is used to add stored functions to scheme.
AddToScheme = SchemeBuilder.AddToScheme
)
func init() {
if err := addKnownTypes(Scheme); err != nil {
// Programmer error.
panic(err)
}
}
func addKnownTypes(scheme *runtime.Scheme) error {
if err := scheme.AddIgnoredConversionType(&metav1.TypeMeta{}, &metav1.TypeMeta{}); err != nil {
return err
}
scheme.AddKnownTypes(SchemeGroupVersion,
&Policy{},
)
return nil
}

View File

@ -1,354 +0,0 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api
import (
"time"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
)
const (
// MaxUint defines the max unsigned int value.
MaxUint = ^uint(0)
// MaxInt defines the max signed int value.
MaxInt = int(MaxUint >> 1)
// MaxTotalPriority defines the max total priority value.
MaxTotalPriority = MaxInt
// MaxPriority defines the max priority value.
MaxPriority = 10
// MaxWeight defines the max weight value.
MaxWeight = MaxInt / MaxPriority
// DefaultPercentageOfNodesToScore defines the percentage of nodes of all nodes
// that once found feasible, the scheduler stops looking for more nodes.
DefaultPercentageOfNodesToScore = 50
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// Policy describes a struct of a policy resource in api.
type Policy struct {
metav1.TypeMeta
// Holds the information to configure the fit predicate functions.
// If unspecified, the default predicate functions will be applied.
// If empty list, all predicates (except the mandatory ones) will be
// bypassed.
Predicates []PredicatePolicy
// Holds the information to configure the priority functions.
// If unspecified, the default priority functions will be applied.
// If empty list, all priority functions will be bypassed.
Priorities []PriorityPolicy
// Holds the information to communicate with the extender(s)
ExtenderConfigs []ExtenderConfig
// RequiredDuringScheduling affinity is not symmetric, but there is an implicit PreferredDuringScheduling affinity rule
// corresponding to every RequiredDuringScheduling affinity rule.
// HardPodAffinitySymmetricWeight represents the weight of the implicit PreferredDuringScheduling affinity rule, in the range 1-100.
HardPodAffinitySymmetricWeight int32
// When AlwaysCheckAllPredicates is set to true, scheduler checks all
// the configured predicates even after one or more of them fails.
// When the flag is set to false, scheduler skips checking the rest
// of the predicates after it finds one predicate that failed.
AlwaysCheckAllPredicates bool
}
// PredicatePolicy describes a struct of a predicate policy.
type PredicatePolicy struct {
// Identifier of the predicate policy
// For a custom predicate, the name can be user-defined
// For the Kubernetes provided predicates, the name is the identifier of the pre-defined predicate
Name string
// Holds the parameters to configure the given predicate
Argument *PredicateArgument
}
// PriorityPolicy describes a struct of a priority policy.
type PriorityPolicy struct {
// Identifier of the priority policy
// For a custom priority, the name can be user-defined
// For the Kubernetes provided priority functions, the name is the identifier of the pre-defined priority function
Name string
// The numeric multiplier for the node scores that the priority function generates
// The weight should be a positive integer
Weight int
// Holds the parameters to configure the given priority function
Argument *PriorityArgument
}
// PredicateArgument represents the arguments to configure predicate functions in scheduler policy configuration.
// Only one of its members may be specified
type PredicateArgument struct {
// The predicate that provides affinity for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAffinity *ServiceAffinity
// The predicate that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelsPresence *LabelsPresence
}
// PriorityArgument represents the arguments to configure priority functions in scheduler policy configuration.
// Only one of its members may be specified
type PriorityArgument struct {
// The priority function that ensures a good spread (anti-affinity) for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAntiAffinity *ServiceAntiAffinity
// The priority function that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelPreference *LabelPreference
// The RequestedToCapacityRatio priority function is parametrized with function shape.
RequestedToCapacityRatioArguments *RequestedToCapacityRatioArguments
}
// ServiceAffinity holds the parameters that are used to configure the corresponding predicate in scheduler policy configuration.
type ServiceAffinity struct {
// The list of labels that identify node "groups"
// All of the labels should match for the node to be considered a fit for hosting the pod
Labels []string
}
// LabelsPresence holds the parameters that are used to configure the corresponding predicate in scheduler policy configuration.
type LabelsPresence struct {
// The list of labels that identify node "groups"
// All of the labels should be either present (or absent) for the node to be considered a fit for hosting the pod
Labels []string
// The boolean flag that indicates whether the labels should be present or absent from the node
Presence bool
}
// ServiceAntiAffinity holds the parameters that are used to configure the corresponding priority function
type ServiceAntiAffinity struct {
// Used to identify node "groups"
Label string
}
// LabelPreference holds the parameters that are used to configure the corresponding priority function
type LabelPreference struct {
// Used to identify node "groups"
Label string
// This is a boolean flag
// If true, higher priority is given to nodes that have the label
// If false, higher priority is given to nodes that do not have the label
Presence bool
}
// RequestedToCapacityRatioArguments holds arguments specific to RequestedToCapacityRatio priority function
type RequestedToCapacityRatioArguments struct {
// Array of point defining priority function shape
UtilizationShape []UtilizationShapePoint
}
// UtilizationShapePoint represents single point of priority function shape
type UtilizationShapePoint struct {
// Utilization (x axis). Valid values are 0 to 100. Fully utilized node maps to 100.
Utilization int
// Score assigned to given utilization (y axis). Valid values are 0 to 10.
Score int
}
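For reference (these types are removed from this package by the commit), a hedged sketch of a bin-packing style shape using local copies of the two structs: the score rises linearly with utilization from 0 up to the maximum of 10.

package main

import "fmt"

// Local copies of the removed types, for illustration only.
type UtilizationShapePoint struct {
	Utilization int // x axis, 0-100
	Score       int // y axis, 0-10
}

type RequestedToCapacityRatioArguments struct {
	UtilizationShape []UtilizationShapePoint
}

func main() {
	// Empty nodes score 0, fully utilized nodes score 10.
	args := RequestedToCapacityRatioArguments{
		UtilizationShape: []UtilizationShapePoint{
			{Utilization: 0, Score: 0},
			{Utilization: 100, Score: 10},
		},
	}
	fmt.Println(len(args.UtilizationShape)) // 2
}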
// ExtenderManagedResource describes the arguments of extended resources
// managed by an extender.
type ExtenderManagedResource struct {
// Name is the extended resource name.
Name v1.ResourceName
// IgnoredByScheduler indicates whether kube-scheduler should ignore this
// resource when applying predicates.
IgnoredByScheduler bool
}
// ExtenderTLSConfig contains settings to enable TLS with extender
type ExtenderTLSConfig struct {
// Server should be accessed without verifying the TLS certificate. For testing only.
Insecure bool
// ServerName is passed to the server for SNI and is used in the client to check server
// certificates against. If ServerName is empty, the hostname used to contact the
// server is used.
ServerName string
// Server requires TLS client certificate authentication
CertFile string
// Server requires TLS client certificate authentication
KeyFile string
// Trusted root certificates for server
CAFile string
// CertData holds PEM-encoded bytes (typically read from a client certificate file).
// CertData takes precedence over CertFile
CertData []byte
// KeyData holds PEM-encoded bytes (typically read from a client certificate key file).
// KeyData takes precedence over KeyFile
KeyData []byte
// CAData holds PEM-encoded bytes (typically read from a root certificates bundle).
// CAData takes precedence over CAFile
CAData []byte
}
// ExtenderConfig holds the parameters used to communicate with the extender. If a verb is unspecified/empty,
// it is assumed that the extender chose not to provide that extension.
type ExtenderConfig struct {
// URLPrefix at which the extender is available
URLPrefix string
// Verb for the filter call, empty if not supported. This verb is appended to the URLPrefix when issuing the filter call to extender.
FilterVerb string
// Verb for the preempt call, empty if not supported. This verb is appended to the URLPrefix when issuing the preempt call to extender.
PreemptVerb string
// Verb for the prioritize call, empty if not supported. This verb is appended to the URLPrefix when issuing the prioritize call to extender.
PrioritizeVerb string
// The numeric multiplier for the node scores that the prioritize call generates.
// The weight should be a positive integer
Weight int
// Verb for the bind call, empty if not supported. This verb is appended to the URLPrefix when issuing the bind call to extender.
// If this method is implemented by the extender, it is the extender's responsibility to bind the pod to the apiserver. Only one extender
// can implement this function.
BindVerb string
// EnableHTTPS specifies whether https should be used to communicate with the extender
EnableHTTPS bool
// TLSConfig specifies the transport layer security config
TLSConfig *ExtenderTLSConfig
// HTTPTimeout specifies the timeout duration for a call to the extender. Filter timeout fails the scheduling of the pod. Prioritize
// timeout is ignored, k8s/other extenders priorities are used to select the node.
HTTPTimeout time.Duration
// NodeCacheCapable specifies that the extender is capable of caching node information,
// so the scheduler should only send minimal information about the eligible nodes
// assuming that the extender already cached full details of all nodes in the cluster
NodeCacheCapable bool
// ManagedResources is a list of extended resources that are managed by
// this extender.
// - A pod will be sent to the extender on the Filter, Prioritize and Bind
// (if the extender is the binder) phases iff the pod requests at least
// one of the extended resources in this list. If empty or unspecified,
// all pods will be sent to this extender.
// - If IgnoredByScheduler is set to true for a resource, kube-scheduler
// will skip checking the resource in predicates.
// +optional
ManagedResources []ExtenderManagedResource
// Ignorable specifies if the extender is ignorable, i.e. scheduling should not
// fail when the extender returns an error or is not reachable.
Ignorable bool
}
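A hedged sketch of a populated ExtenderConfig, importing the package at the path it had before this removal; the endpoint URL and verbs below are made up.

package main

import (
	"fmt"
	"time"

	schedulerapi "k8s.io/kubernetes/pkg/scheduler/api" // path as it existed before this removal
)

func main() {
	cfg := schedulerapi.ExtenderConfig{
		URLPrefix:        "http://127.0.0.1:8888/scheduler", // hypothetical endpoint
		FilterVerb:       "filter",
		PrioritizeVerb:   "prioritize",
		Weight:           1,
		HTTPTimeout:      30 * time.Second,
		NodeCacheCapable: true, // scheduler sends node names only
		Ignorable:        true, // scheduling continues if this extender is down
	}
	fmt.Println(cfg.URLPrefix)
}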
// ExtenderPreemptionResult represents the result returned by preemption phase of extender.
type ExtenderPreemptionResult struct {
NodeNameToMetaVictims map[string]*MetaVictims
}
// ExtenderPreemptionArgs represents the arguments needed by the extender to preempt pods on nodes.
type ExtenderPreemptionArgs struct {
// Pod being scheduled
Pod *v1.Pod
// Victims map generated by scheduler preemption phase
// Only set NodeNameToMetaVictims if ExtenderConfig.NodeCacheCapable == true. Otherwise, only set NodeNameToVictims.
NodeNameToVictims map[string]*Victims
NodeNameToMetaVictims map[string]*MetaVictims
}
// Victims represents:
// pods: a group of pods expected to be preempted.
// numPDBViolations: the count of violations of PodDisruptionBudget
type Victims struct {
Pods []*v1.Pod
NumPDBViolations int
}
// MetaPod represents the identifier of a v1.Pod
type MetaPod struct {
UID string
}
// MetaVictims represents:
// pods: a group of pods expected to be preempted.
// Only Pod identifiers will be sent, and users are expected to get v1.Pod in their own way.
// numPDBViolations: the count of violations of PodDisruptionBudget
type MetaVictims struct {
Pods []*MetaPod
NumPDBViolations int
}
// ExtenderArgs represents the arguments needed by the extender to filter/prioritize
// nodes for a pod.
type ExtenderArgs struct {
// Pod being scheduled
Pod *v1.Pod
// List of candidate nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == false
Nodes *v1.NodeList
// List of candidate node names where the pod can be scheduled; to be
// populated only if ExtenderConfig.NodeCacheCapable == true
NodeNames *[]string
}
// FailedNodesMap represents the filtered out nodes, with node names and failure messages
type FailedNodesMap map[string]string
// ExtenderFilterResult represents the results of a filter call to an extender
type ExtenderFilterResult struct {
// Filtered set of nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == false
Nodes *v1.NodeList
// Filtered set of nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == true
NodeNames *[]string
// Filtered out nodes where the pod can't be scheduled and the failure messages
FailedNodes FailedNodesMap
// Error message indicating failure
Error string
}
// ExtenderBindingArgs represents the arguments to an extender for binding a pod to a node.
type ExtenderBindingArgs struct {
// PodName is the name of the pod being bound
PodName string
// PodNamespace is the namespace of the pod being bound
PodNamespace string
// PodUID is the UID of the pod being bound
PodUID types.UID
// Node selected by the scheduler
Node string
}
// ExtenderBindingResult represents the result of binding of a pod to a node from an extender.
type ExtenderBindingResult struct {
// Error message indicating failure
Error string
}
// HostPriority represents the priority of scheduling to a particular host, higher priority is better.
type HostPriority struct {
// Name of the host
Host string
// Score associated with the host
Score int
}
// HostPriorityList declares a []HostPriority type.
type HostPriorityList []HostPriority
func (h HostPriorityList) Len() int {
return len(h)
}
func (h HostPriorityList) Less(i, j int) bool {
if h[i].Score == h[j].Score {
return h[i].Host < h[j].Host
}
return h[i].Score < h[j].Score
}
func (h HostPriorityList) Swap(i, j int) {
h[i], h[j] = h[j], h[i]
}

View File

@ -1,72 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api
import (
api "k8s.io/kubernetes/pkg/apis/core"
)
const (
// TaintNodeNotReady will be added when node is not ready
// and feature-gate for TaintBasedEvictions flag is enabled,
// and removed when node becomes ready.
TaintNodeNotReady = "node.kubernetes.io/not-ready"
// TaintNodeUnreachable will be added when node becomes unreachable
// (corresponding to NodeReady status ConditionUnknown)
// and feature-gate for TaintBasedEvictions flag is enabled,
// and removed when node becomes reachable (NodeReady status ConditionTrue).
TaintNodeUnreachable = "node.kubernetes.io/unreachable"
// TaintNodeUnschedulable will be added when node becomes unschedulable
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node becomes schedulable.
TaintNodeUnschedulable = "node.kubernetes.io/unschedulable"
// TaintNodeMemoryPressure will be added when node has memory pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough memory.
TaintNodeMemoryPressure = "node.kubernetes.io/memory-pressure"
// TaintNodeDiskPressure will be added when node has disk pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
TaintNodeDiskPressure = "node.kubernetes.io/disk-pressure"
// TaintNodeNetworkUnavailable will be added when node's network is unavailable
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when network becomes ready.
TaintNodeNetworkUnavailable = "node.kubernetes.io/network-unavailable"
// TaintNodePIDPressure will be added when node has pid pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has sufficient PID capacity.
TaintNodePIDPressure = "node.kubernetes.io/pid-pressure"
// TaintExternalCloudProvider sets this taint on a node to mark it as unusable,
// when kubelet is started with the "external" cloud provider, until a controller
// from the cloud-controller-manager initializes this node, and then removes
// the taint
TaintExternalCloudProvider = "node.cloudprovider.kubernetes.io/uninitialized"
// TaintNodeShutdown when node is shutdown in external cloud provider
TaintNodeShutdown = "node.cloudprovider.kubernetes.io/shutdown"
// NodeFieldSelectorKeyNodeName ('metadata.name') uses this as node field selector key
// when selecting node by node's name.
NodeFieldSelectorKeyNodeName = api.ObjectNameField
)

View File

@ -1,669 +0,0 @@
// +build !ignore_autogenerated
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by deepcopy-gen. DO NOT EDIT.
package api
import (
v1 "k8s.io/api/core/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderArgs) DeepCopyInto(out *ExtenderArgs) {
*out = *in
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
*out = new(v1.Pod)
(*in).DeepCopyInto(*out)
}
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
*out = new(v1.NodeList)
(*in).DeepCopyInto(*out)
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderArgs.
func (in *ExtenderArgs) DeepCopy() *ExtenderArgs {
if in == nil {
return nil
}
out := new(ExtenderArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderBindingArgs) DeepCopyInto(out *ExtenderBindingArgs) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderBindingArgs.
func (in *ExtenderBindingArgs) DeepCopy() *ExtenderBindingArgs {
if in == nil {
return nil
}
out := new(ExtenderBindingArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderBindingResult) DeepCopyInto(out *ExtenderBindingResult) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderBindingResult.
func (in *ExtenderBindingResult) DeepCopy() *ExtenderBindingResult {
if in == nil {
return nil
}
out := new(ExtenderBindingResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderConfig) DeepCopyInto(out *ExtenderConfig) {
*out = *in
if in.TLSConfig != nil {
in, out := &in.TLSConfig, &out.TLSConfig
*out = new(ExtenderTLSConfig)
(*in).DeepCopyInto(*out)
}
if in.ManagedResources != nil {
in, out := &in.ManagedResources, &out.ManagedResources
*out = make([]ExtenderManagedResource, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderConfig.
func (in *ExtenderConfig) DeepCopy() *ExtenderConfig {
if in == nil {
return nil
}
out := new(ExtenderConfig)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderFilterResult) DeepCopyInto(out *ExtenderFilterResult) {
*out = *in
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
*out = new(v1.NodeList)
(*in).DeepCopyInto(*out)
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
if in.FailedNodes != nil {
in, out := &in.FailedNodes, &out.FailedNodes
*out = make(FailedNodesMap, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderFilterResult.
func (in *ExtenderFilterResult) DeepCopy() *ExtenderFilterResult {
if in == nil {
return nil
}
out := new(ExtenderFilterResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderManagedResource) DeepCopyInto(out *ExtenderManagedResource) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderManagedResource.
func (in *ExtenderManagedResource) DeepCopy() *ExtenderManagedResource {
if in == nil {
return nil
}
out := new(ExtenderManagedResource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderPreemptionArgs) DeepCopyInto(out *ExtenderPreemptionArgs) {
*out = *in
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
*out = new(v1.Pod)
(*in).DeepCopyInto(*out)
}
if in.NodeNameToVictims != nil {
in, out := &in.NodeNameToVictims, &out.NodeNameToVictims
*out = make(map[string]*Victims, len(*in))
for key, val := range *in {
var outVal *Victims
if val == nil {
(*out)[key] = nil
} else {
in, out := &val, &outVal
*out = new(Victims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
if in.NodeNameToMetaVictims != nil {
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
var outVal *MetaVictims
if val == nil {
(*out)[key] = nil
} else {
in, out := &val, &outVal
*out = new(MetaVictims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderPreemptionArgs.
func (in *ExtenderPreemptionArgs) DeepCopy() *ExtenderPreemptionArgs {
if in == nil {
return nil
}
out := new(ExtenderPreemptionArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderPreemptionResult) DeepCopyInto(out *ExtenderPreemptionResult) {
*out = *in
if in.NodeNameToMetaVictims != nil {
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
var outVal *MetaVictims
if val == nil {
(*out)[key] = nil
} else {
in, out := &val, &outVal
*out = new(MetaVictims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderPreemptionResult.
func (in *ExtenderPreemptionResult) DeepCopy() *ExtenderPreemptionResult {
if in == nil {
return nil
}
out := new(ExtenderPreemptionResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderTLSConfig) DeepCopyInto(out *ExtenderTLSConfig) {
*out = *in
if in.CertData != nil {
in, out := &in.CertData, &out.CertData
*out = make([]byte, len(*in))
copy(*out, *in)
}
if in.KeyData != nil {
in, out := &in.KeyData, &out.KeyData
*out = make([]byte, len(*in))
copy(*out, *in)
}
if in.CAData != nil {
in, out := &in.CAData, &out.CAData
*out = make([]byte, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderTLSConfig.
func (in *ExtenderTLSConfig) DeepCopy() *ExtenderTLSConfig {
if in == nil {
return nil
}
out := new(ExtenderTLSConfig)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in FailedNodesMap) DeepCopyInto(out *FailedNodesMap) {
{
in := &in
*out = make(FailedNodesMap, len(*in))
for key, val := range *in {
(*out)[key] = val
}
return
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedNodesMap.
func (in FailedNodesMap) DeepCopy() FailedNodesMap {
if in == nil {
return nil
}
out := new(FailedNodesMap)
in.DeepCopyInto(out)
return *out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HostPriority) DeepCopyInto(out *HostPriority) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostPriority.
func (in *HostPriority) DeepCopy() *HostPriority {
if in == nil {
return nil
}
out := new(HostPriority)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in HostPriorityList) DeepCopyInto(out *HostPriorityList) {
{
in := &in
*out = make(HostPriorityList, len(*in))
copy(*out, *in)
return
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostPriorityList.
func (in HostPriorityList) DeepCopy() HostPriorityList {
if in == nil {
return nil
}
out := new(HostPriorityList)
in.DeepCopyInto(out)
return *out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LabelPreference) DeepCopyInto(out *LabelPreference) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelPreference.
func (in *LabelPreference) DeepCopy() *LabelPreference {
if in == nil {
return nil
}
out := new(LabelPreference)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LabelsPresence) DeepCopyInto(out *LabelsPresence) {
*out = *in
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelsPresence.
func (in *LabelsPresence) DeepCopy() *LabelsPresence {
if in == nil {
return nil
}
out := new(LabelsPresence)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetaPod) DeepCopyInto(out *MetaPod) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaPod.
func (in *MetaPod) DeepCopy() *MetaPod {
if in == nil {
return nil
}
out := new(MetaPod)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetaVictims) DeepCopyInto(out *MetaVictims) {
*out = *in
if in.Pods != nil {
in, out := &in.Pods, &out.Pods
*out = make([]*MetaPod, len(*in))
for i := range *in {
if (*in)[i] != nil {
in, out := &(*in)[i], &(*out)[i]
*out = new(MetaPod)
**out = **in
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaVictims.
func (in *MetaVictims) DeepCopy() *MetaVictims {
if in == nil {
return nil
}
out := new(MetaVictims)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Policy) DeepCopyInto(out *Policy) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Predicates != nil {
in, out := &in.Predicates, &out.Predicates
*out = make([]PredicatePolicy, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Priorities != nil {
in, out := &in.Priorities, &out.Priorities
*out = make([]PriorityPolicy, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.ExtenderConfigs != nil {
in, out := &in.ExtenderConfigs, &out.ExtenderConfigs
*out = make([]ExtenderConfig, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Policy.
func (in *Policy) DeepCopy() *Policy {
if in == nil {
return nil
}
out := new(Policy)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *Policy) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PredicateArgument) DeepCopyInto(out *PredicateArgument) {
*out = *in
if in.ServiceAffinity != nil {
in, out := &in.ServiceAffinity, &out.ServiceAffinity
*out = new(ServiceAffinity)
(*in).DeepCopyInto(*out)
}
if in.LabelsPresence != nil {
in, out := &in.LabelsPresence, &out.LabelsPresence
*out = new(LabelsPresence)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredicateArgument.
func (in *PredicateArgument) DeepCopy() *PredicateArgument {
if in == nil {
return nil
}
out := new(PredicateArgument)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PredicatePolicy) DeepCopyInto(out *PredicatePolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
*out = new(PredicateArgument)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredicatePolicy.
func (in *PredicatePolicy) DeepCopy() *PredicatePolicy {
if in == nil {
return nil
}
out := new(PredicatePolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PriorityArgument) DeepCopyInto(out *PriorityArgument) {
*out = *in
if in.ServiceAntiAffinity != nil {
in, out := &in.ServiceAntiAffinity, &out.ServiceAntiAffinity
*out = new(ServiceAntiAffinity)
**out = **in
}
if in.LabelPreference != nil {
in, out := &in.LabelPreference, &out.LabelPreference
*out = new(LabelPreference)
**out = **in
}
if in.RequestedToCapacityRatioArguments != nil {
in, out := &in.RequestedToCapacityRatioArguments, &out.RequestedToCapacityRatioArguments
*out = new(RequestedToCapacityRatioArguments)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PriorityArgument.
func (in *PriorityArgument) DeepCopy() *PriorityArgument {
if in == nil {
return nil
}
out := new(PriorityArgument)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PriorityPolicy) DeepCopyInto(out *PriorityPolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
*out = new(PriorityArgument)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PriorityPolicy.
func (in *PriorityPolicy) DeepCopy() *PriorityPolicy {
if in == nil {
return nil
}
out := new(PriorityPolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RequestedToCapacityRatioArguments) DeepCopyInto(out *RequestedToCapacityRatioArguments) {
*out = *in
if in.UtilizationShape != nil {
in, out := &in.UtilizationShape, &out.UtilizationShape
*out = make([]UtilizationShapePoint, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RequestedToCapacityRatioArguments.
func (in *RequestedToCapacityRatioArguments) DeepCopy() *RequestedToCapacityRatioArguments {
if in == nil {
return nil
}
out := new(RequestedToCapacityRatioArguments)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServiceAffinity) DeepCopyInto(out *ServiceAffinity) {
*out = *in
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceAffinity.
func (in *ServiceAffinity) DeepCopy() *ServiceAffinity {
if in == nil {
return nil
}
out := new(ServiceAffinity)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServiceAntiAffinity) DeepCopyInto(out *ServiceAntiAffinity) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceAntiAffinity.
func (in *ServiceAntiAffinity) DeepCopy() *ServiceAntiAffinity {
if in == nil {
return nil
}
out := new(ServiceAntiAffinity)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *UtilizationShapePoint) DeepCopyInto(out *UtilizationShapePoint) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UtilizationShapePoint.
func (in *UtilizationShapePoint) DeepCopy() *UtilizationShapePoint {
if in == nil {
return nil
}
out := new(UtilizationShapePoint)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Victims) DeepCopyInto(out *Victims) {
*out = *in
if in.Pods != nil {
in, out := &in.Pods, &out.Pods
*out = make([]*v1.Pod, len(*in))
for i := range *in {
if (*in)[i] != nil {
in, out := &(*in)[i], &(*out)[i]
*out = new(v1.Pod)
(*in).DeepCopyInto(*out)
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Victims.
func (in *Victims) DeepCopy() *Victims {
if in == nil {
return nil
}
out := new(Victims)
in.DeepCopyInto(out)
return out
}

View File

@ -1,5 +1,5 @@
/*
Copyright 2016 The Kubernetes Authors.
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -16,5 +16,5 @@ limitations under the License.
// +k8s:deepcopy-gen=package
// Package api contains scheduler API objects.
package api // import "k8s.io/kubernetes/pkg/scheduler/api"
// Package v1 contains scheduler API objects.
package v1 // import "k8s.io/kubernetes/pkg/scheduler/apis/extender/v1"

View File

@ -0,0 +1,126 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1
import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
)
const (
// MinExtenderPriority defines the min priority value for extender.
MinExtenderPriority int64 = 0
// MaxExtenderPriority defines the max priority value for extender.
MaxExtenderPriority int64 = 10
)
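A brief hedged sketch of the intended use of the new bounds: clamping a raw extender score into [MinExtenderPriority, MaxExtenderPriority]; clampScore is a hypothetical helper, not part of this package.

package main

import (
	"fmt"

	extenderv1 "k8s.io/kubernetes/pkg/scheduler/apis/extender/v1"
)

// clampScore keeps a score within the extender priority bounds.
func clampScore(s int64) int64 {
	if s < extenderv1.MinExtenderPriority {
		return extenderv1.MinExtenderPriority
	}
	if s > extenderv1.MaxExtenderPriority {
		return extenderv1.MaxExtenderPriority
	}
	return s
}

func main() {
	fmt.Println(clampScore(-3), clampScore(5), clampScore(42)) // 0 5 10
}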
// ExtenderPreemptionResult represents the result returned by preemption phase of extender.
type ExtenderPreemptionResult struct {
NodeNameToMetaVictims map[string]*MetaVictims
}
// ExtenderPreemptionArgs represents the arguments needed by the extender to preempt pods on nodes.
type ExtenderPreemptionArgs struct {
// Pod being scheduled
Pod *v1.Pod
// Victims map generated by scheduler preemption phase
// Only set NodeNameToMetaVictims if ExtenderConfig.NodeCacheCapable == true. Otherwise, only set NodeNameToVictims.
NodeNameToVictims map[string]*Victims
NodeNameToMetaVictims map[string]*MetaVictims
}
// Victims represents:
// pods: a group of pods expected to be preempted.
// numPDBViolations: the count of violations of PodDisruptionBudget
type Victims struct {
Pods []*v1.Pod
NumPDBViolations int64
}
// MetaPod represents the identifier of a v1.Pod
type MetaPod struct {
UID string
}
// MetaVictims represents:
// pods: a group of pods expected to be preempted.
// Only Pod identifiers will be sent, and users are expected to get v1.Pod in their own way.
// numPDBViolations: the count of violations of PodDisruptionBudget
type MetaVictims struct {
Pods []*MetaPod
NumPDBViolations int64
}
// ExtenderArgs represents the arguments needed by the extender to filter/prioritize
// nodes for a pod.
type ExtenderArgs struct {
// Pod being scheduled
Pod *v1.Pod
// List of candidate nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == false
Nodes *v1.NodeList
// List of candidate node names where the pod can be scheduled; to be
// populated only if ExtenderConfig.NodeCacheCapable == true
NodeNames *[]string
}
// FailedNodesMap represents the filtered out nodes, with node names and failure messages
type FailedNodesMap map[string]string
// ExtenderFilterResult represents the results of a filter call to an extender
type ExtenderFilterResult struct {
// Filtered set of nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == false
Nodes *v1.NodeList
// Filtered set of nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == true
NodeNames *[]string
// Filtered out nodes where the pod can't be scheduled and the failure messages
FailedNodes FailedNodesMap
// Error message indicating failure
Error string
}
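Since extenders exchange these types as JSON over HTTP, here is a hedged sketch of what a filter response could look like for a node-cache-capable extender (node-a passes, node-b is rejected; the node names and failure message are made up).

package main

import (
	"encoding/json"
	"fmt"

	extenderv1 "k8s.io/kubernetes/pkg/scheduler/apis/extender/v1"
)

func main() {
	nodeNames := []string{"node-a"}
	res := extenderv1.ExtenderFilterResult{
		NodeNames:   &nodeNames,
		FailedNodes: extenderv1.FailedNodesMap{"node-b": "insufficient example.com/gpu"},
	}
	b, _ := json.Marshal(&res)
	fmt.Println(string(b))
}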
// ExtenderBindingArgs represents the arguments to an extender for binding a pod to a node.
type ExtenderBindingArgs struct {
// PodName is the name of the pod being bound
PodName string
// PodNamespace is the namespace of the pod being bound
PodNamespace string
// PodUID is the UID of the pod being bound
PodUID types.UID
// Node selected by the scheduler
Node string
}
// ExtenderBindingResult represents the result of binding of a pod to a node from an extender.
type ExtenderBindingResult struct {
// Error message indicating failure
Error string
}
// HostPriority represents the priority of scheduling to a particular host, higher priority is better.
type HostPriority struct {
// Name of the host
Host string
// Score associated with the host
Score int64
}
// HostPriorityList declares a []HostPriority type.
type HostPriorityList []HostPriority
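The moved HostPriorityList no longer carries the Len/Less/Swap methods of the removed package, so callers that relied on sort.Sort can reproduce the old ordering with sort.Slice; a hedged sketch:

package main

import (
	"fmt"
	"sort"

	extenderv1 "k8s.io/kubernetes/pkg/scheduler/apis/extender/v1"
)

func main() {
	h := extenderv1.HostPriorityList{
		{Host: "node-b", Score: 5},
		{Host: "node-a", Score: 5},
	}
	// Same ordering the removed Less method used: by score, then host name.
	sort.Slice(h, func(i, j int) bool {
		if h[i].Score == h[j].Score {
			return h[i].Host < h[j].Host
		}
		return h[i].Score < h[j].Score
	})
	fmt.Println(h[0].Host, h[1].Host) // node-a node-b
}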

View File

@ -0,0 +1,339 @@
// +build !ignore_autogenerated
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by deepcopy-gen. DO NOT EDIT.
package v1
import (
corev1 "k8s.io/api/core/v1"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderArgs) DeepCopyInto(out *ExtenderArgs) {
*out = *in
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
*out = new(corev1.Pod)
(*in).DeepCopyInto(*out)
}
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
*out = new(corev1.NodeList)
(*in).DeepCopyInto(*out)
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderArgs.
func (in *ExtenderArgs) DeepCopy() *ExtenderArgs {
if in == nil {
return nil
}
out := new(ExtenderArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderBindingArgs) DeepCopyInto(out *ExtenderBindingArgs) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderBindingArgs.
func (in *ExtenderBindingArgs) DeepCopy() *ExtenderBindingArgs {
if in == nil {
return nil
}
out := new(ExtenderBindingArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderBindingResult) DeepCopyInto(out *ExtenderBindingResult) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderBindingResult.
func (in *ExtenderBindingResult) DeepCopy() *ExtenderBindingResult {
if in == nil {
return nil
}
out := new(ExtenderBindingResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderFilterResult) DeepCopyInto(out *ExtenderFilterResult) {
*out = *in
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
*out = new(corev1.NodeList)
(*in).DeepCopyInto(*out)
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
if in.FailedNodes != nil {
in, out := &in.FailedNodes, &out.FailedNodes
*out = make(FailedNodesMap, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderFilterResult.
func (in *ExtenderFilterResult) DeepCopy() *ExtenderFilterResult {
if in == nil {
return nil
}
out := new(ExtenderFilterResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderPreemptionArgs) DeepCopyInto(out *ExtenderPreemptionArgs) {
*out = *in
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
*out = new(corev1.Pod)
(*in).DeepCopyInto(*out)
}
if in.NodeNameToVictims != nil {
in, out := &in.NodeNameToVictims, &out.NodeNameToVictims
*out = make(map[string]*Victims, len(*in))
for key, val := range *in {
var outVal *Victims
if val == nil {
(*out)[key] = nil
} else {
in, out := &val, &outVal
*out = new(Victims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
if in.NodeNameToMetaVictims != nil {
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
var outVal *MetaVictims
if val == nil {
(*out)[key] = nil
} else {
in, out := &val, &outVal
*out = new(MetaVictims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderPreemptionArgs.
func (in *ExtenderPreemptionArgs) DeepCopy() *ExtenderPreemptionArgs {
if in == nil {
return nil
}
out := new(ExtenderPreemptionArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderPreemptionResult) DeepCopyInto(out *ExtenderPreemptionResult) {
*out = *in
if in.NodeNameToMetaVictims != nil {
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
var outVal *MetaVictims
if val == nil {
(*out)[key] = nil
} else {
in, out := &val, &outVal
*out = new(MetaVictims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderPreemptionResult.
func (in *ExtenderPreemptionResult) DeepCopy() *ExtenderPreemptionResult {
if in == nil {
return nil
}
out := new(ExtenderPreemptionResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in FailedNodesMap) DeepCopyInto(out *FailedNodesMap) {
{
in := &in
*out = make(FailedNodesMap, len(*in))
for key, val := range *in {
(*out)[key] = val
}
return
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedNodesMap.
func (in FailedNodesMap) DeepCopy() FailedNodesMap {
if in == nil {
return nil
}
out := new(FailedNodesMap)
in.DeepCopyInto(out)
return *out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HostPriority) DeepCopyInto(out *HostPriority) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostPriority.
func (in *HostPriority) DeepCopy() *HostPriority {
if in == nil {
return nil
}
out := new(HostPriority)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in HostPriorityList) DeepCopyInto(out *HostPriorityList) {
{
in := &in
*out = make(HostPriorityList, len(*in))
copy(*out, *in)
return
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostPriorityList.
func (in HostPriorityList) DeepCopy() HostPriorityList {
if in == nil {
return nil
}
out := new(HostPriorityList)
in.DeepCopyInto(out)
return *out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetaPod) DeepCopyInto(out *MetaPod) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaPod.
func (in *MetaPod) DeepCopy() *MetaPod {
if in == nil {
return nil
}
out := new(MetaPod)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetaVictims) DeepCopyInto(out *MetaVictims) {
*out = *in
if in.Pods != nil {
in, out := &in.Pods, &out.Pods
*out = make([]*MetaPod, len(*in))
for i := range *in {
if (*in)[i] != nil {
in, out := &(*in)[i], &(*out)[i]
*out = new(MetaPod)
**out = **in
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaVictims.
func (in *MetaVictims) DeepCopy() *MetaVictims {
if in == nil {
return nil
}
out := new(MetaVictims)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Victims) DeepCopyInto(out *Victims) {
*out = *in
if in.Pods != nil {
in, out := &in.Pods, &out.Pods
*out = make([]*corev1.Pod, len(*in))
for i := range *in {
if (*in)[i] != nil {
in, out := &(*in)[i], &(*out)[i]
*out = new(corev1.Pod)
(*in).DeepCopyInto(*out)
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Victims.
func (in *Victims) DeepCopy() *Victims {
if in == nil {
return nil
}
out := new(Victims)
in.DeepCopyInto(out)
return out
}
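
The value of these generated copies is that a caller can mutate the result without aliasing the scheduler's cached state. A minimal sketch of the same map-copy pattern, using a simplified stand-in for FailedNodesMap (the lowercase names here are illustrative, not from the generated file):

package main

import "fmt"

// failedNodesMap is a simplified stand-in for the generated FailedNodesMap type.
type failedNodesMap map[string]string

// deepCopy follows the same pattern as the generated DeepCopy above:
// allocate a fresh map and copy every key/value pair.
func (in failedNodesMap) deepCopy() failedNodesMap {
	if in == nil {
		return nil
	}
	out := make(failedNodesMap, len(in))
	for key, val := range in {
		out[key] = val
	}
	return out
}

func main() {
	orig := failedNodesMap{"node-1": "insufficient cpu"}
	cp := orig.deepCopy()
	cp["node-1"] = "mutated"
	fmt.Println(orig["node-1"]) // still "insufficient cpu": the copy does not alias the original
}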


@ -0,0 +1,51 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package listers
import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// PodFilter is a function to filter a pod. It returns true if the pod passes the filter and false otherwise.
type PodFilter func(*v1.Pod) bool
// PodLister interface represents anything that can list pods for a scheduler.
type PodLister interface {
// Returns the list of pods.
List(labels.Selector) ([]*v1.Pod, error)
// FilteredList is similar to List(), but the returned slice contains only
// pods that pass `podFilter`.
FilteredList(podFilter PodFilter, selector labels.Selector) ([]*v1.Pod, error)
}
// NodeInfoLister interface represents anything that can list/get NodeInfo objects from node name.
type NodeInfoLister interface {
// Returns the list of NodeInfos.
List() ([]*schedulernodeinfo.NodeInfo, error)
// Returns the list of NodeInfos of nodes with pods with affinity terms.
HavePodsWithAffinityList() ([]*schedulernodeinfo.NodeInfo, error)
// Returns the NodeInfo of the given node name.
Get(nodeName string) (*schedulernodeinfo.NodeInfo, error)
}
// SharedLister groups scheduler-specific listers.
type SharedLister interface {
Pods() PodLister
NodeInfos() NodeInfoLister
}
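
A hypothetical in-memory lister makes the contract concrete: FilteredList applies both the pod filter and the label selector, and List is the unfiltered special case. The fakePodLister type below is invented for illustration; only the method shapes mirror the interfaces above:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/labels"
)

// podFilter mirrors the PodFilter type above.
type podFilter func(*v1.Pod) bool

// fakePodLister is a hypothetical in-memory lister following the PodLister contract.
type fakePodLister struct{ pods []*v1.Pod }

// List returns pods matching the label selector.
func (f *fakePodLister) List(selector labels.Selector) ([]*v1.Pod, error) {
	return f.FilteredList(func(*v1.Pod) bool { return true }, selector)
}

// FilteredList returns pods that both pass the filter and match the selector.
func (f *fakePodLister) FilteredList(filter podFilter, selector labels.Selector) ([]*v1.Pod, error) {
	var out []*v1.Pod
	for _, p := range f.pods {
		if filter(p) && selector.Matches(labels.Set(p.Labels)) {
			out = append(out, p)
		}
	}
	return out, nil
}

func main() {
	lister := &fakePodLister{pods: []*v1.Pod{
		{Status: v1.PodStatus{Phase: v1.PodRunning}},
		{Status: v1.PodStatus{Phase: v1.PodPending}},
	}}
	running, _ := lister.FilteredList(
		func(p *v1.Pod) bool { return p.Status.Phase == v1.PodRunning },
		labels.Everything(),
	)
	fmt.Println(len(running)) // 1
}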


@ -1,72 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
)
// MetricRecorder represents a metric recorder that takes action when Inc(),
// Dec(), or Clear() is called on the metric.
type MetricRecorder interface {
Inc()
Dec()
Clear()
}
var _ MetricRecorder = &PendingPodsRecorder{}
// PendingPodsRecorder is an implementation of MetricRecorder
type PendingPodsRecorder struct {
recorder prometheus.Gauge
}
// NewActivePodsRecorder returns a PendingPodsRecorder backed by the ActivePods Prometheus gauge.
func NewActivePodsRecorder() *PendingPodsRecorder {
return &PendingPodsRecorder{
recorder: ActivePods,
}
}
// NewUnschedulablePodsRecorder returns a PendingPodsRecorder backed by the UnschedulablePods Prometheus gauge.
func NewUnschedulablePodsRecorder() *PendingPodsRecorder {
return &PendingPodsRecorder{
recorder: UnschedulablePods,
}
}
// NewBackoffPodsRecorder returns a PendingPodsRecorder backed by the BackoffPods Prometheus gauge.
func NewBackoffPodsRecorder() *PendingPodsRecorder {
return &PendingPodsRecorder{
recorder: BackoffPods,
}
}
// Inc atomically increases the metric counter by 1.
func (r *PendingPodsRecorder) Inc() {
r.recorder.Inc()
}
// Dec atomically decreases the metric counter by 1.
func (r *PendingPodsRecorder) Dec() {
r.recorder.Dec()
}
// Clear atomically sets the metric counter to 0.
func (r *PendingPodsRecorder) Clear() {
r.recorder.Set(float64(0))
}
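
The PendingPodsRecorder pattern is just a thin adapter from the three-method contract onto a Prometheus gauge. A self-contained sketch of the same adapter (gaugeRecorder and the demo metric name are assumptions, not part of this package):

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

// gaugeRecorder mirrors PendingPodsRecorder: Inc/Dec/Clear delegate to a gauge.
type gaugeRecorder struct {
	g prometheus.Gauge
}

func (r *gaugeRecorder) Inc()   { r.g.Inc() }
func (r *gaugeRecorder) Dec()   { r.g.Dec() }
func (r *gaugeRecorder) Clear() { r.g.Set(0) }

func main() {
	g := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "demo_pending_pods",
		Help: "Illustrative gauge tracking queued items.",
	})
	rec := &gaugeRecorder{g: g}
	rec.Inc() // an item entered the queue
	rec.Inc()
	rec.Dec() // an item was popped; the gauge now reads 1
	rec.Clear()
	fmt.Println("gauge reset to 0")
}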


@ -1,255 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling"
)
const (
// SchedulerSubsystem - subsystem name used by scheduler
SchedulerSubsystem = "scheduler"
// SchedulingLatencyName - scheduler latency metric name
SchedulingLatencyName = "scheduling_duration_seconds"
// DeprecatedSchedulingLatencyName - scheduler latency metric name which is deprecated
DeprecatedSchedulingLatencyName = "scheduling_latency_seconds"
// OperationLabel - operation label name
OperationLabel = "operation"
// Below are possible values for the operation label. Each represents a substep of e2e scheduling:
// PredicateEvaluation - predicate evaluation operation label value
PredicateEvaluation = "predicate_evaluation"
// PriorityEvaluation - priority evaluation operation label value
PriorityEvaluation = "priority_evaluation"
// PreemptionEvaluation - preemption evaluation operation label value (occurs in case of scheduling fitError).
PreemptionEvaluation = "preemption_evaluation"
// Binding - binding operation label value
Binding = "binding"
// E2eScheduling - e2e scheduling operation label value
E2eScheduling = "e2e_scheduling"
)
// All histogram-based metrics use 1ms as the smallest bucket size.
var (
scheduleAttempts = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: SchedulerSubsystem,
Name: "schedule_attempts_total",
Help: "Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem.",
}, []string{"result"})
// PodScheduleSuccesses counts how many pods were scheduled.
PodScheduleSuccesses = scheduleAttempts.With(prometheus.Labels{"result": "scheduled"})
// PodScheduleFailures counts how many pods could not be scheduled.
PodScheduleFailures = scheduleAttempts.With(prometheus.Labels{"result": "unschedulable"})
// PodScheduleErrors counts how many pods could not be scheduled due to a scheduler error.
PodScheduleErrors = scheduleAttempts.With(prometheus.Labels{"result": "error"})
SchedulingLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: SchedulerSubsystem,
Name: SchedulingLatencyName,
Help: "Scheduling latency in seconds split by sub-parts of the scheduling operation",
// Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term).
MaxAge: 5 * time.Hour,
},
[]string{OperationLabel},
)
DeprecatedSchedulingLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: SchedulerSubsystem,
Name: DeprecatedSchedulingLatencyName,
Help: "(Deprecated) Scheduling latency in seconds split by sub-parts of the scheduling operation",
// Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term).
MaxAge: 5 * time.Hour,
},
[]string{OperationLabel},
)
E2eSchedulingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_duration_seconds",
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedE2eSchedulingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_latency_microseconds",
Help: "(Deprecated) E2e scheduling latency in microseconds (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_duration_seconds",
Help: "Scheduling algorithm latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedSchedulingAlgorithmLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_latency_microseconds",
Help: "(Deprecated) Scheduling algorithm latency in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_predicate_evaluation_seconds",
Help: "Scheduling algorithm predicate evaluation duration in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedSchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_predicate_evaluation",
Help: "(Deprecated) Scheduling algorithm predicate evaluation duration in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_priority_evaluation_seconds",
Help: "Scheduling algorithm priority evaluation duration in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedSchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_priority_evaluation",
Help: "(Deprecated) Scheduling algorithm priority evaluation duration in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_preemption_evaluation_seconds",
Help: "Scheduling algorithm preemption evaluation duration in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedSchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_preemption_evaluation",
Help: "(Deprecated) Scheduling algorithm preemption evaluation duration in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
BindingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "binding_duration_seconds",
Help: "Binding latency in seconds",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
},
)
DeprecatedBindingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "binding_latency_microseconds",
Help: "(Deprecated) Binding latency in microseconds",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
PreemptionVictims = prometheus.NewGauge(
prometheus.GaugeOpts{
Subsystem: SchedulerSubsystem,
Name: "pod_preemption_victims",
Help: "Number of selected preemption victims",
})
PreemptionAttempts = prometheus.NewCounter(
prometheus.CounterOpts{
Subsystem: SchedulerSubsystem,
Name: "total_preemption_attempts",
Help: "Total preemption attempts in the cluster till now",
})
pendingPods = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: SchedulerSubsystem,
Name: "pending_pods",
Help: "Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.",
}, []string{"queue"})
ActivePods = pendingPods.With(prometheus.Labels{"queue": "active"})
BackoffPods = pendingPods.With(prometheus.Labels{"queue": "backoff"})
UnschedulablePods = pendingPods.With(prometheus.Labels{"queue": "unschedulable"})
metricsList = []prometheus.Collector{
scheduleAttempts,
SchedulingLatency,
DeprecatedSchedulingLatency,
E2eSchedulingLatency,
DeprecatedE2eSchedulingLatency,
SchedulingAlgorithmLatency,
DeprecatedSchedulingAlgorithmLatency,
BindingLatency,
DeprecatedBindingLatency,
SchedulingAlgorithmPredicateEvaluationDuration,
DeprecatedSchedulingAlgorithmPredicateEvaluationDuration,
SchedulingAlgorithmPriorityEvaluationDuration,
DeprecatedSchedulingAlgorithmPriorityEvaluationDuration,
SchedulingAlgorithmPremptionEvaluationDuration,
DeprecatedSchedulingAlgorithmPremptionEvaluationDuration,
PreemptionVictims,
PreemptionAttempts,
pendingPods,
}
)
var registerMetrics sync.Once
// Register all metrics.
func Register() {
// Register the metrics.
registerMetrics.Do(func() {
for _, metric := range metricsList {
prometheus.MustRegister(metric)
}
volumescheduling.RegisterVolumeSchedulingMetrics()
})
}
// Reset resets metrics
func Reset() {
SchedulingLatency.Reset()
DeprecatedSchedulingLatency.Reset()
}
// SinceInMicroseconds gets the time since the specified start in microseconds.
func SinceInMicroseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
}
// SinceInSeconds gets the time since the specified start in seconds.
func SinceInSeconds(start time.Time) float64 {
return time.Since(start).Seconds()
}
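
SinceInSeconds is the helper fed into the seconds-based histograms above, whose 1ms smallest buckets come from ExponentialBuckets(0.001, 2, 15). A small sketch of the timing pattern against a stand-alone histogram (the metric name is made up):

package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	h := prometheus.NewHistogram(prometheus.HistogramOpts{
		Subsystem: "scheduler",
		Name:      "demo_step_duration_seconds",
		Help:      "Illustrative latency histogram.",
		// Same bucket layout as the metrics above: smallest bucket is 1ms.
		Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
	})
	prometheus.MustRegister(h)

	start := time.Now()
	time.Sleep(5 * time.Millisecond) // stand-in for a scheduling step
	// Equivalent to h.Observe(SinceInSeconds(start)).
	h.Observe(time.Since(start).Seconds())
}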


@ -22,11 +22,12 @@ import (
"sync"
"sync/atomic"
"k8s.io/klog"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/features"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
)
@ -52,11 +53,14 @@ type NodeInfo struct {
podsWithAffinity []*v1.Pod
usedPorts HostPortInfo
// Total requested resource of all pods on this node.
// It includes assumed pods which scheduler sends binding to apiserver but
// didn't get it as scheduled yet.
// Total requested resources of all pods on this node. This includes assumed
// pods, which scheduler has sent for binding, but may not be scheduled yet.
requestedResource *Resource
nonzeroRequest *Resource
// Total requested resources of all pods on this node with a minimum value
// applied to each container's CPU and memory requests. This does not reflect
// the actual resource requests for this node, but is used to avoid scheduling
// many zero-request pods onto one node.
nonzeroRequest *Resource
// We store allocatedResources (which is Node.Status.Allocatable.*) explicitly
// as int64, to avoid conversions and accessing map.
allocatableResource *Resource
@ -353,30 +357,6 @@ func (n *NodeInfo) SetTaints(newTaints []v1.Taint) {
n.taints = newTaints
}
// MemoryPressureCondition returns the memory pressure condition status on this node.
func (n *NodeInfo) MemoryPressureCondition() v1.ConditionStatus {
if n == nil {
return v1.ConditionUnknown
}
return n.memoryPressureCondition
}
// DiskPressureCondition returns the disk pressure condition status on this node.
func (n *NodeInfo) DiskPressureCondition() v1.ConditionStatus {
if n == nil {
return v1.ConditionUnknown
}
return n.diskPressureCondition
}
// PIDPressureCondition returns the pid pressure condition status on this node.
func (n *NodeInfo) PIDPressureCondition() v1.ConditionStatus {
if n == nil {
return v1.ConditionUnknown
}
return n.pidPressureCondition
}
// RequestedResource returns aggregated resource request of pods on this node.
func (n *NodeInfo) RequestedResource() Resource {
if n == nil {
@ -586,6 +566,19 @@ func calculateResource(pod *v1.Pod) (res Resource, non0CPU int64, non0Mem int64)
// No non-zero resources for GPUs or opaque resources.
}
// If Overhead is being utilized, add to the total requests for the pod
if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(features.PodOverhead) {
resPtr.Add(pod.Spec.Overhead)
if _, found := pod.Spec.Overhead[v1.ResourceCPU]; found {
non0CPU += pod.Spec.Overhead.Cpu().MilliValue()
}
if _, found := pod.Spec.Overhead[v1.ResourceMemory]; found {
non0Mem += pod.Spec.Overhead.Memory().Value()
}
}
return
}
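
The overhead branch above adds Spec.Overhead on top of the summed container requests when the PodOverhead feature gate is on. A toy illustration of that arithmetic (all values invented):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Containers: []v1.Container{{
				Resources: v1.ResourceRequirements{
					Requests: v1.ResourceList{
						v1.ResourceCPU: resource.MustParse("500m"),
					},
				},
			}},
			// Per-pod overhead, e.g. for a sandboxed runtime.
			Overhead: v1.ResourceList{
				v1.ResourceCPU: resource.MustParse("100m"),
			},
		},
	}

	cpu := pod.Spec.Containers[0].Resources.Requests.Cpu().MilliValue()
	cpu += pod.Spec.Overhead.Cpu().MilliValue()
	fmt.Println(cpu, "milli-CPU counted against the node") // 600
}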
@ -665,7 +658,10 @@ func (n *NodeInfo) FilterOutPods(pods []*v1.Pod) []*v1.Pod {
continue
}
// If pod is on the given node, add it to 'filtered' only if it is present in nodeInfo.
podKey, _ := GetPodKey(p)
podKey, err := GetPodKey(p)
if err != nil {
continue
}
for _, np := range n.Pods() {
npodkey, _ := GetPodKey(np)
if npodkey == podKey {


@ -1,78 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeinfo
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
)
// CreateNodeNameToInfoMap obtains a list of pods and pivots that list into a map where the keys are node names
// and the values are the aggregated information for that node.
func CreateNodeNameToInfoMap(pods []*v1.Pod, nodes []*v1.Node) map[string]*NodeInfo {
nodeNameToInfo := make(map[string]*NodeInfo)
for _, pod := range pods {
nodeName := pod.Spec.NodeName
if _, ok := nodeNameToInfo[nodeName]; !ok {
nodeNameToInfo[nodeName] = NewNodeInfo()
}
nodeNameToInfo[nodeName].AddPod(pod)
}
imageExistenceMap := createImageExistenceMap(nodes)
for _, node := range nodes {
if _, ok := nodeNameToInfo[node.Name]; !ok {
nodeNameToInfo[node.Name] = NewNodeInfo()
}
nodeInfo := nodeNameToInfo[node.Name]
nodeInfo.SetNode(node)
nodeInfo.imageStates = getNodeImageStates(node, imageExistenceMap)
}
return nodeNameToInfo
}
// getNodeImageStates returns the given node's image states based on the given imageExistence map.
func getNodeImageStates(node *v1.Node, imageExistenceMap map[string]sets.String) map[string]*ImageStateSummary {
imageStates := make(map[string]*ImageStateSummary)
for _, image := range node.Status.Images {
for _, name := range image.Names {
imageStates[name] = &ImageStateSummary{
Size: image.SizeBytes,
NumNodes: len(imageExistenceMap[name]),
}
}
}
return imageStates
}
// createImageExistenceMap returns a map recording on which nodes the images exist, keyed by the images' names.
func createImageExistenceMap(nodes []*v1.Node) map[string]sets.String {
imageExistenceMap := make(map[string]sets.String)
for _, node := range nodes {
for _, image := range node.Status.Images {
for _, name := range image.Names {
if _, ok := imageExistenceMap[name]; !ok {
imageExistenceMap[name] = sets.NewString(node.Name)
} else {
imageExistenceMap[name].Insert(node.Name)
}
}
}
}
return imageExistenceMap
}
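
Stripped of the image-state bookkeeping, CreateNodeNameToInfoMap is a pivot from a flat pod list to per-node aggregates. A reduced sketch of that pivot (podsByNode is illustrative, not part of the package):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// podsByNode groups pods by the node they are bound to, the core of the pivot above.
func podsByNode(pods []*v1.Pod) map[string][]*v1.Pod {
	byNode := make(map[string][]*v1.Pod)
	for _, pod := range pods {
		byNode[pod.Spec.NodeName] = append(byNode[pod.Spec.NodeName], pod)
	}
	return byNode
}

func main() {
	pods := []*v1.Pod{
		{Spec: v1.PodSpec{NodeName: "node-a"}},
		{Spec: v1.PodSpec{NodeName: "node-a"}},
		{Spec: v1.PodSpec{NodeName: "node-b"}},
	}
	for node, ps := range podsByNode(pods) {
		fmt.Printf("%s: %d pods\n", node, len(ps))
	}
}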


@ -0,0 +1,59 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import "context"
// ErrorChannel supports non-blocking send and receive operations so callers
// can capture errors. At most one error is kept in the channel; further sends
// are ignored until the buffered error is received and the channel becomes
// empty again.
type ErrorChannel struct {
errCh chan error
}
// SendError sends an error without blocking the sender.
func (e *ErrorChannel) SendError(err error) {
select {
case e.errCh <- err:
default:
}
}
// SendErrorWithCancel sends an error without blocking the sender and calls
// cancel function.
func (e *ErrorChannel) SendErrorWithCancel(err error, cancel context.CancelFunc) {
e.SendError(err)
cancel()
}
// ReceiveError receives an error from channel without blocking on the receiver.
func (e *ErrorChannel) ReceiveError() error {
select {
case err := <-e.errCh:
return err
default:
return nil
}
}
// NewErrorChannel returns a new ErrorChannel.
func NewErrorChannel() *ErrorChannel {
return &ErrorChannel{
errCh: make(chan error, 1),
}
}
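
A typical use is fan-out work where the first failure should stop the remaining goroutines. The sketch below reproduces the type locally so it runs stand-alone; the semantics match the methods above:

package main

import (
	"context"
	"errors"
	"fmt"
	"sync"
)

// errorChannel reproduces the ErrorChannel semantics above for a runnable demo.
type errorChannel struct{ errCh chan error }

func newErrorChannel() *errorChannel {
	return &errorChannel{errCh: make(chan error, 1)}
}

// sendErrorWithCancel buffers at most one error, then cancels the context.
func (e *errorChannel) sendErrorWithCancel(err error, cancel context.CancelFunc) {
	select {
	case e.errCh <- err:
	default: // an error is already buffered; drop this one
	}
	cancel()
}

func (e *errorChannel) receiveError() error {
	select {
	case err := <-e.errCh:
		return err
	default:
		return nil
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	ec := newErrorChannel()

	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			select {
			case <-ctx.Done(): // another worker already failed
				return
			default:
			}
			if i == 2 {
				ec.sendErrorWithCancel(errors.New("worker 2 failed"), cancel)
			}
		}(i)
	}
	wg.Wait()
	fmt.Println(ec.receiveError()) // prints the first (and only kept) error
}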


@ -1,258 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Below is the implementation of a heap. The logic is pretty much the same
// as cache.heap, however, this heap does not perform synchronization. It leaves
// synchronization to the SchedulingQueue.
package util
import (
"container/heap"
"fmt"
"k8s.io/client-go/tools/cache"
"k8s.io/kubernetes/pkg/scheduler/metrics"
)
// KeyFunc is a function type to get the key from an object.
type KeyFunc func(obj interface{}) (string, error)
type heapItem struct {
obj interface{} // The object which is stored in the heap.
index int // The index of the object's key in the Heap.queue.
}
type itemKeyValue struct {
key string
obj interface{}
}
// heapData is an internal struct that implements the standard heap interface
// and keeps the data stored in the heap.
type heapData struct {
// items maps an object's key to the object and its index in the queue.
// We depend on the property that items in the map are in the queue and vice versa.
items map[string]*heapItem
// queue implements a heap data structure and keeps the order of elements
// according to the heap invariant. The queue keeps the keys of objects stored
// in "items".
queue []string
// keyFunc is used to make the key used for queued item insertion and retrieval, and
// should be deterministic.
keyFunc KeyFunc
// lessFunc is used to compare two objects in the heap.
lessFunc LessFunc
}
var (
_ = heap.Interface(&heapData{}) // heapData is a standard heap
)
// Less compares two objects and returns true if the first one should go
// in front of the second one in the heap.
func (h *heapData) Less(i, j int) bool {
if i >= len(h.queue) || j >= len(h.queue) {
return false
}
itemi, ok := h.items[h.queue[i]]
if !ok {
return false
}
itemj, ok := h.items[h.queue[j]]
if !ok {
return false
}
return h.lessFunc(itemi.obj, itemj.obj)
}
// Len returns the number of items in the Heap.
func (h *heapData) Len() int { return len(h.queue) }
// Swap implements swapping of two elements in the heap. This is a part of standard
// heap interface and should never be called directly.
func (h *heapData) Swap(i, j int) {
h.queue[i], h.queue[j] = h.queue[j], h.queue[i]
item := h.items[h.queue[i]]
item.index = i
item = h.items[h.queue[j]]
item.index = j
}
// Push is supposed to be called by heap.Push only.
func (h *heapData) Push(kv interface{}) {
keyValue := kv.(*itemKeyValue)
n := len(h.queue)
h.items[keyValue.key] = &heapItem{keyValue.obj, n}
h.queue = append(h.queue, keyValue.key)
}
// Pop is supposed to be called by heap.Pop only.
func (h *heapData) Pop() interface{} {
key := h.queue[len(h.queue)-1]
h.queue = h.queue[0 : len(h.queue)-1]
item, ok := h.items[key]
if !ok {
// This should never happen: keys in the queue always exist in items.
return nil
}
delete(h.items, key)
return item.obj
}
// Peek returns the head of the queue without removing it; it is called by Heap.Peek only.
func (h *heapData) Peek() interface{} {
if len(h.queue) > 0 {
return h.items[h.queue[0]].obj
}
return nil
}
// Heap is a producer/consumer queue that implements a heap data structure.
// It can be used to implement priority queues and similar data structures.
type Heap struct {
// data stores objects and has a queue that keeps their ordering according
// to the heap invariant.
data *heapData
// metricRecorder updates the counter when elements of a heap get added or
// removed, and it does nothing if it's nil
metricRecorder metrics.MetricRecorder
}
// Add inserts an item, and puts it in the queue. The item is updated if it
// already exists.
func (h *Heap) Add(obj interface{}) error {
key, err := h.data.keyFunc(obj)
if err != nil {
return cache.KeyError{Obj: obj, Err: err}
}
if _, exists := h.data.items[key]; exists {
h.data.items[key].obj = obj
heap.Fix(h.data, h.data.items[key].index)
} else {
heap.Push(h.data, &itemKeyValue{key, obj})
if h.metricRecorder != nil {
h.metricRecorder.Inc()
}
}
return nil
}
// AddIfNotPresent inserts an item, and puts it in the queue. If an item with
// the key is present in the map, no changes are made to the item.
func (h *Heap) AddIfNotPresent(obj interface{}) error {
key, err := h.data.keyFunc(obj)
if err != nil {
return cache.KeyError{Obj: obj, Err: err}
}
if _, exists := h.data.items[key]; !exists {
heap.Push(h.data, &itemKeyValue{key, obj})
if h.metricRecorder != nil {
h.metricRecorder.Inc()
}
}
return nil
}
// Update is the same as Add in this implementation. When the item does not
// exist, it is added.
func (h *Heap) Update(obj interface{}) error {
return h.Add(obj)
}
// Delete removes an item.
func (h *Heap) Delete(obj interface{}) error {
key, err := h.data.keyFunc(obj)
if err != nil {
return cache.KeyError{Obj: obj, Err: err}
}
if item, ok := h.data.items[key]; ok {
heap.Remove(h.data, item.index)
if h.metricRecorder != nil {
h.metricRecorder.Dec()
}
return nil
}
return fmt.Errorf("object not found")
}
// Peek returns the head of the heap without removing it.
func (h *Heap) Peek() interface{} {
return h.data.Peek()
}
// Pop returns the head of the heap and removes it.
func (h *Heap) Pop() (interface{}, error) {
obj := heap.Pop(h.data)
if obj != nil {
if h.metricRecorder != nil {
h.metricRecorder.Dec()
}
return obj, nil
}
return nil, fmt.Errorf("object was removed from heap data")
}
// Get returns the requested item, or sets exists=false.
func (h *Heap) Get(obj interface{}) (interface{}, bool, error) {
key, err := h.data.keyFunc(obj)
if err != nil {
return nil, false, cache.KeyError{Obj: obj, Err: err}
}
return h.GetByKey(key)
}
// GetByKey returns the requested item, or sets exists=false.
func (h *Heap) GetByKey(key string) (interface{}, bool, error) {
item, exists := h.data.items[key]
if !exists {
return nil, false, nil
}
return item.obj, true, nil
}
// List returns a list of all the items.
func (h *Heap) List() []interface{} {
list := make([]interface{}, 0, len(h.data.items))
for _, item := range h.data.items {
list = append(list, item.obj)
}
return list
}
// Len returns the number of items in the heap.
func (h *Heap) Len() int {
return len(h.data.queue)
}
// NewHeap returns a Heap which can be used to queue up items to process.
func NewHeap(keyFn KeyFunc, lessFn LessFunc) *Heap {
return NewHeapWithRecorder(keyFn, lessFn, nil)
}
// NewHeapWithRecorder wraps an optional metricRecorder to compose a Heap object.
func NewHeapWithRecorder(keyFn KeyFunc, lessFn LessFunc, metricRecorder metrics.MetricRecorder) *Heap {
return &Heap{
data: &heapData{
items: map[string]*heapItem{},
queue: []string{},
keyFunc: keyFn,
lessFunc: lessFn,
},
metricRecorder: metricRecorder,
}
}
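
Driving this Heap needs only the two callbacks: a deterministic key and an ordering. A sketch against the API shown above, assuming it is importable as schedutil "k8s.io/kubernetes/pkg/scheduler/util" (this commit is relocating the file, so the path is a stated assumption; the task type is invented):

package main

import (
	"fmt"

	schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)

type task struct {
	name     string
	priority int
}

func main() {
	h := schedutil.NewHeap(
		// KeyFunc: a deterministic key per object.
		func(obj interface{}) (string, error) { return obj.(*task).name, nil },
		// LessFunc: higher priority floats to the top (max-heap behavior).
		func(a, b interface{}) bool { return a.(*task).priority > b.(*task).priority },
	)
	_ = h.Add(&task{name: "low", priority: 1})
	_ = h.Add(&task{name: "high", priority: 10})
	_ = h.Add(&task{name: "high", priority: 99}) // same key: item is updated and the heap re-fixed

	top, _ := h.Pop()
	fmt.Println(top.(*task).priority) // 99
}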


@ -17,16 +17,13 @@ limitations under the License.
package util
import (
"sort"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/apis/scheduling"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/api"
"time"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
extenderv1 "k8s.io/kubernetes/pkg/scheduler/apis/extender/v1"
)
// GetContainerPorts returns the used host ports of Pods: if 'port' was used, a 'port:true' pair
@ -44,11 +41,6 @@ func GetContainerPorts(pods ...*v1.Pod) []*v1.ContainerPort {
return ports
}
// PodPriorityEnabled indicates whether pod priority feature is enabled.
func PodPriorityEnabled() bool {
return feature.DefaultFeatureGate.Enabled(features.PodPriority)
}
// GetPodFullName returns a name that uniquely identifies a pod.
func GetPodFullName(pod *v1.Pod) string {
// Use underscore as the delimiter because it is not allowed in pod name
@ -56,17 +48,6 @@ func GetPodFullName(pod *v1.Pod) string {
return pod.Name + "_" + pod.Namespace
}
// GetPodPriority returns priority of the given pod.
func GetPodPriority(pod *v1.Pod) int32 {
if pod.Spec.Priority != nil {
return *pod.Spec.Priority
}
// When the priority of a running pod is nil, the pod was created at a time
// when there was no global default priority class and the priority class
// name of the pod was empty. So, we resolve to the static default priority.
return scheduling.DefaultPriorityWhenNoDefaultClassExists
}
// GetPodStartTime returns start time of the given pod.
func GetPodStartTime(pod *v1.Pod) *metav1.Time {
if pod.Status.StartTime != nil {
@ -80,9 +61,13 @@ func GetPodStartTime(pod *v1.Pod) *metav1.Time {
return &metav1.Time{Time: time.Now()}
}
// lessFunc is a function that receives two items and returns true if the first
// item should be placed before the second one when the list is sorted.
type lessFunc = func(item1, item2 interface{}) bool
// GetEarliestPodStartTime returns the earliest start time of all pods that
// have the highest priority among all victims.
func GetEarliestPodStartTime(victims *api.Victims) *metav1.Time {
func GetEarliestPodStartTime(victims *extenderv1.Victims) *metav1.Time {
if len(victims.Pods) == 0 {
// should not reach here.
klog.Errorf("victims.Pods is empty. Should not reach here.")
@ -90,15 +75,15 @@ func GetEarliestPodStartTime(victims *api.Victims) *metav1.Time {
}
earliestPodStartTime := GetPodStartTime(victims.Pods[0])
highestPriority := GetPodPriority(victims.Pods[0])
maxPriority := podutil.GetPodPriority(victims.Pods[0])
for _, pod := range victims.Pods {
if GetPodPriority(pod) == highestPriority {
if podutil.GetPodPriority(pod) == maxPriority {
if GetPodStartTime(pod).Before(earliestPodStartTime) {
earliestPodStartTime = GetPodStartTime(pod)
}
} else if GetPodPriority(pod) > highestPriority {
highestPriority = GetPodPriority(pod)
} else if podutil.GetPodPriority(pod) > maxPriority {
maxPriority = podutil.GetPodPriority(pod)
earliestPodStartTime = GetPodStartTime(pod)
}
}
@ -106,43 +91,15 @@ func GetEarliestPodStartTime(victims *api.Victims) *metav1.Time {
return earliestPodStartTime
}
// SortableList is a list that implements sort.Interface.
type SortableList struct {
Items []interface{}
CompFunc LessFunc
}
// LessFunc is a function that receives two items and returns true if the first
// item should be placed before the second one when the list is sorted.
type LessFunc func(item1, item2 interface{}) bool
var _ = sort.Interface(&SortableList{})
func (l *SortableList) Len() int { return len(l.Items) }
func (l *SortableList) Less(i, j int) bool {
return l.CompFunc(l.Items[i], l.Items[j])
}
func (l *SortableList) Swap(i, j int) {
l.Items[i], l.Items[j] = l.Items[j], l.Items[i]
}
// Sort sorts the items in the list using the given CompFunc. Item1 is placed
// before Item2 when CompFunc(Item1, Item2) returns true.
func (l *SortableList) Sort() {
sort.Sort(l)
}
// MoreImportantPod returns true when the priority of the first pod is higher than
// the second one. If two pods' priorities are equal, compare their StartTime.
// It takes arguments of the type "interface{}" to be used with SortableList,
// but expects those arguments to be *v1.Pod.
func MoreImportantPod(pod1, pod2 interface{}) bool {
p1 := GetPodPriority(pod1.(*v1.Pod))
p2 := GetPodPriority(pod2.(*v1.Pod))
func MoreImportantPod(pod1, pod2 *v1.Pod) bool {
p1 := podutil.GetPodPriority(pod1)
p2 := podutil.GetPodPriority(pod2)
if p1 != p2 {
return p1 > p2
}
return GetPodStartTime(pod1.(*v1.Pod)).Before(GetPodStartTime(pod2.(*v1.Pod)))
return GetPodStartTime(pod1).Before(GetPodStartTime(pod2))
}
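
With SortableList gone, the narrowed *v1.Pod signature pairs naturally with sort.Slice, which appears to be the motivation for dropping the interface{} arguments. A sketch, assuming the package imports as schedutil "k8s.io/kubernetes/pkg/scheduler/util":

package main

import (
	"fmt"
	"sort"

	v1 "k8s.io/api/core/v1"
	schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)

func int32Ptr(v int32) *int32 { return &v }

func main() {
	pods := []*v1.Pod{
		{Spec: v1.PodSpec{Priority: int32Ptr(5)}},
		{Spec: v1.PodSpec{Priority: int32Ptr(100)}},
		{Spec: v1.PodSpec{Priority: int32Ptr(1)}},
	}
	// sort.Slice covers what the removed SortableList did: most important pods first.
	sort.Slice(pods, func(i, j int) bool {
		return schedutil.MoreImportantPod(pods[i], pods[j])
	})
	fmt.Println(*pods[0].Spec.Priority) // 100
}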


@ -19,7 +19,7 @@ package volumebinder
import (
"time"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
coreinformers "k8s.io/client-go/informers/core/v1"
storageinformers "k8s.io/client-go/informers/storage/v1"
clientset "k8s.io/client-go/kubernetes"
@ -35,13 +35,14 @@ type VolumeBinder struct {
func NewVolumeBinder(
client clientset.Interface,
nodeInformer coreinformers.NodeInformer,
csiNodeInformer storageinformers.CSINodeInformer,
pvcInformer coreinformers.PersistentVolumeClaimInformer,
pvInformer coreinformers.PersistentVolumeInformer,
storageClassInformer storageinformers.StorageClassInformer,
bindTimeout time.Duration) *VolumeBinder {
return &VolumeBinder{
Binder: volumescheduling.NewVolumeBinder(client, nodeInformer, pvcInformer, pvInformer, storageClassInformer, bindTimeout),
Binder: volumescheduling.NewVolumeBinder(client, nodeInformer, csiNodeInformer, pvcInformer, pvInformer, storageClassInformer, bindTimeout),
}
}
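
Callers now have to thread a CSINode informer through as well. A wiring sketch against a fake clientset (the import paths are assumptions based on the packages shown; the bind timeout value is arbitrary):

package main

import (
	"time"

	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"
	"k8s.io/kubernetes/pkg/scheduler/volumebinder"
)

func main() {
	client := fake.NewSimpleClientset()
	factory := informers.NewSharedInformerFactory(client, 0)

	_ = volumebinder.NewVolumeBinder(
		client,
		factory.Core().V1().Nodes(),
		factory.Storage().V1().CSINodes(), // the informer added by this change
		factory.Core().V1().PersistentVolumeClaims(),
		factory.Core().V1().PersistentVolumes(),
		factory.Storage().V1().StorageClasses(),
		10*time.Second, // bind timeout; value is arbitrary here
	)
}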