rebase: update kubernetes to v1.20.0

Updated the Kubernetes packages to the latest release.

Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
Author: Madhu Rajanna
Date: 2020-12-17 17:58:29 +05:30
Committed-by: mergify[bot]
Parent: 4abe128bd8
Commit: 83559144b1
1624 changed files with 247222 additions and 160270 deletions


@@ -1,50 +0,0 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"legacy_types.go",
"register.go",
"types.go",
"zz_generated.deepcopy.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/apis/config",
visibility = ["//visibility:public"],
deps = [
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/component-base/config:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/scheduler/apis/config/scheme:all-srcs",
"//pkg/scheduler/apis/config/testing:all-srcs",
"//pkg/scheduler/apis/config/v1:all-srcs",
"//pkg/scheduler/apis/config/v1alpha1:all-srcs",
"//pkg/scheduler/apis/config/v1alpha2:all-srcs",
"//pkg/scheduler/apis/config/validation:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = ["types_test.go"],
embed = [":go_default_library"],
deps = ["//vendor/github.com/google/go-cmp/cmp:go_default_library"],
)


@@ -1,13 +0,0 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- api-approvers
- sig-scheduling-maintainers
- sttts
- luxas
reviewers:
- sig-scheduling
- api-reviewers
- dixudx
- luxas
- sttts


@@ -1,20 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// +k8s:deepcopy-gen=package
// +groupName=kubescheduler.config.k8s.io
package config // import "k8s.io/kubernetes/pkg/scheduler/apis/config"


@@ -1,234 +0,0 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// Policy describes a policy resource in the API.
type Policy struct {
metav1.TypeMeta
// Holds the information to configure the fit predicate functions.
// If unspecified, the default predicate functions will be applied.
// If empty list, all predicates (except the mandatory ones) will be
// bypassed.
Predicates []PredicatePolicy
// Holds the information to configure the priority functions.
// If unspecified, the default priority functions will be applied.
// If empty list, all priority functions will be bypassed.
Priorities []PriorityPolicy
// Holds the information to communicate with the extender(s)
Extenders []Extender
// RequiredDuringScheduling affinity is not symmetric, but there is an implicit PreferredDuringScheduling affinity rule
// corresponding to every RequiredDuringScheduling affinity rule.
// HardPodAffinitySymmetricWeight represents the weight of implicit PreferredDuringScheduling affinity rule, in the range 1-100.
HardPodAffinitySymmetricWeight int32
// When AlwaysCheckAllPredicates is set to true, scheduler checks all
// the configured predicates even after one or more of them fails.
// When the flag is set to false, scheduler skips checking the rest
// of the predicates after it finds one predicate that failed.
AlwaysCheckAllPredicates bool
}
// PredicatePolicy describes a predicate policy.
type PredicatePolicy struct {
// Identifier of the predicate policy
// For a custom predicate, the name can be user-defined
// For the Kubernetes provided predicates, the name is the identifier of the pre-defined predicate
Name string
// Holds the parameters to configure the given predicate
Argument *PredicateArgument
}
// PriorityPolicy describes a priority policy.
type PriorityPolicy struct {
// Identifier of the priority policy
// For a custom priority, the name can be user-defined
// For the Kubernetes provided priority functions, the name is the identifier of the pre-defined priority function
Name string
// The numeric multiplier for the node scores that the priority function generates
// The weight should be a positive integer
Weight int64
// Holds the parameters to configure the given priority function
Argument *PriorityArgument
}
// PredicateArgument represents the arguments to configure predicate functions in scheduler policy configuration.
// Only one of its members may be specified
type PredicateArgument struct {
// The predicate that provides affinity for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAffinity *ServiceAffinity
// The predicate that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelsPresence *LabelsPresence
}
// PriorityArgument represents the arguments to configure priority functions in scheduler policy configuration.
// Only one of its members may be specified
type PriorityArgument struct {
// The priority function that ensures a good spread (anti-affinity) for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAntiAffinity *ServiceAntiAffinity
// The priority function that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelPreference *LabelPreference
// The RequestedToCapacityRatio priority function is parametrized with function shape.
RequestedToCapacityRatioArguments *RequestedToCapacityRatioArguments
}
// ServiceAffinity holds the parameters that are used to configure the corresponding predicate in scheduler policy configuration.
type ServiceAffinity struct {
// The list of labels that identify node "groups"
// All of the labels should match for the node to be considered a fit for hosting the pod
Labels []string
}
// LabelsPresence holds the parameters that are used to configure the corresponding predicate in scheduler policy configuration.
type LabelsPresence struct {
// The list of labels that identify node "groups"
// All of the labels should be either present (or absent) for the node to be considered a fit for hosting the pod
Labels []string
// The boolean flag that indicates whether the labels should be present or absent from the node
Presence bool
}
// ServiceAntiAffinity holds the parameters that are used to configure the corresponding priority function
type ServiceAntiAffinity struct {
// Used to identify node "groups"
Label string
}
// LabelPreference holds the parameters that are used to configure the corresponding priority function
type LabelPreference struct {
// Used to identify node "groups"
Label string
// This is a boolean flag
// If true, higher priority is given to nodes that have the label
// If false, higher priority is given to nodes that do not have the label
Presence bool
}
// RequestedToCapacityRatioArguments holds arguments specific to RequestedToCapacityRatio priority function.
type RequestedToCapacityRatioArguments struct {
// Array of points defining the priority function shape.
Shape []UtilizationShapePoint `json:"shape"`
Resources []ResourceSpec `json:"resources,omitempty"`
}
// UtilizationShapePoint represents a single point of the priority function shape
type UtilizationShapePoint struct {
// Utilization (x axis). Valid values are 0 to 100. Fully utilized node maps to 100.
Utilization int32
// Score assigned to given utilization (y axis). Valid values are 0 to 10.
Score int32
}
// ResourceSpec represents a single resource used for bin packing by the RequestedToCapacityRatio priority function.
type ResourceSpec struct {
// Name of the resource to be managed by RequestedToCapacityRatio function.
Name string
// Weight of the resource.
Weight int64
}
// ExtenderManagedResource describes the arguments of extended resources
// managed by an extender.
type ExtenderManagedResource struct {
// Name is the extended resource name.
Name string
// IgnoredByScheduler indicates whether kube-scheduler should ignore this
// resource when applying predicates.
IgnoredByScheduler bool
}
// ExtenderTLSConfig contains settings to enable TLS with extender
type ExtenderTLSConfig struct {
// Server should be accessed without verifying the TLS certificate. For testing only.
Insecure bool
// ServerName is passed to the server for SNI and is used in the client to check server
// certificates against. If ServerName is empty, the hostname used to contact the
// server is used.
ServerName string
// Server requires TLS client certificate authentication
CertFile string
// Server requires TLS client certificate authentication
KeyFile string
// Trusted root certificates for server
CAFile string
// CertData holds PEM-encoded bytes (typically read from a client certificate file).
// CertData takes precedence over CertFile
CertData []byte
// KeyData holds PEM-encoded bytes (typically read from a client certificate key file).
// KeyData takes precedence over KeyFile
KeyData []byte
// CAData holds PEM-encoded bytes (typically read from a root certificates bundle).
// CAData takes precedence over CAFile
CAData []byte
}
// Extender holds the parameters used to communicate with the extender. If a verb is unspecified/empty,
// it is assumed that the extender chose not to provide that extension.
type Extender struct {
// URLPrefix at which the extender is available
URLPrefix string
// Verb for the filter call, empty if not supported. This verb is appended to the URLPrefix when issuing the filter call to extender.
FilterVerb string
// Verb for the preempt call, empty if not supported. This verb is appended to the URLPrefix when issuing the preempt call to extender.
PreemptVerb string
// Verb for the prioritize call, empty if not supported. This verb is appended to the URLPrefix when issuing the prioritize call to extender.
PrioritizeVerb string
// The numeric multiplier for the node scores that the prioritize call generates.
// The weight should be a positive integer
Weight int64
// Verb for the bind call, empty if not supported. This verb is appended to the URLPrefix when issuing the bind call to extender.
// If this method is implemented by the extender, it is the extender's responsibility to bind the pod to apiserver. Only one extender
// can implement this function.
BindVerb string
// EnableHTTPS specifies whether https should be used to communicate with the extender
EnableHTTPS bool
// TLSConfig specifies the transport layer security config
TLSConfig *ExtenderTLSConfig
// HTTPTimeout specifies the timeout duration for a call to the extender. A filter timeout fails the scheduling of the pod. A prioritize
// timeout is ignored; k8s/other extenders' priorities are used to select the node.
HTTPTimeout time.Duration
// NodeCacheCapable specifies that the extender is capable of caching node information,
// so the scheduler should only send minimal information about the eligible nodes
// assuming that the extender already cached full details of all nodes in the cluster
NodeCacheCapable bool
// ManagedResources is a list of extended resources that are managed by
// this extender.
// - A pod will be sent to the extender on the Filter, Prioritize and Bind
// (if the extender is the binder) phases iff the pod requests at least
// one of the extended resources in this list. If empty or unspecified,
// all pods will be sent to this extender.
// - If IgnoredByScheduler is set to true for a resource, kube-scheduler
// will skip checking the resource in predicates.
// +optional
ManagedResources []ExtenderManagedResource
// Ignorable specifies if the extender is ignorable, i.e. scheduling should not
// fail when the extender returns an error or is not reachable.
Ignorable bool
}
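
A minimal sketch (not part of this commit) of populating the internal Policy type defined above. The predicate and priority names are illustrative defaults from this era of the scheduler, and the import assumes access to this internal package:

	import config "k8s.io/kubernetes/pkg/scheduler/apis/config"

	policy := config.Policy{
		Predicates: []config.PredicatePolicy{
			{Name: "PodFitsResources"},
		},
		Priorities: []config.PriorityPolicy{
			{Name: "LeastRequestedPriority", Weight: 1},
		},
		// Weight of the implicit preferred inter-pod affinity rule (1-100).
		HardPodAffinitySymmetricWeight: 1,
	}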


@@ -1,45 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
)
// GroupName is the group name used in this package
const GroupName = "kubescheduler.config.k8s.io"
// SchemeGroupVersion is group version used to register these objects
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal}
var (
// SchemeBuilder is the scheme builder with scheme init functions to run for this API package
SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
// AddToScheme is a global function that registers this API group & version to a scheme
AddToScheme = SchemeBuilder.AddToScheme
)
// addKnownTypes registers known types to the given scheme
func addKnownTypes(scheme *runtime.Scheme) error {
scheme.AddKnownTypes(SchemeGroupVersion,
&KubeSchedulerConfiguration{},
&Policy{},
)
scheme.AddKnownTypes(schema.GroupVersion{Group: "", Version: runtime.APIVersionInternal}, &Policy{})
return nil
}
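
A short usage sketch, assuming access to this internal package: AddToScheme registers the types above into a runtime.Scheme.

	scheme := runtime.NewScheme()
	if err := config.AddToScheme(scheme); err != nil {
		panic(err)
	}
	// scheme now recognizes KubeSchedulerConfiguration and Policy under
	// kubescheduler.config.k8s.io with the internal version ("__internal").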


@@ -1,339 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"math"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
componentbaseconfig "k8s.io/component-base/config"
)
const (
// SchedulerDefaultLockObjectNamespace defines default scheduler lock object namespace ("kube-system")
SchedulerDefaultLockObjectNamespace string = metav1.NamespaceSystem
// SchedulerDefaultLockObjectName defines default scheduler lock object name ("kube-scheduler")
SchedulerDefaultLockObjectName = "kube-scheduler"
// SchedulerPolicyConfigMapKey defines the key of the element in the
// scheduler's policy ConfigMap that contains scheduler's policy config.
SchedulerPolicyConfigMapKey = "policy.cfg"
// SchedulerDefaultProviderName defines the default provider name
SchedulerDefaultProviderName = "DefaultProvider"
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// KubeSchedulerConfiguration configures a scheduler
type KubeSchedulerConfiguration struct {
metav1.TypeMeta
// AlgorithmSource specifies the scheduler algorithm source.
// TODO(#87526): Remove AlgorithmSource from this package
// DEPRECATED: AlgorithmSource is removed in the v1alpha2 ComponentConfig
AlgorithmSource SchedulerAlgorithmSource
// LeaderElection defines the configuration of leader election client.
LeaderElection KubeSchedulerLeaderElectionConfiguration
// ClientConnection specifies the kubeconfig file and client connection
// settings for the scheduler to use when communicating with the apiserver.
ClientConnection componentbaseconfig.ClientConnectionConfiguration
// HealthzBindAddress is the IP address and port for the health check server to serve on,
// defaulting to 0.0.0.0:10251
HealthzBindAddress string
// MetricsBindAddress is the IP address and port for the metrics server to
// serve on, defaulting to 0.0.0.0:10251.
MetricsBindAddress string
// DebuggingConfiguration holds configuration for Debugging related features
// TODO: We might want to make this a substruct like Debugging componentbaseconfig.DebuggingConfiguration
componentbaseconfig.DebuggingConfiguration
// DisablePreemption disables the pod preemption feature.
DisablePreemption bool
// PercentageOfNodesToScore is the percentage of all nodes that, once found feasible
// for running a pod, stops the scheduler from searching for more feasible nodes in
// the cluster. This helps improve the scheduler's performance. The scheduler always tries to find
// at least "minFeasibleNodesToFind" feasible nodes no matter what the value of this flag is.
// Example: if the cluster size is 500 nodes and the value of this flag is 30,
// then scheduler stops finding further feasible nodes once it finds 150 feasible ones.
// When the value is 0, default percentage (5%--50% based on the size of the cluster) of the
// nodes will be scored.
PercentageOfNodesToScore int32
// Duration to wait for a binding operation to complete before timing out.
// The value must be a non-negative integer. The value zero indicates no waiting.
// If this value is nil, the default value will be used.
BindTimeoutSeconds int64
// PodInitialBackoffSeconds is the initial backoff for unschedulable pods.
// If specified, it must be greater than 0. If this value is null, the default value (1s)
// will be used.
PodInitialBackoffSeconds int64
// PodMaxBackoffSeconds is the max backoff for unschedulable pods.
// If specified, it must be greater than or equal to podInitialBackoffSeconds. If this value is null,
// the default value (10s) will be used.
PodMaxBackoffSeconds int64
// Profiles are scheduling profiles that kube-scheduler supports. Pods can
// choose to be scheduled under a particular profile by setting its associated
// scheduler name. Pods that don't specify any scheduler name are scheduled
// with the "default-scheduler" profile, if present here.
Profiles []KubeSchedulerProfile
// Extenders are the list of scheduler extenders, each holding the values of how to communicate
// with the extender. These extenders are shared by all scheduler profiles.
Extenders []Extender
}
// KubeSchedulerProfile is a scheduling profile.
type KubeSchedulerProfile struct {
// SchedulerName is the name of the scheduler associated to this profile.
// If SchedulerName matches with the pod's "spec.schedulerName", then the pod
// is scheduled with this profile.
SchedulerName string
// Plugins specify the set of plugins that should be enabled or disabled.
// Enabled plugins are the ones that should be enabled in addition to the
// default plugins. Disabled plugins are any of the default plugins that
// should be disabled.
// When no enabled or disabled plugin is specified for an extension point,
// default plugins for that extension point will be used if there are any.
// If a QueueSort plugin is specified, the same QueueSort Plugin and
// PluginConfig must be specified for all profiles.
Plugins *Plugins
// PluginConfig is an optional set of custom plugin arguments for each plugin.
// Omitting config args for a plugin is equivalent to using the default config
// for that plugin.
PluginConfig []PluginConfig
}
// SchedulerAlgorithmSource is the source of a scheduler algorithm. One source
// field must be specified, and source fields are mutually exclusive.
type SchedulerAlgorithmSource struct {
// Policy is a policy based algorithm source.
Policy *SchedulerPolicySource
// Provider is the name of a scheduling algorithm provider to use.
Provider *string
}
// SchedulerPolicySource configures a means to obtain a scheduler Policy. One
// source field must be specified, and source fields are mutually exclusive.
type SchedulerPolicySource struct {
// File is a file policy source.
File *SchedulerPolicyFileSource
// ConfigMap is a config map policy source.
ConfigMap *SchedulerPolicyConfigMapSource
}
// SchedulerPolicyFileSource is a policy serialized to disk and accessed via
// path.
type SchedulerPolicyFileSource struct {
// Path is the location of a serialized policy.
Path string
}
// SchedulerPolicyConfigMapSource is a policy serialized into a config map value
// under the SchedulerPolicyConfigMapKey key.
type SchedulerPolicyConfigMapSource struct {
// Namespace is the namespace of the policy config map.
Namespace string
// Name is the name of the policy config map.
Name string
}
// KubeSchedulerLeaderElectionConfiguration expands LeaderElectionConfiguration
// to include scheduler specific configuration.
type KubeSchedulerLeaderElectionConfiguration struct {
componentbaseconfig.LeaderElectionConfiguration
}
// Plugins include multiple extension points. When specified, the list of plugins for
// a particular extension point are the only ones enabled. If an extension point is
// omitted from the config, then the default set of plugins is used for that extension point.
// Enabled plugins are called in the order specified here, after default plugins. If they need to
// be invoked before default plugins, default plugins must be disabled and re-enabled here in desired order.
type Plugins struct {
// QueueSort is a list of plugins that should be invoked when sorting pods in the scheduling queue.
QueueSort *PluginSet
// PreFilter is a list of plugins that should be invoked at "PreFilter" extension point of the scheduling framework.
PreFilter *PluginSet
// Filter is a list of plugins that should be invoked when filtering out nodes that cannot run the Pod.
Filter *PluginSet
// PreScore is a list of plugins that are invoked before scoring.
PreScore *PluginSet
// Score is a list of plugins that should be invoked when ranking nodes that have passed the filtering phase.
Score *PluginSet
// Reserve is a list of plugins invoked when reserving a node to run the pod.
Reserve *PluginSet
// Permit is a list of plugins that control binding of a Pod. These plugins can prevent or delay binding of a Pod.
Permit *PluginSet
// PreBind is a list of plugins that should be invoked before a pod is bound.
PreBind *PluginSet
// Bind is a list of plugins that should be invoked at "Bind" extension point of the scheduling framework.
// The scheduler calls these plugins in order, and skips the rest of them as soon as one returns success.
Bind *PluginSet
// PostBind is a list of plugins that should be invoked after a pod is successfully bound.
PostBind *PluginSet
// Unreserve is a list of plugins invoked when a pod that was previously reserved is rejected in a later phase.
Unreserve *PluginSet
}
// PluginSet specifies enabled and disabled plugins for an extension point.
// If an array is empty, missing, or nil, default plugins at that extension point will be used.
type PluginSet struct {
// Enabled specifies plugins that should be enabled in addition to default plugins.
// These are called after default plugins and in the same order specified here.
Enabled []Plugin
// Disabled specifies default plugins that should be disabled.
// When all default plugins need to be disabled, an array containing only one "*" should be provided.
Disabled []Plugin
}
// Plugin specifies a plugin name and its weight when applicable. Weight is used only for Score plugins.
type Plugin struct {
// Name defines the name of plugin
Name string
// Weight defines the weight of plugin, only used for Score plugins.
Weight int32
}
// PluginConfig specifies arguments that should be passed to a plugin at the time of initialization.
// A plugin that is invoked at multiple extension points is initialized once. Args can have arbitrary structure.
// It is up to the plugin to process these Args.
type PluginConfig struct {
// Name defines the name of plugin being configured
Name string
// Args defines the arguments passed to the plugins at the time of initialization. Args can have arbitrary structure.
Args runtime.Unknown
}
/*
* NOTE: The following variables and methods are intentionally left out of the staging mirror.
*/
const (
// DefaultPercentageOfNodesToScore defines the percentage of all nodes that,
// once found feasible, stops the scheduler from looking for more nodes.
// A value of 0 means adaptive, meaning the scheduler figures out a proper default.
DefaultPercentageOfNodesToScore = 0
// MaxCustomPriorityScore is the max score UtilizationShapePoint expects.
MaxCustomPriorityScore int64 = 10
// MaxTotalScore is the maximum total score.
MaxTotalScore int64 = math.MaxInt64
// MaxWeight defines the max weight value allowed for custom PriorityPolicy
MaxWeight = MaxTotalScore / MaxCustomPriorityScore
)
func appendPluginSet(dst *PluginSet, src *PluginSet) *PluginSet {
if dst == nil {
dst = &PluginSet{}
}
if src != nil {
dst.Enabled = append(dst.Enabled, src.Enabled...)
dst.Disabled = append(dst.Disabled, src.Disabled...)
}
return dst
}
// Append appends src Plugins to current Plugins. If a PluginSet is nil, it will
// be created.
func (p *Plugins) Append(src *Plugins) {
if p == nil || src == nil {
return
}
p.QueueSort = appendPluginSet(p.QueueSort, src.QueueSort)
p.PreFilter = appendPluginSet(p.PreFilter, src.PreFilter)
p.Filter = appendPluginSet(p.Filter, src.Filter)
p.PreScore = appendPluginSet(p.PreScore, src.PreScore)
p.Score = appendPluginSet(p.Score, src.Score)
p.Reserve = appendPluginSet(p.Reserve, src.Reserve)
p.Permit = appendPluginSet(p.Permit, src.Permit)
p.PreBind = appendPluginSet(p.PreBind, src.PreBind)
p.Bind = appendPluginSet(p.Bind, src.Bind)
p.PostBind = appendPluginSet(p.PostBind, src.PostBind)
p.Unreserve = appendPluginSet(p.Unreserve, src.Unreserve)
}
// Apply merges the plugin configuration from custom plugins, handling disabled sets.
func (p *Plugins) Apply(customPlugins *Plugins) {
if customPlugins == nil {
return
}
p.QueueSort = mergePluginSets(p.QueueSort, customPlugins.QueueSort)
p.PreFilter = mergePluginSets(p.PreFilter, customPlugins.PreFilter)
p.Filter = mergePluginSets(p.Filter, customPlugins.Filter)
p.PreScore = mergePluginSets(p.PreScore, customPlugins.PreScore)
p.Score = mergePluginSets(p.Score, customPlugins.Score)
p.Reserve = mergePluginSets(p.Reserve, customPlugins.Reserve)
p.Permit = mergePluginSets(p.Permit, customPlugins.Permit)
p.PreBind = mergePluginSets(p.PreBind, customPlugins.PreBind)
p.Bind = mergePluginSets(p.Bind, customPlugins.Bind)
p.PostBind = mergePluginSets(p.PostBind, customPlugins.PostBind)
p.Unreserve = mergePluginSets(p.Unreserve, customPlugins.Unreserve)
}
func mergePluginSets(defaultPluginSet, customPluginSet *PluginSet) *PluginSet {
if customPluginSet == nil {
customPluginSet = &PluginSet{}
}
if defaultPluginSet == nil {
defaultPluginSet = &PluginSet{}
}
disabledPlugins := sets.NewString()
for _, disabledPlugin := range customPluginSet.Disabled {
disabledPlugins.Insert(disabledPlugin.Name)
}
enabledPlugins := []Plugin{}
if !disabledPlugins.Has("*") {
for _, defaultEnabledPlugin := range defaultPluginSet.Enabled {
if disabledPlugins.Has(defaultEnabledPlugin.Name) {
continue
}
enabledPlugins = append(enabledPlugins, defaultEnabledPlugin)
}
}
enabledPlugins = append(enabledPlugins, customPluginSet.Enabled...)
return &PluginSet{Enabled: enabledPlugins}
}
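
An illustrative sketch of the merge semantics implemented above; "MyScorer" is a hypothetical plugin name, while NodeAffinity and TaintToleration stand in for default plugins:

	defaults := &PluginSet{Enabled: []Plugin{{Name: "NodeAffinity"}, {Name: "TaintToleration"}}}
	custom := &PluginSet{
		Enabled:  []Plugin{{Name: "MyScorer", Weight: 5}},
		Disabled: []Plugin{{Name: "TaintToleration"}},
	}
	merged := mergePluginSets(defaults, custom)
	// merged.Enabled is [NodeAffinity, MyScorer]: disabled defaults are dropped,
	// then the custom enabled plugins are appended after the surviving defaults.
	// Disabling "*" instead would drop every default and keep only MyScorer.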


@@ -1,677 +0,0 @@
// +build !ignore_autogenerated
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by deepcopy-gen. DO NOT EDIT.
package config
import (
runtime "k8s.io/apimachinery/pkg/runtime"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Extender) DeepCopyInto(out *Extender) {
*out = *in
if in.TLSConfig != nil {
in, out := &in.TLSConfig, &out.TLSConfig
*out = new(ExtenderTLSConfig)
(*in).DeepCopyInto(*out)
}
if in.ManagedResources != nil {
in, out := &in.ManagedResources, &out.ManagedResources
*out = make([]ExtenderManagedResource, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Extender.
func (in *Extender) DeepCopy() *Extender {
if in == nil {
return nil
}
out := new(Extender)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderManagedResource) DeepCopyInto(out *ExtenderManagedResource) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderManagedResource.
func (in *ExtenderManagedResource) DeepCopy() *ExtenderManagedResource {
if in == nil {
return nil
}
out := new(ExtenderManagedResource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderTLSConfig) DeepCopyInto(out *ExtenderTLSConfig) {
*out = *in
if in.CertData != nil {
in, out := &in.CertData, &out.CertData
*out = make([]byte, len(*in))
copy(*out, *in)
}
if in.KeyData != nil {
in, out := &in.KeyData, &out.KeyData
*out = make([]byte, len(*in))
copy(*out, *in)
}
if in.CAData != nil {
in, out := &in.CAData, &out.CAData
*out = make([]byte, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderTLSConfig.
func (in *ExtenderTLSConfig) DeepCopy() *ExtenderTLSConfig {
if in == nil {
return nil
}
out := new(ExtenderTLSConfig)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *KubeSchedulerConfiguration) DeepCopyInto(out *KubeSchedulerConfiguration) {
*out = *in
out.TypeMeta = in.TypeMeta
in.AlgorithmSource.DeepCopyInto(&out.AlgorithmSource)
out.LeaderElection = in.LeaderElection
out.ClientConnection = in.ClientConnection
out.DebuggingConfiguration = in.DebuggingConfiguration
if in.Profiles != nil {
in, out := &in.Profiles, &out.Profiles
*out = make([]KubeSchedulerProfile, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Extenders != nil {
in, out := &in.Extenders, &out.Extenders
*out = make([]Extender, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeSchedulerConfiguration.
func (in *KubeSchedulerConfiguration) DeepCopy() *KubeSchedulerConfiguration {
if in == nil {
return nil
}
out := new(KubeSchedulerConfiguration)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *KubeSchedulerConfiguration) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *KubeSchedulerLeaderElectionConfiguration) DeepCopyInto(out *KubeSchedulerLeaderElectionConfiguration) {
*out = *in
out.LeaderElectionConfiguration = in.LeaderElectionConfiguration
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeSchedulerLeaderElectionConfiguration.
func (in *KubeSchedulerLeaderElectionConfiguration) DeepCopy() *KubeSchedulerLeaderElectionConfiguration {
if in == nil {
return nil
}
out := new(KubeSchedulerLeaderElectionConfiguration)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *KubeSchedulerProfile) DeepCopyInto(out *KubeSchedulerProfile) {
*out = *in
if in.Plugins != nil {
in, out := &in.Plugins, &out.Plugins
*out = new(Plugins)
(*in).DeepCopyInto(*out)
}
if in.PluginConfig != nil {
in, out := &in.PluginConfig, &out.PluginConfig
*out = make([]PluginConfig, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeSchedulerProfile.
func (in *KubeSchedulerProfile) DeepCopy() *KubeSchedulerProfile {
if in == nil {
return nil
}
out := new(KubeSchedulerProfile)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LabelPreference) DeepCopyInto(out *LabelPreference) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelPreference.
func (in *LabelPreference) DeepCopy() *LabelPreference {
if in == nil {
return nil
}
out := new(LabelPreference)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LabelsPresence) DeepCopyInto(out *LabelsPresence) {
*out = *in
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelsPresence.
func (in *LabelsPresence) DeepCopy() *LabelsPresence {
if in == nil {
return nil
}
out := new(LabelsPresence)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Plugin) DeepCopyInto(out *Plugin) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Plugin.
func (in *Plugin) DeepCopy() *Plugin {
if in == nil {
return nil
}
out := new(Plugin)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PluginConfig) DeepCopyInto(out *PluginConfig) {
*out = *in
in.Args.DeepCopyInto(&out.Args)
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PluginConfig.
func (in *PluginConfig) DeepCopy() *PluginConfig {
if in == nil {
return nil
}
out := new(PluginConfig)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PluginSet) DeepCopyInto(out *PluginSet) {
*out = *in
if in.Enabled != nil {
in, out := &in.Enabled, &out.Enabled
*out = make([]Plugin, len(*in))
copy(*out, *in)
}
if in.Disabled != nil {
in, out := &in.Disabled, &out.Disabled
*out = make([]Plugin, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PluginSet.
func (in *PluginSet) DeepCopy() *PluginSet {
if in == nil {
return nil
}
out := new(PluginSet)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Plugins) DeepCopyInto(out *Plugins) {
*out = *in
if in.QueueSort != nil {
in, out := &in.QueueSort, &out.QueueSort
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.PreFilter != nil {
in, out := &in.PreFilter, &out.PreFilter
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.Filter != nil {
in, out := &in.Filter, &out.Filter
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.PreScore != nil {
in, out := &in.PreScore, &out.PreScore
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.Score != nil {
in, out := &in.Score, &out.Score
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.Reserve != nil {
in, out := &in.Reserve, &out.Reserve
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.Permit != nil {
in, out := &in.Permit, &out.Permit
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.PreBind != nil {
in, out := &in.PreBind, &out.PreBind
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.Bind != nil {
in, out := &in.Bind, &out.Bind
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.PostBind != nil {
in, out := &in.PostBind, &out.PostBind
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.Unreserve != nil {
in, out := &in.Unreserve, &out.Unreserve
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Plugins.
func (in *Plugins) DeepCopy() *Plugins {
if in == nil {
return nil
}
out := new(Plugins)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Policy) DeepCopyInto(out *Policy) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Predicates != nil {
in, out := &in.Predicates, &out.Predicates
*out = make([]PredicatePolicy, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Priorities != nil {
in, out := &in.Priorities, &out.Priorities
*out = make([]PriorityPolicy, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Extenders != nil {
in, out := &in.Extenders, &out.Extenders
*out = make([]Extender, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Policy.
func (in *Policy) DeepCopy() *Policy {
if in == nil {
return nil
}
out := new(Policy)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *Policy) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PredicateArgument) DeepCopyInto(out *PredicateArgument) {
*out = *in
if in.ServiceAffinity != nil {
in, out := &in.ServiceAffinity, &out.ServiceAffinity
*out = new(ServiceAffinity)
(*in).DeepCopyInto(*out)
}
if in.LabelsPresence != nil {
in, out := &in.LabelsPresence, &out.LabelsPresence
*out = new(LabelsPresence)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredicateArgument.
func (in *PredicateArgument) DeepCopy() *PredicateArgument {
if in == nil {
return nil
}
out := new(PredicateArgument)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PredicatePolicy) DeepCopyInto(out *PredicatePolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
*out = new(PredicateArgument)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredicatePolicy.
func (in *PredicatePolicy) DeepCopy() *PredicatePolicy {
if in == nil {
return nil
}
out := new(PredicatePolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PriorityArgument) DeepCopyInto(out *PriorityArgument) {
*out = *in
if in.ServiceAntiAffinity != nil {
in, out := &in.ServiceAntiAffinity, &out.ServiceAntiAffinity
*out = new(ServiceAntiAffinity)
**out = **in
}
if in.LabelPreference != nil {
in, out := &in.LabelPreference, &out.LabelPreference
*out = new(LabelPreference)
**out = **in
}
if in.RequestedToCapacityRatioArguments != nil {
in, out := &in.RequestedToCapacityRatioArguments, &out.RequestedToCapacityRatioArguments
*out = new(RequestedToCapacityRatioArguments)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PriorityArgument.
func (in *PriorityArgument) DeepCopy() *PriorityArgument {
if in == nil {
return nil
}
out := new(PriorityArgument)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PriorityPolicy) DeepCopyInto(out *PriorityPolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
*out = new(PriorityArgument)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PriorityPolicy.
func (in *PriorityPolicy) DeepCopy() *PriorityPolicy {
if in == nil {
return nil
}
out := new(PriorityPolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RequestedToCapacityRatioArguments) DeepCopyInto(out *RequestedToCapacityRatioArguments) {
*out = *in
if in.Shape != nil {
in, out := &in.Shape, &out.Shape
*out = make([]UtilizationShapePoint, len(*in))
copy(*out, *in)
}
if in.Resources != nil {
in, out := &in.Resources, &out.Resources
*out = make([]ResourceSpec, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RequestedToCapacityRatioArguments.
func (in *RequestedToCapacityRatioArguments) DeepCopy() *RequestedToCapacityRatioArguments {
if in == nil {
return nil
}
out := new(RequestedToCapacityRatioArguments)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ResourceSpec) DeepCopyInto(out *ResourceSpec) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceSpec.
func (in *ResourceSpec) DeepCopy() *ResourceSpec {
if in == nil {
return nil
}
out := new(ResourceSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SchedulerAlgorithmSource) DeepCopyInto(out *SchedulerAlgorithmSource) {
*out = *in
if in.Policy != nil {
in, out := &in.Policy, &out.Policy
*out = new(SchedulerPolicySource)
(*in).DeepCopyInto(*out)
}
if in.Provider != nil {
in, out := &in.Provider, &out.Provider
*out = new(string)
**out = **in
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulerAlgorithmSource.
func (in *SchedulerAlgorithmSource) DeepCopy() *SchedulerAlgorithmSource {
if in == nil {
return nil
}
out := new(SchedulerAlgorithmSource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SchedulerPolicyConfigMapSource) DeepCopyInto(out *SchedulerPolicyConfigMapSource) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulerPolicyConfigMapSource.
func (in *SchedulerPolicyConfigMapSource) DeepCopy() *SchedulerPolicyConfigMapSource {
if in == nil {
return nil
}
out := new(SchedulerPolicyConfigMapSource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SchedulerPolicyFileSource) DeepCopyInto(out *SchedulerPolicyFileSource) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulerPolicyFileSource.
func (in *SchedulerPolicyFileSource) DeepCopy() *SchedulerPolicyFileSource {
if in == nil {
return nil
}
out := new(SchedulerPolicyFileSource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SchedulerPolicySource) DeepCopyInto(out *SchedulerPolicySource) {
*out = *in
if in.File != nil {
in, out := &in.File, &out.File
*out = new(SchedulerPolicyFileSource)
**out = **in
}
if in.ConfigMap != nil {
in, out := &in.ConfigMap, &out.ConfigMap
*out = new(SchedulerPolicyConfigMapSource)
**out = **in
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulerPolicySource.
func (in *SchedulerPolicySource) DeepCopy() *SchedulerPolicySource {
if in == nil {
return nil
}
out := new(SchedulerPolicySource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServiceAffinity) DeepCopyInto(out *ServiceAffinity) {
*out = *in
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceAffinity.
func (in *ServiceAffinity) DeepCopy() *ServiceAffinity {
if in == nil {
return nil
}
out := new(ServiceAffinity)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServiceAntiAffinity) DeepCopyInto(out *ServiceAntiAffinity) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceAntiAffinity.
func (in *ServiceAntiAffinity) DeepCopy() *ServiceAntiAffinity {
if in == nil {
return nil
}
out := new(ServiceAntiAffinity)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *UtilizationShapePoint) DeepCopyInto(out *UtilizationShapePoint) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UtilizationShapePoint.
func (in *UtilizationShapePoint) DeepCopy() *UtilizationShapePoint {
if in == nil {
return nil
}
out := new(UtilizationShapePoint)
in.DeepCopyInto(out)
return out
}
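
A small sketch of what these generated helpers guarantee: DeepCopy returns a fully independent clone, so mutating the copy leaves the original untouched.

	in := &Policy{Predicates: []PredicatePolicy{{Name: "PodFitsResources"}}}
	out := in.DeepCopy()
	out.Predicates[0].Name = "SomethingElse"
	// in.Predicates[0].Name is still "PodFitsResources" because DeepCopyInto
	// allocated a fresh Predicates slice instead of sharing the backing array.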


@@ -1,54 +0,0 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = [
"node_affinity.go",
"normalize_score.go",
"spread.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper",
visibility = ["//visibility:public"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/client-go/listers/apps/v1:go_default_library",
"//staging/src/k8s.io/client-go/listers/core/v1:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"node_affinity_test.go",
"normalize_score_test.go",
"spread_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/client-go/informers:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)


@@ -1,78 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package helper
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
)
// PodMatchesNodeSelectorAndAffinityTerms checks whether the pod is schedulable onto nodes according to
// the requirements in both NodeAffinity and nodeSelector.
func PodMatchesNodeSelectorAndAffinityTerms(pod *v1.Pod, node *v1.Node) bool {
// Check if node.Labels match pod.Spec.NodeSelector.
if len(pod.Spec.NodeSelector) > 0 {
selector := labels.SelectorFromSet(pod.Spec.NodeSelector)
if !selector.Matches(labels.Set(node.Labels)) {
return false
}
}
// 1. nil NodeSelector matches all nodes (i.e. does not filter out any nodes)
// 2. nil []NodeSelectorTerm (equivalent to non-nil empty NodeSelector) matches no nodes
// 3. zero-length non-nil []NodeSelectorTerm matches no nodes also, just for simplicity
// 4. nil []NodeSelectorRequirement (equivalent to non-nil empty NodeSelectorTerm) matches no nodes
// 5. zero-length non-nil []NodeSelectorRequirement matches no nodes also, just for simplicity
// 6. non-nil empty NodeSelectorRequirement is not allowed
nodeAffinityMatches := true
affinity := pod.Spec.Affinity
if affinity != nil && affinity.NodeAffinity != nil {
nodeAffinity := affinity.NodeAffinity
// If there are no required NodeAffinity requirements, this is a no-op, meaning all nodes are selected.
// TODO: Replace the next line with the subsequent commented-out line once RequiredDuringSchedulingRequiredDuringExecution is implemented.
if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
// if nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution == nil && nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
return true
}
// Match node selector for requiredDuringSchedulingRequiredDuringExecution.
// TODO: Uncomment this block once RequiredDuringSchedulingRequiredDuringExecution is implemented.
// if nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution != nil {
// nodeSelectorTerms := nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution.NodeSelectorTerms
// klog.V(10).Infof("Match for RequiredDuringSchedulingRequiredDuringExecution node selector terms %+v", nodeSelectorTerms)
// nodeAffinityMatches = nodeMatchesNodeSelectorTerms(node, nodeSelectorTerms)
// }
// Match node selector for requiredDuringSchedulingIgnoredDuringExecution.
if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
nodeSelectorTerms := nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
nodeAffinityMatches = nodeAffinityMatches && nodeMatchesNodeSelectorTerms(node, nodeSelectorTerms)
}
}
return nodeAffinityMatches
}
// nodeMatchesNodeSelectorTerms checks if a node's labels satisfy a list of node selector terms;
// the terms are ORed, and an empty list of terms will match nothing.
func nodeMatchesNodeSelectorTerms(node *v1.Node, nodeSelectorTerms []v1.NodeSelectorTerm) bool {
return v1helper.MatchNodeSelectorTerms(nodeSelectorTerms, node.Labels, fields.Set{
"metadata.name": node.Name,
})
}
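
A minimal sketch of the matching rules above, assuming the usual v1/metav1 imports: a pod's nodeSelector must be a subset of the node's labels for the pod to fit.

	pod := &v1.Pod{Spec: v1.PodSpec{NodeSelector: map[string]string{"disktype": "ssd"}}}
	ssd := &v1.Node{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"disktype": "ssd"}}}
	hdd := &v1.Node{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"disktype": "hdd"}}}
	fmt.Println(PodMatchesNodeSelectorAndAffinityTerms(pod, ssd)) // true
	fmt.Println(PodMatchesNodeSelectorAndAffinityTerms(pod, hdd)) // false: selector not satisfied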


@@ -1,54 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package helper
import (
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
)
// DefaultNormalizeScore generates a Normalize Score function that can normalize the
// scores to [0, maxPriority]. If reverse is set to true, it reverses the scores by
// subtracting each of them from maxPriority.
func DefaultNormalizeScore(maxPriority int64, reverse bool, scores framework.NodeScoreList) *framework.Status {
var maxCount int64
for i := range scores {
if scores[i].Score > maxCount {
maxCount = scores[i].Score
}
}
if maxCount == 0 {
if reverse {
for i := range scores {
scores[i].Score = maxPriority
}
}
return nil
}
for i := range scores {
score := scores[i].Score
score = maxPriority * score / maxCount
if reverse {
score = maxPriority - score
}
scores[i].Score = score
}
return nil
}
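
A worked sketch of the normalization above: each score is scaled by maxPriority over the highest raw score (integer arithmetic), and reverse flips the result.

	scores := framework.NodeScoreList{
		{Name: "node-a", Score: 10},
		{Name: "node-b", Score: 20},
		{Name: "node-c", Score: 40},
	}
	_ = DefaultNormalizeScore(100, false, scores)
	// scores is now node-a=25, node-b=50, node-c=100 (each score * 100 / 40).
	// With reverse=true the results would instead be 75, 50 and 0.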


@@ -1,95 +0,0 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package helper
import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
appslisters "k8s.io/client-go/listers/apps/v1"
corelisters "k8s.io/client-go/listers/core/v1"
)
// DefaultSelector returns a selector deduced from the Services, Replication
// Controllers, Replica Sets, and Stateful Sets matching the given pod.
func DefaultSelector(pod *v1.Pod, sl corelisters.ServiceLister, cl corelisters.ReplicationControllerLister, rsl appslisters.ReplicaSetLister, ssl appslisters.StatefulSetLister) labels.Selector {
labelSet := make(labels.Set)
// Since services, RCs, RSs and SSs match the pod, they won't have conflicting
// labels. Merging is safe.
if services, err := GetPodServices(sl, pod); err == nil {
for _, service := range services {
labelSet = labels.Merge(labelSet, service.Spec.Selector)
}
}
if rcs, err := cl.GetPodControllers(pod); err == nil {
for _, rc := range rcs {
labelSet = labels.Merge(labelSet, rc.Spec.Selector)
}
}
selector := labels.NewSelector()
if len(labelSet) != 0 {
selector = labelSet.AsSelector()
}
if rss, err := rsl.GetPodReplicaSets(pod); err == nil {
for _, rs := range rss {
if other, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector); err == nil {
if r, ok := other.Requirements(); ok {
selector = selector.Add(r...)
}
}
}
}
if sss, err := ssl.GetPodStatefulSets(pod); err == nil {
for _, ss := range sss {
if other, err := metav1.LabelSelectorAsSelector(ss.Spec.Selector); err == nil {
if r, ok := other.Requirements(); ok {
selector = selector.Add(r...)
}
}
}
}
return selector
}
// GetPodServices gets the services that have a selector matching the labels of the given pod.
func GetPodServices(sl corelisters.ServiceLister, pod *v1.Pod) ([]*v1.Service, error) {
allServices, err := sl.Services(pod.Namespace).List(labels.Everything())
if err != nil {
return nil, err
}
var services []*v1.Service
for i := range allServices {
service := allServices[i]
if service.Spec.Selector == nil {
// services with nil selectors match nothing, not everything.
continue
}
selector := labels.Set(service.Spec.Selector).AsSelectorPreValidated()
if selector.Matches(labels.Set(pod.Labels)) {
services = append(services, service)
}
}
return services, nil
}
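
A short sketch of the matching rule in GetPodServices, assuming a ServiceLister sl that returns the service below; note that a service with a nil selector is skipped rather than matching everything.

	svc := &v1.Service{
		ObjectMeta: metav1.ObjectMeta{Name: "web", Namespace: "default"},
		Spec:       v1.ServiceSpec{Selector: map[string]string{"app": "web"}},
	}
	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Labels: map[string]string{"app": "web"}}}
	services, _ := GetPodServices(sl, pod)
	// services contains svc: its selector {"app": "web"} matches the pod's labels.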


@@ -1,45 +0,0 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = ["node_affinity.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity",
visibility = ["//visibility:public"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/scheduler/framework/plugins/helper:go_default_library",
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = ["node_affinity_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/internal/cache:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
],
)

View File

@ -1,119 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeaffinity
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
pluginhelper "k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
"k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// NodeAffinity is a plugin that checks if a pod's node selector and node affinity terms match the node's labels.
type NodeAffinity struct {
handle framework.FrameworkHandle
}
var _ framework.FilterPlugin = &NodeAffinity{}
var _ framework.ScorePlugin = &NodeAffinity{}
const (
// Name is the name of the plugin used in the plugin registry and configurations.
Name = "NodeAffinity"
// ErrReason for node affinity/selector not matching.
ErrReason = "node(s) didn't match node selector"
)
// Name returns name of the plugin. It is used in logs, etc.
func (pl *NodeAffinity) Name() string {
return Name
}
// Filter invoked at the filter extension point.
func (pl *NodeAffinity) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) *framework.Status {
node := nodeInfo.Node()
if node == nil {
return framework.NewStatus(framework.Error, "node not found")
}
if !pluginhelper.PodMatchesNodeSelectorAndAffinityTerms(pod, node) {
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason)
}
return nil
}
// Score invoked at the Score extension point.
func (pl *NodeAffinity) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
nodeInfo, err := pl.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
if err != nil {
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v", nodeName, err))
}
node := nodeInfo.Node()
if node == nil {
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v", nodeName, err))
}
affinity := pod.Spec.Affinity
var count int64
// A nil element of PreferredDuringSchedulingIgnoredDuringExecution matches no objects.
// An element of PreferredDuringSchedulingIgnoredDuringExecution that refers to an
// empty PreferredSchedulingTerm matches all objects.
if affinity != nil && affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil {
// Match PreferredDuringSchedulingIgnoredDuringExecution term by term.
for i := range affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution {
preferredSchedulingTerm := &affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution[i]
if preferredSchedulingTerm.Weight == 0 {
continue
}
// TODO: Avoid computing it for all nodes if this becomes a performance problem.
nodeSelector, err := v1helper.NodeSelectorRequirementsAsSelector(preferredSchedulingTerm.Preference.MatchExpressions)
if err != nil {
return 0, framework.NewStatus(framework.Error, err.Error())
}
if nodeSelector.Matches(labels.Set(node.Labels)) {
count += int64(preferredSchedulingTerm.Weight)
}
}
}
return count, nil
}
// NormalizeScore invoked after scoring all nodes.
func (pl *NodeAffinity) NormalizeScore(ctx context.Context, state *framework.CycleState, pod *v1.Pod, scores framework.NodeScoreList) *framework.Status {
return pluginhelper.DefaultNormalizeScore(framework.MaxNodeScore, false, scores)
}
// ScoreExtensions of the Score plugin.
func (pl *NodeAffinity) ScoreExtensions() framework.ScoreExtensions {
return pl
}
// New initializes a new plugin and returns it.
func New(_ *runtime.Unknown, h framework.FrameworkHandle) (framework.Plugin, error) {
return &NodeAffinity{handle: h}, nil
}
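
The Score path above is a weighted count: every preferred term with a non-zero weight whose selector matches the node's labels contributes its weight. A simplified standalone sketch (a plain struct stands in for v1.PreferredSchedulingTerm; the names and label values are hypothetical):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
)

// weightedTerm is a simplified stand-in for v1.PreferredSchedulingTerm: a
// selector plus the weight it contributes when the node matches it.
type weightedTerm struct {
	weight   int64
	selector labels.Selector
}

// preferredAffinityScore mirrors the Score loop above: zero-weight terms
// are skipped, and every matching term adds its weight to the count.
func preferredAffinityScore(nodeLabels labels.Set, terms []weightedTerm) int64 {
	var count int64
	for _, t := range terms {
		if t.weight == 0 {
			continue
		}
		if t.selector.Matches(nodeLabels) {
			count += t.weight
		}
	}
	return count
}

func main() {
	ssd, _ := labels.Parse("disktype=ssd")
	zoneA, _ := labels.Parse("zone=a")
	terms := []weightedTerm{{weight: 80, selector: ssd}, {weight: 20, selector: zoneA}}

	node := labels.Set{"disktype": "ssd", "zone": "b"}
	fmt.Println(preferredAffinityScore(node, terms)) // 80: only the ssd term matches
}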

View File

@ -1,40 +0,0 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = ["node_name.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodename",
visibility = ["//visibility:public"],
deps = [
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = ["node_name_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
],
)

View File

@ -1,65 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodename
import (
"context"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
"k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// NodeName is a plugin that checks if a pod spec node name matches the current node.
type NodeName struct{}
var _ framework.FilterPlugin = &NodeName{}
const (
// Name is the name of the plugin used in the plugin registry and configurations.
Name = "NodeName"
// ErrReason returned when node name doesn't match.
ErrReason = "node(s) didn't match the requested hostname"
)
// Name returns name of the plugin. It is used in logs, etc.
func (pl *NodeName) Name() string {
return Name
}
// Filter invoked at the filter extension point.
func (pl *NodeName) Filter(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) *framework.Status {
if nodeInfo.Node() == nil {
return framework.NewStatus(framework.Error, "node not found")
}
if !Fits(pod, nodeInfo) {
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason)
}
return nil
}
// Fits actually checks if the pod fits the node.
func Fits(pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) bool {
return len(pod.Spec.NodeName) == 0 || pod.Spec.NodeName == nodeInfo.Node().Name
}
// New initializes a new plugin and returns it.
func New(_ *runtime.Unknown, _ framework.FrameworkHandle) (framework.Plugin, error) {
return &NodeName{}, nil
}
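
Fits reduces to a single comparison; a tiny sketch of the same rule with hypothetical values:

package main

import "fmt"

// fitsNodeName mirrors Fits above: an empty spec.nodeName matches any
// node; otherwise the names must be equal.
func fitsNodeName(specNodeName, nodeName string) bool {
	return len(specNodeName) == 0 || specNodeName == nodeName
}

func main() {
	fmt.Println(fitsNodeName("", "node-a"))       // true
	fmt.Println(fitsNodeName("node-a", "node-a")) // true
	fmt.Println(fitsNodeName("node-a", "node-b")) // false
}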

View File

@ -1,40 +0,0 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = ["node_ports.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeports",
visibility = ["//visibility:public"],
deps = [
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = ["node_ports_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
],
)

View File

@ -1,134 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeports
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
"k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// NodePorts is a plugin that checks if a node has free ports for the requested pod ports.
type NodePorts struct{}
var _ framework.FilterPlugin = &NodePorts{}
const (
// Name is the name of the plugin used in the plugin registry and configurations.
Name = "NodePorts"
// preFilterStateKey is the key in CycleState to NodePorts pre-computed data.
// Using the name of the plugin will likely help us avoid collisions with other plugins.
preFilterStateKey = "PreFilter" + Name
// ErrReason when node ports aren't available.
ErrReason = "node(s) didn't have free ports for the requested pod ports"
)
type preFilterState []*v1.ContainerPort
// Clone the prefilter state.
func (s preFilterState) Clone() framework.StateData {
// The state is not impacted by adding/removing existing pods, hence we don't need to make a deep copy.
return s
}
// Name returns name of the plugin. It is used in logs, etc.
func (pl *NodePorts) Name() string {
return Name
}
// getContainerPorts returns the container ports of the given pods as a flat
// slice; it only collects them and does not detect or resolve port conflicts.
func getContainerPorts(pods ...*v1.Pod) []*v1.ContainerPort {
ports := []*v1.ContainerPort{}
for _, pod := range pods {
for j := range pod.Spec.Containers {
container := &pod.Spec.Containers[j]
for k := range container.Ports {
ports = append(ports, &container.Ports[k])
}
}
}
return ports
}
// PreFilter invoked at the prefilter extension point.
func (pl *NodePorts) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) *framework.Status {
s := getContainerPorts(pod)
cycleState.Write(preFilterStateKey, preFilterState(s))
return nil
}
// PreFilterExtensions do not exist for this plugin.
func (pl *NodePorts) PreFilterExtensions() framework.PreFilterExtensions {
return nil
}
func getPreFilterState(cycleState *framework.CycleState) (preFilterState, error) {
c, err := cycleState.Read(preFilterStateKey)
if err != nil {
// preFilterState doesn't exist, likely PreFilter wasn't invoked.
return nil, fmt.Errorf("error reading %q from cycleState: %v", preFilterStateKey, err)
}
s, ok := c.(preFilterState)
if !ok {
return nil, fmt.Errorf("%+v convert to nodeports.preFilterState error", c)
}
return s, nil
}
// Filter invoked at the filter extension point.
func (pl *NodePorts) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) *framework.Status {
wantPorts, err := getPreFilterState(cycleState)
if err != nil {
return framework.NewStatus(framework.Error, err.Error())
}
fits := fitsPorts(wantPorts, nodeInfo)
if !fits {
return framework.NewStatus(framework.Unschedulable, ErrReason)
}
return nil
}
// Fits checks if the pod fits the node.
func Fits(pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) bool {
return fitsPorts(getContainerPorts(pod), nodeInfo)
}
func fitsPorts(wantPorts []*v1.ContainerPort, nodeInfo *nodeinfo.NodeInfo) bool {
// try to see whether existingPorts and wantPorts will conflict or not
existingPorts := nodeInfo.UsedPorts()
for _, cp := range wantPorts {
if existingPorts.CheckConflict(cp.HostIP, string(cp.Protocol), cp.HostPort) {
return false
}
}
return true
}
// New initializes a new plugin and returns it.
func New(_ *runtime.Unknown, _ framework.FrameworkHandle) (framework.Plugin, error) {
return &NodePorts{}, nil
}
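
The conflict test above delegates to NodeInfo.UsedPorts().CheckConflict. A simplified sketch of the underlying idea (this is an approximation with hypothetical types, not the exact upstream wildcard handling):

package main

import "fmt"

// hostPort is a simplified stand-in for the (HostIP, Protocol, HostPort)
// triple tracked by NodeInfo.UsedPorts.
type hostPort struct {
	ip       string
	protocol string
	port     int32
}

// conflicts mirrors the spirit of CheckConflict above: two host ports clash
// when protocol and port match and their IPs overlap, treating 0.0.0.0 as
// overlapping every address.
func conflicts(a, b hostPort) bool {
	if a.port == 0 || b.port == 0 {
		return false // unset host ports never conflict
	}
	if a.protocol != b.protocol || a.port != b.port {
		return false
	}
	return a.ip == b.ip || a.ip == "0.0.0.0" || b.ip == "0.0.0.0"
}

func main() {
	used := hostPort{ip: "0.0.0.0", protocol: "TCP", port: 8080}
	fmt.Println(conflicts(used, hostPort{ip: "10.0.0.1", protocol: "TCP", port: 8080})) // true: wildcard IP overlaps all
	fmt.Println(conflicts(used, hostPort{ip: "10.0.0.1", protocol: "UDP", port: 8080})) // false: different protocol
}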

View File

@ -1,73 +0,0 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = [
"balanced_allocation.go",
"fit.go",
"least_allocated.go",
"most_allocated.go",
"requested_to_capacity_ratio.go",
"resource_allocation.go",
"resource_limits.go",
"test_util.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources",
visibility = ["//visibility:public"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler/apis/config:go_default_library",
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = [
"balanced_allocation_test.go",
"fit_test.go",
"least_allocated_test.go",
"most_allocated_test.go",
"requested_to_capacity_ratio_test.go",
"resource_limits_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/internal/cache:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
],
)

View File

@ -1,120 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
"context"
"fmt"
"math"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
)
// BalancedAllocation is a score plugin that calculates the difference between the cpu and memory fraction
// of capacity, and prioritizes the host based on how close the two metrics are to each other.
type BalancedAllocation struct {
handle framework.FrameworkHandle
resourceAllocationScorer
}
var _ = framework.ScorePlugin(&BalancedAllocation{})
// BalancedAllocationName is the name of the plugin used in the plugin registry and configurations.
const BalancedAllocationName = "NodeResourcesBalancedAllocation"
// Name returns name of the plugin. It is used in logs, etc.
func (ba *BalancedAllocation) Name() string {
return BalancedAllocationName
}
// Score invoked at the score extension point.
func (ba *BalancedAllocation) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
nodeInfo, err := ba.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
if err != nil {
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v", nodeName, err))
}
// ba.score favors nodes with balanced resource usage rate.
// It should **NOT** be used alone, and **MUST** be used together
// with NodeResourcesLeastAllocated plugin. It calculates the difference between the cpu and memory fraction
// of capacity, and prioritizes the host based on how close the two metrics are to each other.
// Detail: score = (1 - variance(cpuFraction,memoryFraction,volumeFraction)) * MaxNodeScore. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced
// Resource Utilization"
return ba.score(pod, nodeInfo)
}
// ScoreExtensions of the Score plugin.
func (ba *BalancedAllocation) ScoreExtensions() framework.ScoreExtensions {
return nil
}
// NewBalancedAllocation initializes a new plugin and returns it.
func NewBalancedAllocation(_ *runtime.Unknown, h framework.FrameworkHandle) (framework.Plugin, error) {
return &BalancedAllocation{
handle: h,
resourceAllocationScorer: resourceAllocationScorer{
BalancedAllocationName,
balancedResourceScorer,
defaultRequestedRatioResources,
},
}, nil
}
// todo: use resource weights in the scorer function
func balancedResourceScorer(requested, allocable resourceToValueMap, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
cpuFraction := fractionOfCapacity(requested[v1.ResourceCPU], allocable[v1.ResourceCPU])
memoryFraction := fractionOfCapacity(requested[v1.ResourceMemory], allocable[v1.ResourceMemory])
// This is to find a node which has the most balanced CPU, memory and volume usage.
if cpuFraction >= 1 || memoryFraction >= 1 {
// if requested >= capacity, the corresponding host should never be preferred.
return 0
}
if includeVolumes && utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && allocatableVolumes > 0 {
volumeFraction := float64(requestedVolumes) / float64(allocatableVolumes)
if volumeFraction >= 1 {
// if requested >= capacity, the corresponding host should never be preferred.
return 0
}
// Compute variance for all the three fractions.
mean := (cpuFraction + memoryFraction + volumeFraction) / float64(3)
variance := float64((((cpuFraction - mean) * (cpuFraction - mean)) + ((memoryFraction - mean) * (memoryFraction - mean)) + ((volumeFraction - mean) * (volumeFraction - mean))) / float64(3))
// Since the variance is computed over positive fractions, it is itself a positive fraction. 1-variance makes the
// score higher for the node with the least variance, and multiplying it by MaxNodeScore provides the scaling
// factor needed.
return int64((1 - variance) * float64(framework.MaxNodeScore))
}
// The upper and lower boundaries of the difference between cpuFraction and memoryFraction are -1 and 1
// respectively. Multiplying the absolute value of the difference by MaxNodeScore scales the value to
// 0-MaxNodeScore, with 0 representing a well balanced allocation and MaxNodeScore a poorly balanced one.
// Subtracting it from MaxNodeScore yields a score that also scales from 0 to MaxNodeScore, with MaxNodeScore
// representing a well balanced allocation.
diff := math.Abs(cpuFraction - memoryFraction)
return int64((1 - diff) * float64(framework.MaxNodeScore))
}
func fractionOfCapacity(requested, capacity int64) float64 {
if capacity == 0 {
return 1
}
return float64(requested) / float64(capacity)
}
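
For the common two-resource case (no volume balancing), the scorer reduces to 1 minus the absolute difference of the two fractions, scaled by MaxNodeScore. A standalone sketch with hypothetical fractions, assuming MaxNodeScore is 100 in this release:

package main

import (
	"fmt"
	"math"
)

const maxNodeScore = 100 // assumed value of framework.MaxNodeScore

// balancedScore reproduces the two-resource branch above: the closer the
// cpu and memory fractions are to each other, the higher the score.
func balancedScore(cpuFraction, memoryFraction float64) int64 {
	if cpuFraction >= 1 || memoryFraction >= 1 {
		return 0 // requested >= capacity: never prefer this node
	}
	diff := math.Abs(cpuFraction - memoryFraction)
	return int64((1 - diff) * maxNodeScore)
}

func main() {
	fmt.Println(balancedScore(0.5, 0.5))   // 100: perfectly balanced
	fmt.Println(balancedScore(0.75, 0.25)) // 50: skewed usage
	fmt.Println(balancedScore(1.2, 0.1))   // 0: cpu over capacity
}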

View File

@ -1,267 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
"context"
"fmt"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/features"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
var _ framework.PreFilterPlugin = &Fit{}
var _ framework.FilterPlugin = &Fit{}
const (
// FitName is the name of the plugin used in the plugin registry and configurations.
FitName = "NodeResourcesFit"
// preFilterStateKey is the key in CycleState to NodeResourcesFit pre-computed data.
// Using the name of the plugin will likely help us avoid collisions with other plugins.
preFilterStateKey = "PreFilter" + FitName
)
// Fit is a plugin that checks if a node has sufficient resources.
type Fit struct {
ignoredResources sets.String
}
// FitArgs holds the args that are used to configure the plugin.
type FitArgs struct {
// IgnoredResources is the list of resources that NodeResources fit filter
// should ignore.
IgnoredResources []string `json:"ignoredResources,omitempty"`
}
// preFilterState computed at PreFilter and used at Filter.
type preFilterState struct {
schedulernodeinfo.Resource
}
// Clone the prefilter state.
func (s *preFilterState) Clone() framework.StateData {
return s
}
// Name returns name of the plugin. It is used in logs, etc.
func (f *Fit) Name() string {
return FitName
}
// computePodResourceRequest returns a schedulernodeinfo.Resource that covers the largest
// width in each resource dimension. Because init-containers run sequentially, we collect
// the max in each dimension iteratively. In contrast, we sum the resource vectors for
// regular containers since they run simultaneously.
//
// If Pod Overhead is specified and the feature gate is set, the resources defined for Overhead
// are added to the calculated Resource request sum
//
// Example:
//
// Pod:
// InitContainers
// IC1:
// CPU: 2
// Memory: 1G
// IC2:
// CPU: 2
// Memory: 3G
// Containers
// C1:
// CPU: 2
// Memory: 1G
// C2:
// CPU: 1
// Memory: 1G
//
// Result: CPU: 3, Memory: 3G
func computePodResourceRequest(pod *v1.Pod) *preFilterState {
result := &preFilterState{}
for _, container := range pod.Spec.Containers {
result.Add(container.Resources.Requests)
}
// take max_resource(sum_pod, any_init_container)
for _, container := range pod.Spec.InitContainers {
result.SetMaxResource(container.Resources.Requests)
}
// If Overhead is being utilized, add to the total requests for the pod
if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(features.PodOverhead) {
result.Add(pod.Spec.Overhead)
}
return result
}
// PreFilter invoked at the prefilter extension point.
func (f *Fit) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) *framework.Status {
cycleState.Write(preFilterStateKey, computePodResourceRequest(pod))
return nil
}
// PreFilterExtensions returns prefilter extensions (pod add and remove); this plugin implements none, so it returns nil.
func (f *Fit) PreFilterExtensions() framework.PreFilterExtensions {
return nil
}
func getPreFilterState(cycleState *framework.CycleState) (*preFilterState, error) {
c, err := cycleState.Read(preFilterStateKey)
if err != nil {
// preFilterState doesn't exist, likely PreFilter wasn't invoked.
return nil, fmt.Errorf("error reading %q from cycleState: %v", preFilterStateKey, err)
}
s, ok := c.(*preFilterState)
if !ok {
return nil, fmt.Errorf("%+v convert to NodeResourcesFit.preFilterState error", c)
}
return s, nil
}
// Filter invoked at the filter extension point.
// Checks if a node has sufficient resources, such as cpu, memory, gpu, and extended resources, to run a pod.
// It returns a list of insufficient resources; if the list is empty, the node has all the resources requested by the pod.
func (f *Fit) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *framework.Status {
s, err := getPreFilterState(cycleState)
if err != nil {
return framework.NewStatus(framework.Error, err.Error())
}
insufficientResources := fitsRequest(s, nodeInfo, f.ignoredResources)
if len(insufficientResources) != 0 {
// We will keep all failure reasons.
failureReasons := make([]string, 0, len(insufficientResources))
for _, r := range insufficientResources {
failureReasons = append(failureReasons, r.Reason)
}
return framework.NewStatus(framework.Unschedulable, failureReasons...)
}
return nil
}
// InsufficientResource describes what kind of resource limit is hit and caused the pod to not fit the node.
type InsufficientResource struct {
ResourceName v1.ResourceName
// We explicitly have a parameter for reason to avoid formatting a message on the fly
// for common resources, which is expensive for cluster autoscaler simulations.
Reason string
Requested int64
Used int64
Capacity int64
}
// Fits checks if the node has enough resources to host the pod.
func Fits(pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo, ignoredExtendedResources sets.String) []InsufficientResource {
return fitsRequest(computePodResourceRequest(pod), nodeInfo, ignoredExtendedResources)
}
func fitsRequest(podRequest *preFilterState, nodeInfo *schedulernodeinfo.NodeInfo, ignoredExtendedResources sets.String) []InsufficientResource {
insufficientResources := make([]InsufficientResource, 0, 4)
allowedPodNumber := nodeInfo.AllowedPodNumber()
if len(nodeInfo.Pods())+1 > allowedPodNumber {
insufficientResources = append(insufficientResources, InsufficientResource{
v1.ResourcePods,
"Too many pods",
1,
int64(len(nodeInfo.Pods())),
int64(allowedPodNumber),
})
}
if ignoredExtendedResources == nil {
ignoredExtendedResources = sets.NewString()
}
if podRequest.MilliCPU == 0 &&
podRequest.Memory == 0 &&
podRequest.EphemeralStorage == 0 &&
len(podRequest.ScalarResources) == 0 {
return insufficientResources
}
allocatable := nodeInfo.AllocatableResource()
if allocatable.MilliCPU < podRequest.MilliCPU+nodeInfo.RequestedResource().MilliCPU {
insufficientResources = append(insufficientResources, InsufficientResource{
v1.ResourceCPU,
"Insufficient cpu",
podRequest.MilliCPU,
nodeInfo.RequestedResource().MilliCPU,
allocatable.MilliCPU,
})
}
if allocatable.Memory < podRequest.Memory+nodeInfo.RequestedResource().Memory {
insufficientResources = append(insufficientResources, InsufficientResource{
v1.ResourceMemory,
"Insufficient memory",
podRequest.Memory,
nodeInfo.RequestedResource().Memory,
allocatable.Memory,
})
}
if allocatable.EphemeralStorage < podRequest.EphemeralStorage+nodeInfo.RequestedResource().EphemeralStorage {
insufficientResources = append(insufficientResources, InsufficientResource{
v1.ResourceEphemeralStorage,
"Insufficient ephemeral-storage",
podRequest.EphemeralStorage,
nodeInfo.RequestedResource().EphemeralStorage,
allocatable.EphemeralStorage,
})
}
for rName, rQuant := range podRequest.ScalarResources {
if v1helper.IsExtendedResourceName(rName) {
// If this resource is one of the extended resources that should be
// ignored, we will skip checking it.
if ignoredExtendedResources.Has(string(rName)) {
continue
}
}
if allocatable.ScalarResources[rName] < rQuant+nodeInfo.RequestedResource().ScalarResources[rName] {
insufficientResources = append(insufficientResources, InsufficientResource{
rName,
fmt.Sprintf("Insufficient %v", rName),
podRequest.ScalarResources[rName],
nodeInfo.RequestedResource().ScalarResources[rName],
allocatable.ScalarResources[rName],
})
}
}
return insufficientResources
}
// NewFit initializes a new plugin and returns it.
func NewFit(plArgs *runtime.Unknown, _ framework.FrameworkHandle) (framework.Plugin, error) {
args := &FitArgs{}
if err := framework.DecodeInto(plArgs, args); err != nil {
return nil, err
}
fit := &Fit{}
fit.ignoredResources = sets.NewString(args.IgnoredResources...)
return fit, nil
}
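
The request computation treats regular containers and init containers differently: regular containers run concurrently, so their requests are summed, while init containers run sequentially, so only the per-dimension maximum counts. A standalone sketch of that rule for a single resource dimension, reusing the numbers from the doc comment above:

package main

import "fmt"

// podRequest mirrors computePodResourceRequest above for one resource:
// regular containers run together, so their requests are summed; init
// containers run one at a time, so only the largest one matters.
func podRequest(containers, initContainers []int64) int64 {
	var sum int64
	for _, c := range containers {
		sum += c
	}
	for _, ic := range initContainers {
		if ic > sum {
			sum = ic
		}
	}
	return sum
}

func main() {
	// CPU from the doc comment's example: C1=2, C2=1; IC1=2, IC2=2.
	fmt.Println(podRequest([]int64{2, 1}, []int64{2, 2})) // 3
	// Memory (in GiB here): C1=1, C2=1; IC1=1, IC2=3.
	fmt.Println(podRequest([]int64{1, 1}, []int64{1, 3})) // 3
}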

View File

@ -1,99 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
)
// LeastAllocated is a score plugin that favors nodes with fewer requested resources.
type LeastAllocated struct {
handle framework.FrameworkHandle
resourceAllocationScorer
}
var _ = framework.ScorePlugin(&LeastAllocated{})
// LeastAllocatedName is the name of the plugin used in the plugin registry and configurations.
const LeastAllocatedName = "NodeResourcesLeastAllocated"
// Name returns name of the plugin. It is used in logs, etc.
func (la *LeastAllocated) Name() string {
return LeastAllocatedName
}
// Score invoked at the score extension point.
func (la *LeastAllocated) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
nodeInfo, err := la.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
if err != nil {
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v", nodeName, err))
}
// la.score favors nodes with fewer requested resources.
// It calculates the fraction of memory and CPU requested by pods scheduled on the node, and
// prioritizes based on the average of the unused fraction of each resource.
//
// Details:
// (cpu((capacity-sum(requested))*MaxNodeScore/capacity) + memory((capacity-sum(requested))*MaxNodeScore/capacity))/2
return la.score(pod, nodeInfo)
}
// ScoreExtensions of the Score plugin.
func (la *LeastAllocated) ScoreExtensions() framework.ScoreExtensions {
return nil
}
// NewLeastAllocated initializes a new plugin and returns it.
func NewLeastAllocated(_ *runtime.Unknown, h framework.FrameworkHandle) (framework.Plugin, error) {
return &LeastAllocated{
handle: h,
resourceAllocationScorer: resourceAllocationScorer{
LeastAllocatedName,
leastResourceScorer,
defaultRequestedRatioResources,
},
}, nil
}
func leastResourceScorer(requested, allocable resourceToValueMap, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
var nodeScore, weightSum int64
for resource, weight := range defaultRequestedRatioResources {
resourceScore := leastRequestedScore(requested[resource], allocable[resource])
nodeScore += resourceScore * weight
weightSum += weight
}
return nodeScore / weightSum
}
// The unused capacity is calculated on a scale of 0-MaxNodeScore,
// 0 being the lowest priority and MaxNodeScore being the highest.
// The more unused resources there are, the higher the score is.
func leastRequestedScore(requested, capacity int64) int64 {
if capacity == 0 {
return 0
}
if requested > capacity {
return 0
}
return ((capacity - requested) * int64(framework.MaxNodeScore)) / capacity
}
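
A standalone sketch of leastRequestedScore with sample numbers, assuming MaxNodeScore is 100 in this release:

package main

import "fmt"

const maxNodeScore = 100 // assumed value of framework.MaxNodeScore

// leastRequested mirrors leastRequestedScore above: the larger the unused
// capacity, the higher the score.
func leastRequested(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return (capacity - requested) * maxNodeScore / capacity
}

func main() {
	fmt.Println(leastRequested(0, 4000))    // 100: idle node
	fmt.Println(leastRequested(1000, 4000)) // 75
	fmt.Println(leastRequested(4000, 4000)) // 0: fully requested
}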

View File

@ -1,102 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
)
// MostAllocated is a score plugin that favors nodes with high allocation based on requested resources.
type MostAllocated struct {
handle framework.FrameworkHandle
resourceAllocationScorer
}
var _ = framework.ScorePlugin(&MostAllocated{})
// MostAllocatedName is the name of the plugin used in the plugin registry and configurations.
const MostAllocatedName = "NodeResourcesMostAllocated"
// Name returns name of the plugin. It is used in logs, etc.
func (ma *MostAllocated) Name() string {
return MostAllocatedName
}
// Score invoked at the Score extension point.
func (ma *MostAllocated) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
nodeInfo, err := ma.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
if err != nil || nodeInfo.Node() == nil {
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v, node is nil: %v", nodeName, err, nodeInfo.Node() == nil))
}
// ma.score favors nodes with the most requested resources.
// It calculates the fraction of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the average of the fraction of requested to capacity.
// Details: (cpu(MaxNodeScore * sum(requested) / capacity) + memory(MaxNodeScore * sum(requested) / capacity)) / 2
return ma.score(pod, nodeInfo)
}
// ScoreExtensions of the Score plugin.
func (ma *MostAllocated) ScoreExtensions() framework.ScoreExtensions {
return nil
}
// NewMostAllocated initializes a new plugin and returns it.
func NewMostAllocated(_ *runtime.Unknown, h framework.FrameworkHandle) (framework.Plugin, error) {
return &MostAllocated{
handle: h,
resourceAllocationScorer: resourceAllocationScorer{
MostAllocatedName,
mostResourceScorer,
defaultRequestedRatioResources,
},
}, nil
}
func mostResourceScorer(requested, allocable resourceToValueMap, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
var nodeScore, weightSum int64
for resource, weight := range defaultRequestedRatioResources {
resourceScore := mostRequestedScore(requested[resource], allocable[resource])
nodeScore += resourceScore * weight
weightSum += weight
}
return (nodeScore / weightSum)
}
// The used capacity is calculated on a scale of 0-MaxNodeScore,
// 0 being the lowest priority and MaxNodeScore being the highest.
// The more resources are used, the higher the score is. This function
// is almost a reversed version of least_requested_priority.calculateUnusedScore
// (MaxNodeScore - calculateUnusedScore). The main difference is in rounding. It was added to
// keep the final formula clean and not to modify the widely used (by users
// in their default scheduling policies) calculateUsedScore.
func mostRequestedScore(requested, capacity int64) int64 {
if capacity == 0 {
return 0
}
if requested > capacity {
return 0
}
return (requested * framework.MaxNodeScore) / capacity
}
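
mostRequestedScore is the mirror image of the least-allocated scorer apart from integer rounding; a standalone sketch with the same sample numbers, again assuming MaxNodeScore is 100:

package main

import "fmt"

const maxNodeScore = 100 // assumed value of framework.MaxNodeScore

// mostRequested mirrors mostRequestedScore above: the fuller the node,
// the higher the score, which is what bin packing wants.
func mostRequested(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return requested * maxNodeScore / capacity
}

func main() {
	fmt.Println(mostRequested(1000, 4000)) // 25 (least-requested gives 75)
	fmt.Println(mostRequested(4000, 4000)) // 100: bin packing favors full nodes
}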

View File

@ -1,219 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
"context"
"fmt"
"math"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
)
const (
// RequestedToCapacityRatioName is the name of this plugin.
RequestedToCapacityRatioName = "RequestedToCapacityRatio"
minUtilization = 0
maxUtilization = 100
minScore = 0
maxScore = framework.MaxNodeScore
)
// RequestedToCapacityRatioArgs holds the args that are used to configure the plugin.
type RequestedToCapacityRatioArgs struct {
config.RequestedToCapacityRatioArguments
}
type functionShape []functionShapePoint
type functionShapePoint struct {
// utilization is the function's argument (the x axis).
utilization int64
// score is the function's value at that utilization (the y axis).
score int64
}
// NewRequestedToCapacityRatio initializes a new plugin and returns it.
func NewRequestedToCapacityRatio(plArgs *runtime.Unknown, handle framework.FrameworkHandle) (framework.Plugin, error) {
args := &config.RequestedToCapacityRatioArguments{}
if err := framework.DecodeInto(plArgs, args); err != nil {
return nil, err
}
shape := make([]functionShapePoint, 0, len(args.Shape))
for _, point := range args.Shape {
shape = append(shape, functionShapePoint{
utilization: int64(point.Utilization),
// MaxCustomPriorityScore may diverge from the max score used in the scheduler and defined by MaxNodeScore,
// therefore we need to scale the score returned by requested to capacity ratio to the score range
// used by the scheduler.
score: int64(point.Score) * (framework.MaxNodeScore / config.MaxCustomPriorityScore),
})
}
if err := validateFunctionShape(shape); err != nil {
return nil, err
}
resourceToWeightMap := make(resourceToWeightMap)
for _, resource := range args.Resources {
resourceToWeightMap[v1.ResourceName(resource.Name)] = resource.Weight
if resource.Weight == 0 {
// Apply the default weight.
resourceToWeightMap[v1.ResourceName(resource.Name)] = 1
}
}
if len(args.Resources) == 0 {
// If no resources are specified, use the default set.
resourceToWeightMap = defaultRequestedRatioResources
}
return &RequestedToCapacityRatio{
handle: handle,
resourceAllocationScorer: resourceAllocationScorer{
RequestedToCapacityRatioName,
buildRequestedToCapacityRatioScorerFunction(shape, resourceToWeightMap),
resourceToWeightMap,
},
}, nil
}
// RequestedToCapacityRatio is a score plugin that allows users to apply bin packing
// to core resources like CPU and memory, as well as extended resources like accelerators.
type RequestedToCapacityRatio struct {
handle framework.FrameworkHandle
resourceAllocationScorer
}
var _ framework.ScorePlugin = &RequestedToCapacityRatio{}
// Name returns name of the plugin. It is used in logs, etc.
func (pl *RequestedToCapacityRatio) Name() string {
return RequestedToCapacityRatioName
}
// Score invoked at the score extension point.
func (pl *RequestedToCapacityRatio) Score(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
nodeInfo, err := pl.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
if err != nil {
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v", nodeName, err))
}
return pl.score(pod, nodeInfo)
}
// ScoreExtensions of the Score plugin.
func (pl *RequestedToCapacityRatio) ScoreExtensions() framework.ScoreExtensions {
return nil
}
func validateFunctionShape(shape functionShape) error {
if len(shape) == 0 {
return fmt.Errorf("at least one point must be specified")
}
for i := 1; i < len(shape); i++ {
if shape[i-1].utilization >= shape[i].utilization {
return fmt.Errorf("utilization values must be sorted. Utilization[%d]==%d >= Utilization[%d]==%d", i-1, shape[i-1].utilization, i, shape[i].utilization)
}
}
for i, point := range shape {
if point.utilization < minUtilization {
return fmt.Errorf("utilization values must not be less than %d. Utilization[%d]==%d", minUtilization, i, point.utilization)
}
if point.utilization > maxUtilization {
return fmt.Errorf("utilization values must not be greater than %d. Utilization[%d]==%d", maxUtilization, i, point.utilization)
}
if point.score < minScore {
return fmt.Errorf("score values must not be less than %d. Score[%d]==%d", minScore, i, point.score)
}
if point.score > maxScore {
return fmt.Errorf("score values must not be greater than %d. Score[%d]==%d", maxScore, i, point.score)
}
}
return nil
}
func validateResourceWeightMap(resourceToWeightMap resourceToWeightMap) error {
if len(resourceToWeightMap) == 0 {
return fmt.Errorf("resourceToWeightMap cannot be nil")
}
for resource, weight := range resourceToWeightMap {
if weight < 1 {
return fmt.Errorf("resource %s weight %d must not be less than 1", string(resource), weight)
}
}
return nil
}
func buildRequestedToCapacityRatioScorerFunction(scoringFunctionShape functionShape, resourceToWeightMap resourceToWeightMap) func(resourceToValueMap, resourceToValueMap, bool, int, int) int64 {
rawScoringFunction := buildBrokenLinearFunction(scoringFunctionShape)
err := validateResourceWeightMap(resourceToWeightMap)
if err != nil {
klog.Error(err)
}
resourceScoringFunction := func(requested, capacity int64) int64 {
if capacity == 0 || requested > capacity {
return rawScoringFunction(maxUtilization)
}
return rawScoringFunction(maxUtilization - (capacity-requested)*maxUtilization/capacity)
}
return func(requested, allocable resourceToValueMap, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
var nodeScore, weightSum int64
for resource, weight := range resourceToWeightMap {
resourceScore := resourceScoringFunction(requested[resource], allocable[resource])
if resourceScore > 0 {
nodeScore += resourceScore * weight
weightSum += weight
}
}
if weightSum == 0 {
return 0
}
return int64(math.Round(float64(nodeScore) / float64(weightSum)))
}
}
// buildBrokenLinearFunction creates a function built out of linear segments. Segments are defined via the shape array:
// shape[i].utilization are the points on the "utilization" axis where adjacent segments meet, and
// shape[i].score are the function values at those points.
//
// The returned function f(p) is defined as:
//   shape[0].score for p <= shape[0].utilization
//   shape[n-1].score for p > shape[n-1].utilization
//   linear interpolation between the two surrounding points otherwise
func buildBrokenLinearFunction(shape functionShape) func(int64) int64 {
return func(p int64) int64 {
for i := 0; i < len(shape); i++ {
if p <= int64(shape[i].utilization) {
if i == 0 {
return shape[0].score
}
return shape[i-1].score + (shape[i].score-shape[i-1].score)*(p-shape[i-1].utilization)/(shape[i].utilization-shape[i-1].utilization)
}
}
return shape[len(shape)-1].score
}
}
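
A standalone sketch of the broken-linear evaluation with a hypothetical two-point shape, using the same clamping and interpolation rules as above:

package main

import "fmt"

// point is one vertex of the piecewise linear scoring curve.
type point struct{ utilization, score int64 }

// brokenLinear mirrors buildBrokenLinearFunction above: clamp below the
// first point and above the last, and interpolate linearly in between.
func brokenLinear(shape []point) func(int64) int64 {
	return func(p int64) int64 {
		for i := 0; i < len(shape); i++ {
			if p <= shape[i].utilization {
				if i == 0 {
					return shape[0].score
				}
				return shape[i-1].score + (shape[i].score-shape[i-1].score)*
					(p-shape[i-1].utilization)/(shape[i].utilization-shape[i-1].utilization)
			}
		}
		return shape[len(shape)-1].score
	}
}

func main() {
	// A hypothetical bin-packing shape: score rises with utilization.
	f := brokenLinear([]point{{0, 0}, {100, 100}})
	fmt.Println(f(0), f(25), f(50), f(120)) // 0 25 50 100
}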

View File

@ -1,145 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
v1 "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/features"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
// resourceToWeightMap contains resource name and weight.
type resourceToWeightMap map[v1.ResourceName]int64
// defaultRequestedRatioResources is used to set default requestToWeight map for CPU and memory
var defaultRequestedRatioResources = resourceToWeightMap{v1.ResourceMemory: 1, v1.ResourceCPU: 1}
// resourceAllocationScorer contains information to calculate resource allocation score.
type resourceAllocationScorer struct {
Name string
scorer func(requested, allocable resourceToValueMap, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64
resourceToWeightMap resourceToWeightMap
}
// resourceToValueMap contains resource name and score.
type resourceToValueMap map[v1.ResourceName]int64
// score will use `scorer` function to calculate the score.
func (r *resourceAllocationScorer) score(
pod *v1.Pod,
nodeInfo *schedulernodeinfo.NodeInfo) (int64, *framework.Status) {
node := nodeInfo.Node()
if node == nil {
return 0, framework.NewStatus(framework.Error, "node not found")
}
if r.resourceToWeightMap == nil {
return 0, framework.NewStatus(framework.Error, "resources not found")
}
requested := make(resourceToValueMap, len(r.resourceToWeightMap))
allocatable := make(resourceToValueMap, len(r.resourceToWeightMap))
for resource := range r.resourceToWeightMap {
allocatable[resource], requested[resource] = calculateResourceAllocatableRequest(nodeInfo, pod, resource)
}
var score int64
// Check if the pod has volumes and this could be added to scorer function for balanced resource allocation.
if len(pod.Spec.Volumes) > 0 && utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && nodeInfo.TransientInfo != nil {
score = r.scorer(requested, allocatable, true, nodeInfo.TransientInfo.TransNodeInfo.RequestedVolumes, nodeInfo.TransientInfo.TransNodeInfo.AllocatableVolumesCount)
} else {
score = r.scorer(requested, allocatable, false, 0, 0)
}
if klog.V(10) {
if len(pod.Spec.Volumes) > 0 && utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && nodeInfo.TransientInfo != nil {
klog.Infof(
"%v -> %v: %v, map of allocatable resources %v, map of requested resources %v , allocatable volumes %d, requested volumes %d, score %d",
pod.Name, node.Name, r.Name,
allocatable, requested, nodeInfo.TransientInfo.TransNodeInfo.AllocatableVolumesCount,
nodeInfo.TransientInfo.TransNodeInfo.RequestedVolumes,
score,
)
} else {
klog.Infof(
"%v -> %v: %v, map of allocatable resources %v, map of requested resources %v ,score %d,",
pod.Name, node.Name, r.Name,
allocatable, requested, score,
)
}
}
return score, nil
}
// calculateResourceAllocatableRequest returns the node's allocatable value and the requested value (including this pod's request) for the given resource.
func calculateResourceAllocatableRequest(nodeInfo *schedulernodeinfo.NodeInfo, pod *v1.Pod, resource v1.ResourceName) (int64, int64) {
allocatable := nodeInfo.AllocatableResource()
requested := nodeInfo.RequestedResource()
podRequest := calculatePodResourceRequest(pod, resource)
switch resource {
case v1.ResourceCPU:
return allocatable.MilliCPU, (nodeInfo.NonZeroRequest().MilliCPU + podRequest)
case v1.ResourceMemory:
return allocatable.Memory, (nodeInfo.NonZeroRequest().Memory + podRequest)
case v1.ResourceEphemeralStorage:
return allocatable.EphemeralStorage, (requested.EphemeralStorage + podRequest)
default:
if v1helper.IsScalarResourceName(resource) {
return allocatable.ScalarResources[resource], (requested.ScalarResources[resource] + podRequest)
}
}
if klog.V(10) {
klog.Infof("requested resource %v not considered for node score calculation",
resource,
)
}
return 0, 0
}
// calculatePodResourceRequest returns the total non-zero requests. If Overhead is defined for the pod and the
// PodOverhead feature is enabled, the Overhead is added to the result.
// podResourceRequest = max(sum(podSpec.Containers), max(podSpec.InitContainers)) + overhead
func calculatePodResourceRequest(pod *v1.Pod, resource v1.ResourceName) int64 {
var podRequest int64
for i := range pod.Spec.Containers {
container := &pod.Spec.Containers[i]
value := schedutil.GetNonzeroRequestForResource(resource, &container.Resources.Requests)
podRequest += value
}
for i := range pod.Spec.InitContainers {
initContainer := &pod.Spec.InitContainers[i]
value := schedutil.GetNonzeroRequestForResource(resource, &initContainer.Resources.Requests)
if podRequest < value {
podRequest = value
}
}
// If Overhead is being utilized, add to the total requests for the pod
if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(features.PodOverhead) {
if quantity, found := pod.Spec.Overhead[resource]; found {
podRequest += quantity.Value()
}
}
return podRequest
}

View File

@ -1,161 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// ResourceLimits is a score plugin that increases the score of a node by 1 if the node satisfies
// the input pod's resource limits.
type ResourceLimits struct {
handle framework.FrameworkHandle
}
var _ = framework.PreScorePlugin(&ResourceLimits{})
var _ = framework.ScorePlugin(&ResourceLimits{})
const (
// ResourceLimitsName is the name of the plugin used in the plugin registry and configurations.
ResourceLimitsName = "NodeResourceLimits"
// preScoreStateKey is the key in CycleState to NodeResourceLimits pre-computed data.
// Using the name of the plugin will likely help us avoid collisions with other plugins.
preScoreStateKey = "PreScore" + ResourceLimitsName
)
// preScoreState computed at PreScore and used at Score.
type preScoreState struct {
podResourceRequest *schedulernodeinfo.Resource
}
// Clone the preScore state.
func (s *preScoreState) Clone() framework.StateData {
return s
}
// Name returns name of the plugin. It is used in logs, etc.
func (rl *ResourceLimits) Name() string {
return ResourceLimitsName
}
// PreScore builds and writes cycle state used by Score and NormalizeScore.
func (rl *ResourceLimits) PreScore(
pCtx context.Context,
cycleState *framework.CycleState,
pod *v1.Pod,
nodes []*v1.Node,
) *framework.Status {
if len(nodes) == 0 {
// No nodes to score.
return nil
}
if rl.handle.SnapshotSharedLister() == nil {
return framework.NewStatus(framework.Error, fmt.Sprintf("empty shared lister"))
}
s := &preScoreState{
podResourceRequest: getResourceLimits(pod),
}
cycleState.Write(preScoreStateKey, s)
return nil
}
func getPodResource(cycleState *framework.CycleState) (*schedulernodeinfo.Resource, error) {
c, err := cycleState.Read(preScoreStateKey)
if err != nil {
return nil, fmt.Errorf("Error reading %q from cycleState: %v", preScoreStateKey, err)
}
s, ok := c.(*preScoreState)
if !ok {
return nil, fmt.Errorf("%+v convert to ResourceLimits.preScoreState error", c)
}
return s.podResourceRequest, nil
}
// Score invoked at the Score extension point.
// The "score" returned by this function is either 0 or 1, depending on whether the node can satisfy the pod's resource limits.
// Currently works as follows:
// If a node does not publish its allocatable resources (both cpu and memory), the node score is not affected.
// If a pod does not specify both its cpu and memory limits, the node score is not affected.
// If one or both of the pod's cpu and memory limits can be satisfied, the node is assigned a score of 1.
// The rationale for choosing the lowest score of 1 is that it is mainly meant to break ties between nodes that have
// the same scores assigned by one of the least and most requested priority functions.
func (rl *ResourceLimits) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
nodeInfo, err := rl.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
if err != nil || nodeInfo.Node() == nil {
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v, node is nil: %v", nodeName, err, nodeInfo.Node() == nil))
}
allocatableResources := nodeInfo.AllocatableResource()
podLimits, err := getPodResource(state)
if err != nil {
return 0, framework.NewStatus(framework.Error, err.Error())
}
cpuScore := computeScore(podLimits.MilliCPU, allocatableResources.MilliCPU)
memScore := computeScore(podLimits.Memory, allocatableResources.Memory)
score := int64(0)
if cpuScore == 1 || memScore == 1 {
score = 1
}
return score, nil
}
// ScoreExtensions of the Score plugin.
func (rl *ResourceLimits) ScoreExtensions() framework.ScoreExtensions {
return nil
}
// NewResourceLimits initializes a new plugin and returns it.
func NewResourceLimits(_ *runtime.Unknown, h framework.FrameworkHandle) (framework.Plugin, error) {
return &ResourceLimits{handle: h}, nil
}
// getResourceLimits computes the resource limits for the input pod.
// This function exists to stay consistent with other priority functions,
// most (or perhaps all) of which work with schedulernodeinfo.Resource.
func getResourceLimits(pod *v1.Pod) *schedulernodeinfo.Resource {
result := &schedulernodeinfo.Resource{}
for _, container := range pod.Spec.Containers {
result.Add(container.Resources.Limits)
}
// take max_resource(sum_pod, any_init_container)
for _, container := range pod.Spec.InitContainers {
result.SetMaxResource(container.Resources.Limits)
}
return result
}
// computeScore returns 1 if limit value is less than or equal to allocatable
// value, otherwise it returns 0.
func computeScore(limit, allocatable int64) int64 {
if limit != 0 && allocatable != 0 && limit <= allocatable {
return 1
}
return 0
}
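
Putting Score and computeScore together, the plugin yields a tie-breaking 0-or-1 score. A condensed sketch with hypothetical limit and allocatable values (cpu in millicores, memory in bytes):

package main

import "fmt"

// computeScore returns 1 when a non-zero limit fits into a non-zero
// allocatable value, matching the helper above.
func computeScore(limit, allocatable int64) int64 {
	if limit != 0 && allocatable != 0 && limit <= allocatable {
		return 1
	}
	return 0
}

// limitsScore mirrors the Score logic above: a tie-breaking 1 if the node
// can satisfy the pod's cpu or memory limit, 0 otherwise.
func limitsScore(cpuLimit, memLimit, allocCPU, allocMem int64) int64 {
	if computeScore(cpuLimit, allocCPU) == 1 || computeScore(memLimit, allocMem) == 1 {
		return 1
	}
	return 0
}

func main() {
	fmt.Println(limitsScore(2000, 0, 4000, 8000000000)) // 1: the cpu limit fits
	fmt.Println(limitsScore(0, 0, 4000, 8000000000))    // 0: no limits set
}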

View File

@ -1,55 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
func makeNode(node string, milliCPU, memory int64) *v1.Node {
return &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: node},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
},
},
}
}
func makeNodeWithExtendedResource(node string, milliCPU, memory int64, extendedResource map[string]int64) *v1.Node {
resourceList := make(map[v1.ResourceName]resource.Quantity)
for res, quantity := range extendedResource {
resourceList[v1.ResourceName(res)] = *resource.NewQuantity(quantity, resource.DecimalSI)
}
resourceList[v1.ResourceCPU] = *resource.NewMilliQuantity(milliCPU, resource.DecimalSI)
resourceList[v1.ResourceMemory] = *resource.NewQuantity(memory, resource.BinarySI)
return &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: node},
Status: v1.NodeStatus{
Capacity: resourceList,
Allocatable: resourceList,
},
}
}

View File

@ -1,70 +0,0 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = [
"cycle_state.go",
"framework.go",
"interface.go",
"metrics_recorder.go",
"registry.go",
"waiting_pods_map.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1",
visibility = ["//visibility:public"],
deps = [
"//pkg/controller/volume/scheduling:go_default_library",
"//pkg/scheduler/apis/config:go_default_library",
"//pkg/scheduler/listers:go_default_library",
"//pkg/scheduler/metrics:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/json:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/client-go/informers:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
"//staging/src/k8s.io/component-base/metrics:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/sigs.k8s.io/yaml:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = [
"cycle_state_test.go",
"framework_test.go",
"interface_test.go",
"registry_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/apis/config:go_default_library",
"//pkg/scheduler/metrics:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
"//vendor/github.com/prometheus/client_model/go:go_default_library",
],
)

View File

@ -1,130 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"errors"
"sync"
)
const (
// NotFound is the not found error message.
NotFound = "not found"
)
// StateData is a generic type for arbitrary data stored in CycleState.
type StateData interface {
// Clone is an interface to make a copy of StateData. For performance reasons,
// clone should make shallow copies for members (e.g., slices or maps) that are not
// impacted by PreFilter's optional AddPod/RemovePod methods.
Clone() StateData
}
// StateKey is the type of keys stored in CycleState.
type StateKey string
// CycleState provides a mechanism for plugins to store and retrieve arbitrary data.
// StateData stored by one plugin can be read, altered, or deleted by another plugin.
// CycleState does not provide any data protection, as all plugins are assumed to be
// trusted.
type CycleState struct {
mx sync.RWMutex
storage map[StateKey]StateData
// if recordPluginMetrics is true, PluginExecutionDuration will be recorded for this cycle.
recordPluginMetrics bool
}
// NewCycleState initializes a new CycleState and returns its pointer.
func NewCycleState() *CycleState {
return &CycleState{
storage: make(map[StateKey]StateData),
}
}
// ShouldRecordPluginMetrics returns whether PluginExecutionDuration metrics should be recorded.
func (c *CycleState) ShouldRecordPluginMetrics() bool {
if c == nil {
return false
}
return c.recordPluginMetrics
}
// SetRecordPluginMetrics sets recordPluginMetrics to the given value.
func (c *CycleState) SetRecordPluginMetrics(flag bool) {
if c == nil {
return
}
c.recordPluginMetrics = flag
}
// Clone creates a copy of CycleState and returns its pointer. Clone returns
// nil if the state being cloned is nil.
func (c *CycleState) Clone() *CycleState {
if c == nil {
return nil
}
copy := NewCycleState()
for k, v := range c.storage {
copy.Write(k, v.Clone())
}
return copy
}
// Read retrieves data with the given "key" from CycleState. If the key is not
// present, an error is returned.
// This function is not thread safe. In multi-threaded code, lock should be
// acquired first.
func (c *CycleState) Read(key StateKey) (StateData, error) {
if v, ok := c.storage[key]; ok {
return v, nil
}
return nil, errors.New(NotFound)
}
// Write stores the given "val" in CycleState with the given "key".
// This function is not thread safe. In multi-threaded code, lock should be
// acquired first.
func (c *CycleState) Write(key StateKey, val StateData) {
c.storage[key] = val
}
// Delete deletes data with the given key from CycleState.
// This function is not thread safe. In multi-threaded code, lock should be
// acquired first.
func (c *CycleState) Delete(key StateKey) {
delete(c.storage, key)
}
// Lock acquires CycleState lock.
func (c *CycleState) Lock() {
c.mx.Lock()
}
// Unlock releases CycleState lock.
func (c *CycleState) Unlock() {
c.mx.Unlock()
}
// RLock acquires CycleState read lock.
func (c *CycleState) RLock() {
c.mx.RLock()
}
// RUnlock releases CycleState read lock.
func (c *CycleState) RUnlock() {
c.mx.RUnlock()
}
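
An editorial sketch of the intended usage, assuming it sits alongside CycleState in this package; exampleStateData and the key are hypothetical. It shows the Clone contract and the caller-managed locking that Read and Write require.

type exampleStateData struct {
	matchingNodes []string
}

// Clone may be shallow as long as AddPod/RemovePod never mutate the slice.
func (d *exampleStateData) Clone() StateData {
	return &exampleStateData{matchingNodes: d.matchingNodes}
}

func exampleReadWrite(state *CycleState) ([]string, error) {
	const key StateKey = "example.com/prefilter-data"
	// Read and Write are not thread safe on their own; take the lock first.
	state.Lock()
	state.Write(key, &exampleStateData{matchingNodes: []string{"node-a"}})
	state.Unlock()

	state.RLock()
	defer state.RUnlock()
	data, err := state.Read(key)
	if err != nil {
		return nil, err
	}
	return data.(*exampleStateData).matchingNodes, nil
}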

View File

@ -1,918 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"context"
"fmt"
"reflect"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/controller/volume/scheduling"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
schedulerlisters "k8s.io/kubernetes/pkg/scheduler/listers"
"k8s.io/kubernetes/pkg/scheduler/metrics"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
const (
// Filter is the name of the filter extension point.
Filter = "Filter"
// Specifies the maximum timeout a permit plugin can return.
maxTimeout time.Duration = 15 * time.Minute
preFilter = "PreFilter"
preFilterExtensionAddPod = "PreFilterExtensionAddPod"
preFilterExtensionRemovePod = "PreFilterExtensionRemovePod"
preScore = "PreScore"
score = "Score"
scoreExtensionNormalize = "ScoreExtensionNormalize"
preBind = "PreBind"
bind = "Bind"
postBind = "PostBind"
reserve = "Reserve"
unreserve = "Unreserve"
permit = "Permit"
)
// framework is the component responsible for initializing and running scheduler
// plugins.
type framework struct {
registry Registry
snapshotSharedLister schedulerlisters.SharedLister
waitingPods *waitingPodsMap
pluginNameToWeightMap map[string]int
queueSortPlugins []QueueSortPlugin
preFilterPlugins []PreFilterPlugin
filterPlugins []FilterPlugin
preScorePlugins []PreScorePlugin
scorePlugins []ScorePlugin
reservePlugins []ReservePlugin
preBindPlugins []PreBindPlugin
bindPlugins []BindPlugin
postBindPlugins []PostBindPlugin
unreservePlugins []UnreservePlugin
permitPlugins []PermitPlugin
clientSet clientset.Interface
informerFactory informers.SharedInformerFactory
volumeBinder scheduling.SchedulerVolumeBinder
metricsRecorder *metricsRecorder
// Indicates that RunFilterPlugins should accumulate all failed statuses and not return
// after the first failure.
runAllFilters bool
}
// extensionPoint encapsulates desired and applied set of plugins at a specific extension
// point. This is used to simplify iterating over all extension points supported by the
// framework.
type extensionPoint struct {
// the set of plugins to be configured at this extension point.
plugins *config.PluginSet
// a pointer to the slice storing plugins implementations that will run at this
// extension point.
slicePtr interface{}
}
func (f *framework) getExtensionPoints(plugins *config.Plugins) []extensionPoint {
return []extensionPoint{
{plugins.PreFilter, &f.preFilterPlugins},
{plugins.Filter, &f.filterPlugins},
{plugins.Reserve, &f.reservePlugins},
{plugins.PreScore, &f.preScorePlugins},
{plugins.Score, &f.scorePlugins},
{plugins.PreBind, &f.preBindPlugins},
{plugins.Bind, &f.bindPlugins},
{plugins.PostBind, &f.postBindPlugins},
{plugins.Unreserve, &f.unreservePlugins},
{plugins.Permit, &f.permitPlugins},
{plugins.QueueSort, &f.queueSortPlugins},
}
}
type frameworkOptions struct {
clientSet clientset.Interface
informerFactory informers.SharedInformerFactory
snapshotSharedLister schedulerlisters.SharedLister
metricsRecorder *metricsRecorder
volumeBinder scheduling.SchedulerVolumeBinder
runAllFilters bool
}
// Option for the framework.
type Option func(*frameworkOptions)
// WithClientSet sets clientSet for the scheduling framework.
func WithClientSet(clientSet clientset.Interface) Option {
return func(o *frameworkOptions) {
o.clientSet = clientSet
}
}
// WithInformerFactory sets informer factory for the scheduling framework.
func WithInformerFactory(informerFactory informers.SharedInformerFactory) Option {
return func(o *frameworkOptions) {
o.informerFactory = informerFactory
}
}
// WithSnapshotSharedLister sets the SharedLister of the snapshot.
func WithSnapshotSharedLister(snapshotSharedLister schedulerlisters.SharedLister) Option {
return func(o *frameworkOptions) {
o.snapshotSharedLister = snapshotSharedLister
}
}
// WithRunAllFilters sets the runAllFilters flag, which means RunFilterPlugins accumulates
// all failure Statuses.
func WithRunAllFilters(runAllFilters bool) Option {
return func(o *frameworkOptions) {
o.runAllFilters = runAllFilters
}
}
// withMetricsRecorder is only used in tests.
func withMetricsRecorder(recorder *metricsRecorder) Option {
return func(o *frameworkOptions) {
o.metricsRecorder = recorder
}
}
// WithVolumeBinder sets volume binder for the scheduling framework.
func WithVolumeBinder(binder scheduling.SchedulerVolumeBinder) Option {
return func(o *frameworkOptions) {
o.volumeBinder = binder
}
}
var defaultFrameworkOptions = frameworkOptions{
metricsRecorder: newMetricsRecorder(1000, time.Second),
}
var _ Framework = &framework{}
// NewFramework initializes plugins given the configuration and the registry.
func NewFramework(r Registry, plugins *config.Plugins, args []config.PluginConfig, opts ...Option) (Framework, error) {
options := defaultFrameworkOptions
for _, opt := range opts {
opt(&options)
}
f := &framework{
registry: r,
snapshotSharedLister: options.snapshotSharedLister,
pluginNameToWeightMap: make(map[string]int),
waitingPods: newWaitingPodsMap(),
clientSet: options.clientSet,
informerFactory: options.informerFactory,
volumeBinder: options.volumeBinder,
metricsRecorder: options.metricsRecorder,
runAllFilters: options.runAllFilters,
}
if plugins == nil {
return f, nil
}
// get needed plugins from config
pg := f.pluginsNeeded(plugins)
pluginConfig := make(map[string]*runtime.Unknown, 0)
for i := range args {
name := args[i].Name
if _, ok := pluginConfig[name]; ok {
return nil, fmt.Errorf("repeated config for plugin %s", name)
}
pluginConfig[name] = &args[i].Args
}
pluginsMap := make(map[string]Plugin)
var totalPriority int64
for name, factory := range r {
// initialize only needed plugins.
if _, ok := pg[name]; !ok {
continue
}
p, err := factory(pluginConfig[name], f)
if err != nil {
return nil, fmt.Errorf("error initializing plugin %q: %v", name, err)
}
pluginsMap[name] = p
// A weight of zero is not permitted; plugins can be disabled explicitly
// when configured.
f.pluginNameToWeightMap[name] = int(pg[name].Weight)
if f.pluginNameToWeightMap[name] == 0 {
f.pluginNameToWeightMap[name] = 1
}
// Checks totalPriority against MaxTotalScore to avoid overflow
if int64(f.pluginNameToWeightMap[name])*MaxNodeScore > MaxTotalScore-totalPriority {
return nil, fmt.Errorf("total score of Score plugins could overflow")
}
totalPriority += int64(f.pluginNameToWeightMap[name]) * MaxNodeScore
}
for _, e := range f.getExtensionPoints(plugins) {
if err := updatePluginList(e.slicePtr, e.plugins, pluginsMap); err != nil {
return nil, err
}
}
// Verifying the score weights again since Plugin.Name() could return a different
// value from the one used in the configuration.
for _, scorePlugin := range f.scorePlugins {
if f.pluginNameToWeightMap[scorePlugin.Name()] == 0 {
return nil, fmt.Errorf("score plugin %q is not configured with weight", scorePlugin.Name())
}
}
if len(f.queueSortPlugins) == 0 {
return nil, fmt.Errorf("no queue sort plugin is enabled")
}
if len(f.queueSortPlugins) > 1 {
return nil, fmt.Errorf("only one queue sort plugin can be enabled")
}
if len(f.bindPlugins) == 0 {
return nil, fmt.Errorf("at least one bind plugin is needed")
}
return f, nil
}
func updatePluginList(pluginList interface{}, pluginSet *config.PluginSet, pluginsMap map[string]Plugin) error {
if pluginSet == nil {
return nil
}
plugins := reflect.ValueOf(pluginList).Elem()
pluginType := plugins.Type().Elem()
set := sets.NewString()
for _, ep := range pluginSet.Enabled {
pg, ok := pluginsMap[ep.Name]
if !ok {
return fmt.Errorf("%s %q does not exist", pluginType.Name(), ep.Name)
}
if !reflect.TypeOf(pg).Implements(pluginType) {
return fmt.Errorf("plugin %q does not extend %s plugin", ep.Name, pluginType.Name())
}
if set.Has(ep.Name) {
return fmt.Errorf("plugin %q already registered as %q", ep.Name, pluginType.Name())
}
set.Insert(ep.Name)
newPlugins := reflect.Append(plugins, reflect.ValueOf(pg))
plugins.Set(newPlugins)
}
return nil
}
// QueueSortFunc returns the function to sort pods in scheduling queue
func (f *framework) QueueSortFunc() LessFunc {
if f == nil {
// If the framework is nil, keep the pods' order unchanged.
// NOTE: this is primarily for tests.
return func(_, _ *PodInfo) bool { return false }
}
if len(f.queueSortPlugins) == 0 {
panic("No QueueSort plugin is registered in the framework.")
}
// Only one QueueSort plugin can be enabled.
return f.queueSortPlugins[0].Less
}
// RunPreFilterPlugins runs the set of configured PreFilter plugins. It returns
// *Status and its code is set to non-success if any of the plugins returns
// anything but Success. If a non-success status is returned, then the scheduling
// cycle is aborted.
func (f *framework) RunPreFilterPlugins(ctx context.Context, state *CycleState, pod *v1.Pod) (status *Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(preFilter, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.preFilterPlugins {
status = f.runPreFilterPlugin(ctx, pl, state, pod)
if !status.IsSuccess() {
if status.IsUnschedulable() {
msg := fmt.Sprintf("rejected by %q at prefilter: %v", pl.Name(), status.Message())
klog.V(4).Infof(msg)
return NewStatus(status.Code(), msg)
}
msg := fmt.Sprintf("error while running %q prefilter plugin for pod %q: %v", pl.Name(), pod.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
}
return nil
}
func (f *framework) runPreFilterPlugin(ctx context.Context, pl PreFilterPlugin, state *CycleState, pod *v1.Pod) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.PreFilter(ctx, state, pod)
}
startTime := time.Now()
status := pl.PreFilter(ctx, state, pod)
f.metricsRecorder.observePluginDurationAsync(preFilter, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunPreFilterExtensionAddPod calls the AddPod interface for the set of configured
// PreFilter plugins. It returns directly if any of the plugins return any
// status other than Success.
func (f *framework) RunPreFilterExtensionAddPod(
ctx context.Context,
state *CycleState,
podToSchedule *v1.Pod,
podToAdd *v1.Pod,
nodeInfo *schedulernodeinfo.NodeInfo,
) (status *Status) {
for _, pl := range f.preFilterPlugins {
if pl.PreFilterExtensions() == nil {
continue
}
status = f.runPreFilterExtensionAddPod(ctx, pl, state, podToSchedule, podToAdd, nodeInfo)
if !status.IsSuccess() {
msg := fmt.Sprintf("error while running AddPod for plugin %q while scheduling pod %q: %v",
pl.Name(), podToSchedule.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
}
return nil
}
func (f *framework) runPreFilterExtensionAddPod(ctx context.Context, pl PreFilterPlugin, state *CycleState, podToSchedule *v1.Pod, podToAdd *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.PreFilterExtensions().AddPod(ctx, state, podToSchedule, podToAdd, nodeInfo)
}
startTime := time.Now()
status := pl.PreFilterExtensions().AddPod(ctx, state, podToSchedule, podToAdd, nodeInfo)
f.metricsRecorder.observePluginDurationAsync(preFilterExtensionAddPod, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunPreFilterExtensionRemovePod calls the RemovePod interface for the set of configured
// PreFilter plugins. It returns directly if any of the plugins return any
// status other than Success.
func (f *framework) RunPreFilterExtensionRemovePod(
ctx context.Context,
state *CycleState,
podToSchedule *v1.Pod,
podToRemove *v1.Pod,
nodeInfo *schedulernodeinfo.NodeInfo,
) (status *Status) {
for _, pl := range f.preFilterPlugins {
if pl.PreFilterExtensions() == nil {
continue
}
status = f.runPreFilterExtensionRemovePod(ctx, pl, state, podToSchedule, podToRemove, nodeInfo)
if !status.IsSuccess() {
msg := fmt.Sprintf("error while running RemovePod for plugin %q while scheduling pod %q: %v",
pl.Name(), podToSchedule.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
}
return nil
}
func (f *framework) runPreFilterExtensionRemovePod(ctx context.Context, pl PreFilterPlugin, state *CycleState, podToSchedule *v1.Pod, podToAdd *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.PreFilterExtensions().RemovePod(ctx, state, podToSchedule, podToAdd, nodeInfo)
}
startTime := time.Now()
status := pl.PreFilterExtensions().RemovePod(ctx, state, podToSchedule, podToAdd, nodeInfo)
f.metricsRecorder.observePluginDurationAsync(preFilterExtensionRemovePod, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunFilterPlugins runs the set of configured Filter plugins for pod on
// the given node. If any of these plugins doesn't return "Success", the
// given node is not suitable for running pod.
// Meanwhile, the failure message and status are set for the given node.
func (f *framework) RunFilterPlugins(
ctx context.Context,
state *CycleState,
pod *v1.Pod,
nodeInfo *schedulernodeinfo.NodeInfo,
) PluginToStatus {
var firstFailedStatus *Status
statuses := make(PluginToStatus)
for _, pl := range f.filterPlugins {
pluginStatus := f.runFilterPlugin(ctx, pl, state, pod, nodeInfo)
if len(statuses) == 0 {
firstFailedStatus = pluginStatus
}
if !pluginStatus.IsSuccess() {
if !pluginStatus.IsUnschedulable() {
// Filter plugins are not supposed to return any status other than
// Success or Unschedulable.
firstFailedStatus = NewStatus(Error, fmt.Sprintf("running %q filter plugin for pod %q: %v", pl.Name(), pod.Name, pluginStatus.Message()))
return map[string]*Status{pl.Name(): firstFailedStatus}
}
statuses[pl.Name()] = pluginStatus
if !f.runAllFilters {
// Exit early if we don't need to run all filters.
return statuses
}
}
}
return statuses
}
func (f *framework) runFilterPlugin(ctx context.Context, pl FilterPlugin, state *CycleState, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.Filter(ctx, state, pod, nodeInfo)
}
startTime := time.Now()
status := pl.Filter(ctx, state, pod, nodeInfo)
f.metricsRecorder.observePluginDurationAsync(Filter, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunPreScorePlugins runs the set of configured pre-score plugins. If any
// of these plugins returns any status other than "Success", the given pod is rejected.
func (f *framework) RunPreScorePlugins(
ctx context.Context,
state *CycleState,
pod *v1.Pod,
nodes []*v1.Node,
) (status *Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(preScore, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.preScorePlugins {
status = f.runPreScorePlugin(ctx, pl, state, pod, nodes)
if !status.IsSuccess() {
msg := fmt.Sprintf("error while running %q prescore plugin for pod %q: %v", pl.Name(), pod.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
}
return nil
}
func (f *framework) runPreScorePlugin(ctx context.Context, pl PreScorePlugin, state *CycleState, pod *v1.Pod, nodes []*v1.Node) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.PreScore(ctx, state, pod, nodes)
}
startTime := time.Now()
status := pl.PreScore(ctx, state, pod, nodes)
f.metricsRecorder.observePluginDurationAsync(preScore, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunScorePlugins runs the set of configured scoring plugins. It returns a list that
// stores for each scoring plugin name the corresponding NodeScoreList(s).
// It also returns *Status, which is set to non-success if any of the plugins returns
// a non-success status.
func (f *framework) RunScorePlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodes []*v1.Node) (ps PluginToNodeScores, status *Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(score, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
}()
pluginToNodeScores := make(PluginToNodeScores, len(f.scorePlugins))
for _, pl := range f.scorePlugins {
pluginToNodeScores[pl.Name()] = make(NodeScoreList, len(nodes))
}
ctx, cancel := context.WithCancel(ctx)
errCh := schedutil.NewErrorChannel()
// Run Score method for each node in parallel.
workqueue.ParallelizeUntil(ctx, 16, len(nodes), func(index int) {
for _, pl := range f.scorePlugins {
nodeName := nodes[index].Name
s, status := f.runScorePlugin(ctx, pl, state, pod, nodeName)
if !status.IsSuccess() {
errCh.SendErrorWithCancel(fmt.Errorf(status.Message()), cancel)
return
}
pluginToNodeScores[pl.Name()][index] = NodeScore{
Name: nodeName,
Score: int64(s),
}
}
})
if err := errCh.ReceiveError(); err != nil {
msg := fmt.Sprintf("error while running score plugin for pod %q: %v", pod.Name, err)
klog.Error(msg)
return nil, NewStatus(Error, msg)
}
// Run NormalizeScore method for each ScorePlugin in parallel.
workqueue.ParallelizeUntil(ctx, 16, len(f.scorePlugins), func(index int) {
pl := f.scorePlugins[index]
nodeScoreList := pluginToNodeScores[pl.Name()]
if pl.ScoreExtensions() == nil {
return
}
status := f.runScoreExtension(ctx, pl, state, pod, nodeScoreList)
if !status.IsSuccess() {
err := fmt.Errorf("normalize score plugin %q failed with error %v", pl.Name(), status.Message())
errCh.SendErrorWithCancel(err, cancel)
return
}
})
if err := errCh.ReceiveError(); err != nil {
msg := fmt.Sprintf("error while running normalize score plugin for pod %q: %v", pod.Name, err)
klog.Error(msg)
return nil, NewStatus(Error, msg)
}
// Apply score weights for each ScorePlugin in parallel.
workqueue.ParallelizeUntil(ctx, 16, len(f.scorePlugins), func(index int) {
pl := f.scorePlugins[index]
// Score plugins' weight has been checked when they are initialized.
weight := f.pluginNameToWeightMap[pl.Name()]
nodeScoreList := pluginToNodeScores[pl.Name()]
for i, nodeScore := range nodeScoreList {
// return error if score plugin returns invalid score.
if nodeScore.Score > int64(MaxNodeScore) || nodeScore.Score < int64(MinNodeScore) {
err := fmt.Errorf("score plugin %q returns an invalid score %v, it should in the range of [%v, %v] after normalizing", pl.Name(), nodeScore.Score, MinNodeScore, MaxNodeScore)
errCh.SendErrorWithCancel(err, cancel)
return
}
nodeScoreList[i].Score = nodeScore.Score * int64(weight)
}
})
if err := errCh.ReceiveError(); err != nil {
msg := fmt.Sprintf("error while applying score defaultWeights for pod %q: %v", pod.Name, err)
klog.Error(msg)
return nil, NewStatus(Error, msg)
}
return pluginToNodeScores, nil
}
func (f *framework) runScorePlugin(ctx context.Context, pl ScorePlugin, state *CycleState, pod *v1.Pod, nodeName string) (int64, *Status) {
if !state.ShouldRecordPluginMetrics() {
return pl.Score(ctx, state, pod, nodeName)
}
startTime := time.Now()
s, status := pl.Score(ctx, state, pod, nodeName)
f.metricsRecorder.observePluginDurationAsync(score, pl.Name(), status, metrics.SinceInSeconds(startTime))
return s, status
}
func (f *framework) runScoreExtension(ctx context.Context, pl ScorePlugin, state *CycleState, pod *v1.Pod, nodeScoreList NodeScoreList) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.ScoreExtensions().NormalizeScore(ctx, state, pod, nodeScoreList)
}
startTime := time.Now()
status := pl.ScoreExtensions().NormalizeScore(ctx, state, pod, nodeScoreList)
f.metricsRecorder.observePluginDurationAsync(scoreExtensionNormalize, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunPreBindPlugins runs the set of configured prebind plugins. It returns a
// non-success *Status if any of the plugins returns an error; the Status
// message contains the rejection message or the error that occurred in the plugin.
func (f *framework) RunPreBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) (status *Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(preBind, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.preBindPlugins {
status = f.runPreBindPlugin(ctx, pl, state, pod, nodeName)
if !status.IsSuccess() {
msg := fmt.Sprintf("error while running %q prebind plugin for pod %q: %v", pl.Name(), pod.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
}
return nil
}
func (f *framework) runPreBindPlugin(ctx context.Context, pl PreBindPlugin, state *CycleState, pod *v1.Pod, nodeName string) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.PreBind(ctx, state, pod, nodeName)
}
startTime := time.Now()
status := pl.PreBind(ctx, state, pod, nodeName)
f.metricsRecorder.observePluginDurationAsync(preBind, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunBindPlugins runs the set of configured bind plugins until one returns a non `Skip` status.
func (f *framework) RunBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) (status *Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(bind, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
}()
if len(f.bindPlugins) == 0 {
return NewStatus(Skip, "")
}
for _, bp := range f.bindPlugins {
status = f.runBindPlugin(ctx, bp, state, pod, nodeName)
if status != nil && status.Code() == Skip {
continue
}
if !status.IsSuccess() {
msg := fmt.Sprintf("plugin %q failed to bind pod \"%v/%v\": %v", bp.Name(), pod.Namespace, pod.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
return status
}
return status
}
func (f *framework) runBindPlugin(ctx context.Context, bp BindPlugin, state *CycleState, pod *v1.Pod, nodeName string) *Status {
if !state.ShouldRecordPluginMetrics() {
return bp.Bind(ctx, state, pod, nodeName)
}
startTime := time.Now()
status := bp.Bind(ctx, state, pod, nodeName)
f.metricsRecorder.observePluginDurationAsync(bind, bp.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunPostBindPlugins runs the set of configured postbind plugins.
func (f *framework) RunPostBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(postBind, Success.String()).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.postBindPlugins {
f.runPostBindPlugin(ctx, pl, state, pod, nodeName)
}
}
func (f *framework) runPostBindPlugin(ctx context.Context, pl PostBindPlugin, state *CycleState, pod *v1.Pod, nodeName string) {
if !state.ShouldRecordPluginMetrics() {
pl.PostBind(ctx, state, pod, nodeName)
return
}
startTime := time.Now()
pl.PostBind(ctx, state, pod, nodeName)
f.metricsRecorder.observePluginDurationAsync(postBind, pl.Name(), nil, metrics.SinceInSeconds(startTime))
}
// RunReservePlugins runs the set of configured reserve plugins. If any of these
// plugins returns an error, it does not continue running the remaining ones and
// returns the error. In such case, pod will not be scheduled.
func (f *framework) RunReservePlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) (status *Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(reserve, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.reservePlugins {
status = f.runReservePlugin(ctx, pl, state, pod, nodeName)
if !status.IsSuccess() {
msg := fmt.Sprintf("error while running %q reserve plugin for pod %q: %v", pl.Name(), pod.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
}
return nil
}
func (f *framework) runReservePlugin(ctx context.Context, pl ReservePlugin, state *CycleState, pod *v1.Pod, nodeName string) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.Reserve(ctx, state, pod, nodeName)
}
startTime := time.Now()
status := pl.Reserve(ctx, state, pod, nodeName)
f.metricsRecorder.observePluginDurationAsync(reserve, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunUnreservePlugins runs the set of configured unreserve plugins.
func (f *framework) RunUnreservePlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(unreserve, Success.String()).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.unreservePlugins {
f.runUnreservePlugin(ctx, pl, state, pod, nodeName)
}
}
func (f *framework) runUnreservePlugin(ctx context.Context, pl UnreservePlugin, state *CycleState, pod *v1.Pod, nodeName string) {
if !state.ShouldRecordPluginMetrics() {
pl.Unreserve(ctx, state, pod, nodeName)
return
}
startTime := time.Now()
pl.Unreserve(ctx, state, pod, nodeName)
f.metricsRecorder.observePluginDurationAsync(unreserve, pl.Name(), nil, metrics.SinceInSeconds(startTime))
}
// RunPermitPlugins runs the set of configured permit plugins. If any of these
// plugins returns a status other than "Success" or "Wait", it does not continue
// running the remaining plugins and returns an error. Otherwise, if any of the
// plugins returns "Wait", then this function will create and add a waiting pod
// to the map of currently waiting pods and return a status with the "Wait" code.
// The pod will remain a waiting pod for the minimum duration returned by the permit plugins.
func (f *framework) RunPermitPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) (status *Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(permit, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
}()
pluginsWaitTime := make(map[string]time.Duration)
statusCode := Success
for _, pl := range f.permitPlugins {
status, timeout := f.runPermitPlugin(ctx, pl, state, pod, nodeName)
if !status.IsSuccess() {
if status.IsUnschedulable() {
msg := fmt.Sprintf("rejected pod %q by permit plugin %q: %v", pod.Name, pl.Name(), status.Message())
klog.V(4).Infof(msg)
return NewStatus(status.Code(), msg)
}
if status.Code() == Wait {
// Not allowed to be greater than maxTimeout.
if timeout > maxTimeout {
timeout = maxTimeout
}
pluginsWaitTime[pl.Name()] = timeout
statusCode = Wait
} else {
msg := fmt.Sprintf("error while running %q permit plugin for pod %q: %v", pl.Name(), pod.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
}
}
if statusCode == Wait {
waitingPod := newWaitingPod(pod, pluginsWaitTime)
f.waitingPods.add(waitingPod)
msg := fmt.Sprintf("one or more plugins asked to wait and no plugin rejected pod %q", pod.Name)
klog.V(4).Infof(msg)
return NewStatus(Wait, msg)
}
return nil
}
func (f *framework) runPermitPlugin(ctx context.Context, pl PermitPlugin, state *CycleState, pod *v1.Pod, nodeName string) (*Status, time.Duration) {
if !state.ShouldRecordPluginMetrics() {
return pl.Permit(ctx, state, pod, nodeName)
}
startTime := time.Now()
status, timeout := pl.Permit(ctx, state, pod, nodeName)
f.metricsRecorder.observePluginDurationAsync(permit, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status, timeout
}
// WaitOnPermit will block, if the pod is a waiting pod, until the waiting pod is rejected or allowed.
func (f *framework) WaitOnPermit(ctx context.Context, pod *v1.Pod) (status *Status) {
waitingPod := f.waitingPods.get(pod.UID)
if waitingPod == nil {
return nil
}
defer f.waitingPods.remove(pod.UID)
klog.V(4).Infof("pod %q waiting on permit", pod.Name)
startTime := time.Now()
s := <-waitingPod.s
metrics.PermitWaitDuration.WithLabelValues(s.Code().String()).Observe(metrics.SinceInSeconds(startTime))
if !s.IsSuccess() {
if s.IsUnschedulable() {
msg := fmt.Sprintf("pod %q rejected while waiting on permit: %v", pod.Name, s.Message())
klog.V(4).Infof(msg)
return NewStatus(s.Code(), msg)
}
msg := fmt.Sprintf("error received while waiting on permit for pod %q: %v", pod.Name, s.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
return nil
}
// SnapshotSharedLister returns the scheduler's SharedLister of the latest NodeInfo
// snapshot. The snapshot is taken at the beginning of a scheduling cycle and remains
// unchanged until a pod finishes "Reserve". There is no guarantee that the information
// remains unchanged after "Reserve".
func (f *framework) SnapshotSharedLister() schedulerlisters.SharedLister {
return f.snapshotSharedLister
}
// IterateOverWaitingPods acquires a read lock and iterates over the WaitingPods map.
func (f *framework) IterateOverWaitingPods(callback func(WaitingPod)) {
f.waitingPods.iterate(callback)
}
// GetWaitingPod returns a reference to a WaitingPod given its UID.
func (f *framework) GetWaitingPod(uid types.UID) WaitingPod {
if wp := f.waitingPods.get(uid); wp != nil {
return wp
}
return nil // Returning nil instead of *waitingPod(nil).
}
// RejectWaitingPod rejects a WaitingPod given its UID.
func (f *framework) RejectWaitingPod(uid types.UID) {
waitingPod := f.waitingPods.get(uid)
if waitingPod != nil {
waitingPod.Reject("removed")
}
}
// HasFilterPlugins returns true if at least one filter plugin is defined.
func (f *framework) HasFilterPlugins() bool {
return len(f.filterPlugins) > 0
}
// HasScorePlugins returns true if at least one score plugin is defined.
func (f *framework) HasScorePlugins() bool {
return len(f.scorePlugins) > 0
}
// ListPlugins returns a map of extension point name to plugin names configured at each extension
// point. Returns nil if no plugins were configured.
func (f *framework) ListPlugins() map[string][]config.Plugin {
m := make(map[string][]config.Plugin)
for _, e := range f.getExtensionPoints(&config.Plugins{}) {
plugins := reflect.ValueOf(e.slicePtr).Elem()
extName := plugins.Type().Elem().Name()
var cfgs []config.Plugin
for i := 0; i < plugins.Len(); i++ {
name := plugins.Index(i).Interface().(Plugin).Name()
p := config.Plugin{Name: name}
if extName == "ScorePlugin" {
// Weights apply only to score plugins.
p.Weight = int32(f.pluginNameToWeightMap[name])
}
cfgs = append(cfgs, p)
}
if len(cfgs) > 0 {
m[extName] = cfgs
}
}
if len(m) > 0 {
return m
}
return nil
}
// ClientSet returns a kubernetes clientset.
func (f *framework) ClientSet() clientset.Interface {
return f.clientSet
}
// SharedInformerFactory returns a shared informer factory.
func (f *framework) SharedInformerFactory() informers.SharedInformerFactory {
return f.informerFactory
}
// VolumeBinder returns the volume binder used by scheduler.
func (f *framework) VolumeBinder() scheduling.SchedulerVolumeBinder {
return f.volumeBinder
}
func (f *framework) pluginsNeeded(plugins *config.Plugins) map[string]config.Plugin {
pgMap := make(map[string]config.Plugin)
if plugins == nil {
return pgMap
}
find := func(pgs *config.PluginSet) {
if pgs == nil {
return
}
for _, pg := range pgs.Enabled {
pgMap[pg.Name] = pg
}
}
for _, e := range f.getExtensionPoints(plugins) {
find(e.plugins)
}
return pgMap
}
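
An editorial sketch of minimal wiring for NewFramework, assuming it runs in this package and that the factories passed in are hypothetical PluginFactory values for a queue sort and a bind plugin (as validated above, exactly one queue sort plugin and at least one bind plugin must be enabled).

func exampleNewFramework(prioritySortFactory, defaultBinderFactory PluginFactory) (Framework, error) {
	registry := Registry{
		"PrioritySort":  prioritySortFactory,
		"DefaultBinder": defaultBinderFactory,
	}
	plugins := &config.Plugins{
		QueueSort: &config.PluginSet{Enabled: []config.Plugin{{Name: "PrioritySort"}}},
		Bind:      &config.PluginSet{Enabled: []config.Plugin{{Name: "DefaultBinder"}}},
	}
	// No per-plugin args; accumulate all filter failures for better diagnostics.
	return NewFramework(registry, plugins, nil, WithRunAllFilters(true))
}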

View File

@ -1,525 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// This file defines the scheduling framework plugin interfaces.
package v1alpha1
import (
"context"
"errors"
"math"
"strings"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/pkg/controller/volume/scheduling"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
schedulerlisters "k8s.io/kubernetes/pkg/scheduler/listers"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// NodeScoreList declares a list of nodes and their scores.
type NodeScoreList []NodeScore
// NodeScore is a struct with node name and score.
type NodeScore struct {
Name string
Score int64
}
// PluginToNodeScores declares a map from plugin name to its NodeScoreList.
type PluginToNodeScores map[string]NodeScoreList
// NodeToStatusMap declares map from node name to its status.
type NodeToStatusMap map[string]*Status
// Code is the Status code/type which is returned from plugins.
type Code int
// These are predefined codes used in a Status.
const (
// Success means that plugin ran correctly and found pod schedulable.
// NOTE: A nil status is also considered as "Success".
Success Code = iota
// Error is used for internal plugin errors, unexpected input, etc.
Error
// Unschedulable is used when a plugin finds a pod unschedulable. The scheduler might attempt to
// preempt other pods to get this pod scheduled. Use UnschedulableAndUnresolvable to make the
// scheduler skip preemption.
// The accompanying status message should explain why the pod is unschedulable.
Unschedulable
// UnschedulableAndUnresolvable is used when a (pre-)filter plugin finds a pod unschedulable and
// preemption would not change anything. Plugins should return Unschedulable if it is possible
// that the pod can get scheduled with preemption.
// The accompanying status message should explain why the pod is unschedulable.
UnschedulableAndUnresolvable
// Wait is used when a permit plugin finds a pod scheduling should wait.
Wait
// Skip is used when a bind plugin chooses to skip binding.
Skip
)
// This list should be exactly the same as the codes iota defined above in the same order.
var codes = []string{"Success", "Error", "Unschedulable", "UnschedulableAndUnresolvable", "Wait", "Skip"}
func (c Code) String() string {
return codes[c]
}
const (
// MaxNodeScore is the maximum score a Score plugin is expected to return.
MaxNodeScore int64 = 100
// MinNodeScore is the minimum score a Score plugin is expected to return.
MinNodeScore int64 = 0
// MaxTotalScore is the maximum total score.
MaxTotalScore int64 = math.MaxInt64
)
// Status indicates the result of running a plugin. It consists of a code and a
// message. When the status code is not `Success`, the reasons should explain why.
// NOTE: A nil Status is also considered as Success.
type Status struct {
code Code
reasons []string
}
// Code returns code of the Status.
func (s *Status) Code() Code {
if s == nil {
return Success
}
return s.code
}
// Message returns a concatenated message on reasons of the Status.
func (s *Status) Message() string {
if s == nil {
return ""
}
return strings.Join(s.reasons, ", ")
}
// Reasons returns reasons of the Status.
func (s *Status) Reasons() []string {
return s.reasons
}
// AppendReason appends given reason to the Status.
func (s *Status) AppendReason(reason string) {
s.reasons = append(s.reasons, reason)
}
// IsSuccess returns true if and only if "Status" is nil or Code is "Success".
func (s *Status) IsSuccess() bool {
return s.Code() == Success
}
// IsUnschedulable returns true if "Status" is Unschedulable (Unschedulable or UnschedulableAndUnresolvable).
func (s *Status) IsUnschedulable() bool {
code := s.Code()
return code == Unschedulable || code == UnschedulableAndUnresolvable
}
// AsError returns nil if the status is a success; otherwise returns an "error" object
// with a concatenated message on reasons of the Status.
func (s *Status) AsError() error {
if s.IsSuccess() {
return nil
}
return errors.New(s.Message())
}
// NewStatus makes a Status out of the given arguments and returns its pointer.
func NewStatus(code Code, reasons ...string) *Status {
return &Status{
code: code,
reasons: reasons,
}
}
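
An editorial sketch of the nil-is-Success convention and the reason-joining behavior; exampleStatusSemantics is hypothetical and only exercises the functions defined above.

func exampleStatusSemantics() (bool, string, error) {
	var s *Status       // a nil *Status is treated as Success
	ok := s.IsSuccess() // true; s.Code() is Success
	u := NewStatus(Unschedulable, "node(s) full", "too many pods")
	_ = u.IsUnschedulable()             // true
	return ok, u.Message(), u.AsError() // "node(s) full, too many pods", non-nil error
}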
// PluginToStatus maps plugin name to status. Currently used to identify which Filter plugin
// returned which status.
type PluginToStatus map[string]*Status
// Merge merges the statuses in the map into one. The resulting status code have the following
// precedence: Error, UnschedulableAndUnresolvable, Unschedulable.
func (p PluginToStatus) Merge() *Status {
if len(p) == 0 {
return nil
}
finalStatus := NewStatus(Success)
var hasError, hasUnschedulableAndUnresolvable, hasUnschedulable bool
for _, s := range p {
if s.Code() == Error {
hasError = true
} else if s.Code() == UnschedulableAndUnresolvable {
hasUnschedulableAndUnresolvable = true
} else if s.Code() == Unschedulable {
hasUnschedulable = true
}
finalStatus.code = s.Code()
for _, r := range s.reasons {
finalStatus.AppendReason(r)
}
}
if hasError {
finalStatus.code = Error
} else if hasUnschedulableAndUnresolvable {
finalStatus.code = UnschedulableAndUnresolvable
} else if hasUnschedulable {
finalStatus.code = Unschedulable
}
return finalStatus
}
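
An editorial sketch of the Merge precedence (Error over UnschedulableAndUnresolvable over Unschedulable), independent of map iteration order; the plugin names are hypothetical.

func exampleMerge() Code {
	statuses := PluginToStatus{
		"NodeResources": NewStatus(Unschedulable, "insufficient cpu"),
		"VolumeBinding": NewStatus(Error, "lister failed"),
	}
	return statuses.Merge().Code() // Error wins
}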
// WaitingPod represents a pod currently waiting in the permit phase.
type WaitingPod interface {
// GetPod returns a reference to the waiting pod.
GetPod() *v1.Pod
// GetPendingPlugins returns a list of pending permit plugin's name.
GetPendingPlugins() []string
// Allow declares the waiting pod is allowed to be scheduled by plugin pluginName.
// If this is the last remaining plugin to allow, then a success signal is delivered
// to unblock the pod.
Allow(pluginName string)
// Reject declares the waiting pod unschedulable.
Reject(msg string)
}
// Plugin is the parent type for all the scheduling framework plugins.
type Plugin interface {
Name() string
}
// PodInfo is a wrapper to a Pod with additional information for purposes such as tracking
// the timestamp when it's added to the queue or recording per-pod metrics.
type PodInfo struct {
Pod *v1.Pod
// The time pod added to the scheduling queue.
Timestamp time.Time
// Number of schedule attempts before successfully scheduled.
// It's used to record the # attempts metric.
Attempts int
// The time when the pod is added to the queue for the first time. The pod may be added
// back to the queue multiple times before it's successfully scheduled.
// It shouldn't be updated once initialized. It's used to record the e2e scheduling
// latency for a pod.
InitialAttemptTimestamp time.Time
}
// DeepCopy returns a deep copy of the PodInfo object.
func (podInfo *PodInfo) DeepCopy() *PodInfo {
return &PodInfo{
Pod: podInfo.Pod.DeepCopy(),
Timestamp: podInfo.Timestamp,
Attempts: podInfo.Attempts,
InitialAttemptTimestamp: podInfo.InitialAttemptTimestamp,
}
}
// LessFunc is the function to sort pod info
type LessFunc func(podInfo1, podInfo2 *PodInfo) bool
// QueueSortPlugin is an interface that must be implemented by "QueueSort" plugins.
// These plugins are used to sort pods in the scheduling queue. Only one queue sort
// plugin may be enabled at a time.
type QueueSortPlugin interface {
Plugin
// Less is used to sort pods in the scheduling queue.
Less(*PodInfo, *PodInfo) bool
}
// PreFilterExtensions is an interface that is included in plugins that allow specifying
// callbacks to make incremental updates to its supposedly pre-calculated
// state.
type PreFilterExtensions interface {
// AddPod is called by the framework while trying to evaluate the impact
// of adding podToAdd to the node while scheduling podToSchedule.
AddPod(ctx context.Context, state *CycleState, podToSchedule *v1.Pod, podToAdd *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status
// RemovePod is called by the framework while trying to evaluate the impact
// of removing podToRemove from the node while scheduling podToSchedule.
RemovePod(ctx context.Context, state *CycleState, podToSchedule *v1.Pod, podToRemove *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status
}
// PreFilterPlugin is an interface that must be implemented by "prefilter" plugins.
// These plugins are called at the beginning of the scheduling cycle.
type PreFilterPlugin interface {
Plugin
// PreFilter is called at the beginning of the scheduling cycle. All PreFilter
// plugins must return success or the pod will be rejected.
PreFilter(ctx context.Context, state *CycleState, p *v1.Pod) *Status
// PreFilterExtensions returns a PreFilterExtensions interface if the plugin implements one,
// or nil if it does not. A Pre-filter plugin can provide extensions to incrementally
// modify its pre-processed info. The framework guarantees that the extensions
// AddPod/RemovePod will only be called after PreFilter, possibly on a cloned
// CycleState, and may call those functions more than once before calling
// Filter again on a specific node.
PreFilterExtensions() PreFilterExtensions
}
// FilterPlugin is an interface for Filter plugins. These plugins are called at the
// filter extension point for filtering out hosts that cannot run a pod.
// This concept used to be called 'predicate' in the original scheduler.
// These plugins should return "Success", "Unschedulable" or "Error" in Status.code.
// However, the scheduler accepts other valid codes as well.
// Anything other than "Success" will lead to exclusion of the given host from
// running the pod.
type FilterPlugin interface {
Plugin
// Filter is called by the scheduling framework.
// All FilterPlugins should return "Success" to declare that
// the given node fits the pod. If Filter doesn't return "Success",
// please refer to scheduler/algorithm/predicates/error.go
// for how to set the error message.
// For the node being evaluated, Filter plugins should look at the passed
// nodeInfo reference for this particular node's information (e.g., pods
// considered to be running on the node) instead of looking it up in the
// NodeInfoSnapshot because we don't guarantee that they will be the same.
// For example, during preemption, we may pass a copy of the original
// nodeInfo object that has some pods removed from it to evaluate the
// possibility of preempting them to schedule the target pod.
Filter(ctx context.Context, state *CycleState, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status
}
// PreScorePlugin is an interface for Pre-score plugin. Pre-score is an
// informational extension point. Plugins will be called with a list of nodes
// that passed the filtering phase. A plugin may use this data to update internal
// state or to generate logs/metrics.
type PreScorePlugin interface {
Plugin
// PreScore is called by the scheduling framework after a list of nodes
// passed the filtering phase. All prescore plugins must return success or
// the pod will be rejected.
PreScore(ctx context.Context, state *CycleState, pod *v1.Pod, nodes []*v1.Node) *Status
}
// ScoreExtensions is an interface for Score extended functionality.
type ScoreExtensions interface {
// NormalizeScore is called for all node scores produced by the same plugin's "Score"
// method. A successful run of NormalizeScore will update the scores list and return
// a success status.
NormalizeScore(ctx context.Context, state *CycleState, p *v1.Pod, scores NodeScoreList) *Status
}
// ScorePlugin is an interface that must be implemented by "score" plugins to rank
// nodes that passed the filtering phase.
type ScorePlugin interface {
Plugin
// Score is called on each filtered node. It must return success and an integer
// indicating the rank of the node. All scoring plugins must return success or
// the pod will be rejected.
Score(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) (int64, *Status)
// ScoreExtensions returns a ScoreExtensions interface if it implements one, or nil if it does not.
ScoreExtensions() ScoreExtensions
}
// ReservePlugin is an interface for Reserve plugins. These plugins are called
// at the reservation point. These are meant to update the state of the plugin.
// This concept used to be called 'assume' in the original scheduler.
// These plugins should return only Success or Error in Status.code. However,
// the scheduler accepts other valid codes as well. Anything other than Success
// will lead to rejection of the pod.
type ReservePlugin interface {
Plugin
// Reserve is called by the scheduling framework when the scheduler cache is
// updated.
Reserve(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) *Status
}
// PreBindPlugin is an interface that must be implemented by "prebind" plugins.
// These plugins are called before a pod is bound.
type PreBindPlugin interface {
Plugin
// PreBind is called before binding a pod. All prebind plugins must return
// success or the pod will be rejected and won't be sent for binding.
PreBind(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) *Status
}
// PostBindPlugin is an interface that must be implemented by "postbind" plugins.
// These plugins are called after a pod is successfully bound to a node.
type PostBindPlugin interface {
Plugin
// PostBind is called after a pod is successfully bound. These plugins are
// informational. A common application of this extension point is for cleaning
// up. If a plugin needs to clean-up its state after a pod is scheduled and
// bound, PostBind is the extension point that it should register.
PostBind(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string)
}
// UnreservePlugin is an interface for Unreserve plugins. This is an informational
// extension point. If a pod was reserved and then rejected in a later phase, then
// un-reserve plugins will be notified. Un-reserve plugins should clean up state
// associated with the reserved Pod.
type UnreservePlugin interface {
Plugin
// Unreserve is called by the scheduling framework when a reserved pod was
// rejected in a later phase.
Unreserve(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string)
}
// PermitPlugin is an interface that must be implemented by "permit" plugins.
// These plugins are called before a pod is bound to a node.
type PermitPlugin interface {
Plugin
// Permit is called before binding a pod (and before prebind plugins). Permit
// plugins are used to prevent or delay the binding of a Pod. A permit plugin
// must return success or wait with timeout duration, or the pod will be rejected.
// The pod will also be rejected if the wait times out or if the pod is rejected while
// waiting. Note that if the plugin returns "wait", the framework will wait only
// after running the remaining plugins, provided that no other plugin rejects the pod.
Permit(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) (*Status, time.Duration)
}
// BindPlugin is an interface that must be implemented by "bind" plugins. Bind
// plugins are used to bind a pod to a Node.
type BindPlugin interface {
Plugin
// Bind plugins will not be called until all pre-bind plugins have completed. Each
// bind plugin is called in the configured order. A bind plugin may choose whether
// or not to handle the given Pod. If a bind plugin chooses to handle a Pod, the
// remaining bind plugins are skipped. When a bind plugin does not handle a pod,
// it must return Skip in its Status code. If a bind plugin returns an Error, the
// pod is rejected and will not be bound.
Bind(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) *Status
}
// Framework manages the set of plugins in use by the scheduling framework.
// Configured plugins are called at specified points in a scheduling context.
type Framework interface {
FrameworkHandle
// QueueSortFunc returns the function to sort pods in scheduling queue
QueueSortFunc() LessFunc
// RunPreFilterPlugins runs the set of configured prefilter plugins. It returns
// *Status and its code is set to non-success if any of the plugins returns
// anything but Success. If a non-success status is returned, then the scheduling
// cycle is aborted.
RunPreFilterPlugins(ctx context.Context, state *CycleState, pod *v1.Pod) *Status
// RunFilterPlugins runs the set of configured filter plugins for pod on
// the given node. Note that for the node being evaluated, the passed nodeInfo
// reference could be different from the one in NodeInfoSnapshot map (e.g., pods
// considered to be running on the node could be different). For example, during
// preemption, we may pass a copy of the original nodeInfo object that has some pods
// removed from it to evaluate the possibility of preempting them to
// schedule the target pod.
RunFilterPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) PluginToStatus
// RunPreFilterExtensionAddPod calls the AddPod interface for the set of configured
// PreFilter plugins. It returns directly if any of the plugins return any
// status other than Success.
RunPreFilterExtensionAddPod(ctx context.Context, state *CycleState, podToSchedule *v1.Pod, podToAdd *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status
// RunPreFilterExtensionRemovePod calls the RemovePod interface for the set of configured
// PreFilter plugins. It returns directly if any of the plugins return any
// status other than Success.
RunPreFilterExtensionRemovePod(ctx context.Context, state *CycleState, podToSchedule *v1.Pod, podToAdd *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status
// RunPreScorePlugins runs the set of configured pre-score plugins. If any
// of these plugins returns any status other than "Success", the given pod is rejected.
RunPreScorePlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodes []*v1.Node) *Status
// RunScorePlugins runs the set of configured scoring plugins. It returns a map that
// stores for each scoring plugin name the corresponding NodeScoreList(s).
// It also returns *Status, which is set to non-success if any of the plugins returns
// a non-success status.
RunScorePlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodes []*v1.Node) (PluginToNodeScores, *Status)
// RunPreBindPlugins runs the set of configured prebind plugins. It returns
// *Status and its code is set to non-success if any of the plugins returns
// anything but Success. If the Status code is "Unschedulable", it is
// considered as a scheduling check failure, otherwise, it is considered as an
// internal error. In either case the pod is not going to be bound.
RunPreBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) *Status
// RunPostBindPlugins runs the set of configured postbind plugins.
RunPostBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string)
// RunReservePlugins runs the set of configured reserve plugins. If any of these
// plugins returns an error, it does not continue running the remaining ones and
// returns the error. In such case, pod will not be scheduled.
RunReservePlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) *Status
// RunUnreservePlugins runs the set of configured unreserve plugins.
RunUnreservePlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string)
// RunPermitPlugins runs the set of configured permit plugins. If any of these
// plugins returns a status other than "Success" or "Wait", it does not continue
// running the remaining plugins and returns an error. Otherwise, if any of the
// plugins returns "Wait", this function creates a waiting pod, adds it to the
// map of currently waiting pods, and returns a status with the "Wait" code.
// The pod remains a waiting pod for at most the minimum duration returned by the permit plugins.
RunPermitPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) *Status
// WaitOnPermit will block, if the pod is a waiting pod, until the waiting pod is rejected or allowed.
WaitOnPermit(ctx context.Context, pod *v1.Pod) *Status
// RunBindPlugins runs the set of configured bind plugins. A bind plugin may choose
// whether or not to handle the given Pod. If a bind plugin chooses to skip the
// binding, it should return code=5("skip") status. Otherwise, it should return "Error"
// or "Success". If none of the plugins handled binding, RunBindPlugins returns
// code=5("skip") status.
RunBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) *Status
// HasFilterPlugins returns true if at least one filter plugin is defined.
HasFilterPlugins() bool
// HasScorePlugins returns true if at least one score plugin is defined.
HasScorePlugins() bool
// ListPlugins returns a map of extension point name to list of configured Plugins.
ListPlugins() map[string][]config.Plugin
}
// FrameworkHandle provides data and some tools that plugins can use. It is
// passed to the plugin factories at the time of plugin initialization. Plugins
// must store and use this handle to call framework functions.
type FrameworkHandle interface {
// SnapshotSharedLister returns listers from the latest NodeInfo Snapshot. The snapshot
// is taken at the beginning of a scheduling cycle and remains unchanged until
// the pod finishes the "Permit" point. There is no guarantee that the information
// remains unchanged in the binding phase of scheduling, so plugins in the binding
// cycle (pre-bind/bind/post-bind/un-reserve plugins) should not use it;
// otherwise a concurrent read/write error might occur. They should use the
// scheduler cache instead.
SnapshotSharedLister() schedulerlisters.SharedLister
// IterateOverWaitingPods acquires a read lock and iterates over the WaitingPods map.
IterateOverWaitingPods(callback func(WaitingPod))
// GetWaitingPod returns a waiting pod given its UID.
GetWaitingPod(uid types.UID) WaitingPod
// RejectWaitingPod rejects a waiting pod given its UID.
RejectWaitingPod(uid types.UID)
// ClientSet returns a kubernetes clientSet.
ClientSet() clientset.Interface
SharedInformerFactory() informers.SharedInformerFactory
// VolumeBinder returns the volume binder used by scheduler.
VolumeBinder() scheduling.SchedulerVolumeBinder
}
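
A plugin that needs framework services must capture the handle at factory time, as the comment above requires. What follows is a minimal editorial sketch of a hypothetical out-of-tree filter plugin, not part of this file; the package name, plugin name, factory name, and the import path k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1 are assumptions for illustration.

package nodenameplugin

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" // assumed path
	schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)

// nodeNamePlugin stores the FrameworkHandle so it can call framework
// functions (listers, waiting-pod accessors, clients) in later callbacks.
type nodeNamePlugin struct {
	handle framework.FrameworkHandle
}

func (pl *nodeNamePlugin) Name() string { return "HypotheticalNodeName" }

// Filter approves a node only when the pod's spec.nodeName is unset or matches it.
func (pl *nodeNamePlugin) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *framework.Status {
	if pod.Spec.NodeName == "" || pod.Spec.NodeName == nodeInfo.Node().Name {
		return nil // a nil *Status is interpreted as Success
	}
	return framework.NewStatus(framework.Unschedulable, "node name mismatch")
}

// New is the plugin factory; the handle must be stored here, at initialization.
func New(_ *runtime.Unknown, h framework.FrameworkHandle) (framework.Plugin, error) {
	return &nodeNamePlugin{handle: h}, nil
}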

View File

@ -1,101 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"time"
k8smetrics "k8s.io/component-base/metrics"
"k8s.io/kubernetes/pkg/scheduler/metrics"
)
// frameworkMetric is the data structure passed in the buffer channel between the main framework thread
// and the metricsRecorder goroutine.
type frameworkMetric struct {
metric *k8smetrics.HistogramVec
labelValues []string
value float64
}
// metricsRecorder records framework metrics in a separate goroutine to avoid overhead in the critical path.
type metricsRecorder struct {
// bufferCh is a channel that serves as a metrics buffer before the metricsRecorder goroutine reports it.
bufferCh chan *frameworkMetric
// if bufferSize is reached, incoming metrics will be discarded.
bufferSize int
// how often the recorder runs to flush the metrics.
interval time.Duration
// stopCh is used to stop the goroutine which periodically flushes metrics. It's currently only
// used in tests.
stopCh chan struct{}
// isStoppedCh indicates whether the goroutine is stopped. It's used in tests only to make sure
// the metric flushing goroutine is stopped so that tests can collect metrics for verification.
isStoppedCh chan struct{}
}
func newMetricsRecorder(bufferSize int, interval time.Duration) *metricsRecorder {
recorder := &metricsRecorder{
bufferCh: make(chan *frameworkMetric, bufferSize),
bufferSize: bufferSize,
interval: interval,
stopCh: make(chan struct{}),
isStoppedCh: make(chan struct{}),
}
go recorder.run()
return recorder
}
// observePluginDurationAsync observes the plugin_execution_duration_seconds metric.
// The metric will be flushed to Prometheus asynchronously.
func (r *metricsRecorder) observePluginDurationAsync(extensionPoint, pluginName string, status *Status, value float64) {
newMetric := &frameworkMetric{
metric: metrics.PluginExecutionDuration,
labelValues: []string{pluginName, extensionPoint, status.Code().String()},
value: value,
}
select {
case r.bufferCh <- newMetric:
default:
}
}
// run periodically flushes buffered metrics into Prometheus at the configured interval.
func (r *metricsRecorder) run() {
for {
select {
case <-r.stopCh:
close(r.isStoppedCh)
return
default:
}
r.flushMetrics()
time.Sleep(r.interval)
}
}
// flushMetrics tries to clean up the bufferCh by reading at most bufferSize metrics.
func (r *metricsRecorder) flushMetrics() {
for i := 0; i < r.bufferSize; i++ {
select {
case m := <-r.bufferCh:
m.metric.WithLabelValues(m.labelValues...).Observe(m.value)
default:
return
}
}
}
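
As an editorial sketch of how this recorder is meant to be driven from framework code (plugin, ctx, state, pod, and nodeInfo are assumed to already be in scope): the caller measures a plugin's latency and hands the sample off without ever blocking the scheduling hot path.

// In-package sketch: record a Filter plugin's latency asynchronously.
recorder := newMetricsRecorder(1000 /* bufferSize */, time.Second /* interval */)
startTime := time.Now()
status := plugin.Filter(ctx, state, pod, nodeInfo) // hypothetical plugin call
// If the buffer is full, the sample is silently dropped rather than blocking.
recorder.observePluginDurationAsync("Filter", plugin.Name(), status, metrics.SinceInSeconds(startTime))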

View File

@ -1,80 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"fmt"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/json"
"sigs.k8s.io/yaml"
)
// PluginFactory is a function that builds a plugin.
type PluginFactory = func(configuration *runtime.Unknown, f FrameworkHandle) (Plugin, error)
// DecodeInto decodes a configuration whose type is *runtime.Unknown into the given interface.
func DecodeInto(configuration *runtime.Unknown, into interface{}) error {
if configuration == nil || configuration.Raw == nil {
return nil
}
switch configuration.ContentType {
// If ContentType is empty, it means ContentTypeJSON by default.
case runtime.ContentTypeJSON, "":
return json.Unmarshal(configuration.Raw, into)
case runtime.ContentTypeYAML:
return yaml.Unmarshal(configuration.Raw, into)
default:
return fmt.Errorf("not supported content type %s", configuration.ContentType)
}
}
// Registry is a collection of all available plugins. The framework uses a
// registry to enable and initialize configured plugins.
// All plugins must be in the registry before initializing the framework.
type Registry map[string]PluginFactory
// Register adds a new plugin to the registry. If a plugin with the same name
// exists, it returns an error.
func (r Registry) Register(name string, factory PluginFactory) error {
if _, ok := r[name]; ok {
return fmt.Errorf("a plugin named %v already exists", name)
}
r[name] = factory
return nil
}
// Unregister removes an existing plugin from the registry. If no plugin with
// the provided name exists, it returns an error.
func (r Registry) Unregister(name string) error {
if _, ok := r[name]; !ok {
return fmt.Errorf("no plugin named %v exists", name)
}
delete(r, name)
return nil
}
// Merge merges the provided registry into the current one.
func (r Registry) Merge(in Registry) error {
for name, factory := range in {
if err := r.Register(name, factory); err != nil {
return err
}
}
return nil
}
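
A hypothetical in-package sketch of the registry and DecodeInto working together; the plugin name "example-plugin", its args struct, and the outOfTreeRegistry parameter are illustrative assumptions.

// newExamplePlugin is a PluginFactory that parses its args via DecodeInto.
func newExamplePlugin(configuration *runtime.Unknown, f FrameworkHandle) (Plugin, error) {
	args := struct {
		Mode string `json:"mode"`
	}{}
	if err := DecodeInto(configuration, &args); err != nil {
		return nil, err
	}
	// ... construct the plugin using args.Mode ...
	return nil, fmt.Errorf("construction elided in this sketch")
}

// buildRegistry shows the wiring: all plugins must be registered (and any
// out-of-tree registry merged) before the framework is initialized.
func buildRegistry(outOfTreeRegistry Registry) (Registry, error) {
	r := Registry{}
	if err := r.Register("example-plugin", newExamplePlugin); err != nil {
		return nil, err
	}
	if err := r.Merge(outOfTreeRegistry); err != nil {
		return nil, err
	}
	return r, nil
}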

View File

@ -1,164 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"fmt"
"sync"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
)
// waitingPodsMap is a thread-safe map used to maintain pods waiting in the permit phase.
type waitingPodsMap struct {
pods map[types.UID]*waitingPod
mu sync.RWMutex
}
// newWaitingPodsMap returns a new waitingPodsMap.
func newWaitingPodsMap() *waitingPodsMap {
return &waitingPodsMap{
pods: make(map[types.UID]*waitingPod),
}
}
// add a new WaitingPod to the map.
func (m *waitingPodsMap) add(wp *waitingPod) {
m.mu.Lock()
defer m.mu.Unlock()
m.pods[wp.GetPod().UID] = wp
}
// remove a WaitingPod from the map.
func (m *waitingPodsMap) remove(uid types.UID) {
m.mu.Lock()
defer m.mu.Unlock()
delete(m.pods, uid)
}
// get a WaitingPod from the map.
func (m *waitingPodsMap) get(uid types.UID) *waitingPod {
m.mu.RLock()
defer m.mu.RUnlock()
return m.pods[uid]
}
// iterate acquires a read lock and iterates over the WaitingPods map.
func (m *waitingPodsMap) iterate(callback func(WaitingPod)) {
m.mu.RLock()
defer m.mu.RUnlock()
for _, v := range m.pods {
callback(v)
}
}
// waitingPod represents a pod waiting in the permit phase.
type waitingPod struct {
pod *v1.Pod
pendingPlugins map[string]*time.Timer
s chan *Status
mu sync.RWMutex
}
var _ WaitingPod = &waitingPod{}
// newWaitingPod returns a new waitingPod instance.
func newWaitingPod(pod *v1.Pod, pluginsMaxWaitTime map[string]time.Duration) *waitingPod {
wp := &waitingPod{
pod: pod,
// Allow() and Reject() calls are non-blocking. This property is guaranteed
// by using a non-blocking send to this channel. The channel has a buffer of size 1
// to ensure that a non-blocking send is not lost, which could otherwise happen
// when the receive from this channel occurs only after the send.
s: make(chan *Status, 1),
}
wp.pendingPlugins = make(map[string]*time.Timer, len(pluginsMaxWaitTime))
// The time.AfterFunc calls wp.Reject which iterates through pendingPlugins map. Acquire the
// lock here so that time.AfterFunc can only execute after newWaitingPod finishes.
wp.mu.Lock()
defer wp.mu.Unlock()
for k, v := range pluginsMaxWaitTime {
plugin, waitTime := k, v
wp.pendingPlugins[plugin] = time.AfterFunc(waitTime, func() {
msg := fmt.Sprintf("rejected due to timeout after waiting %v at plugin %v",
waitTime, plugin)
wp.Reject(msg)
})
}
return wp
}
// GetPod returns a reference to the waiting pod.
func (w *waitingPod) GetPod() *v1.Pod {
return w.pod
}
// GetPendingPlugins returns the names of the permit plugins that are still pending.
func (w *waitingPod) GetPendingPlugins() []string {
w.mu.RLock()
defer w.mu.RUnlock()
plugins := make([]string, 0, len(w.pendingPlugins))
for p := range w.pendingPlugins {
plugins = append(plugins, p)
}
return plugins
}
// Allow declares that the waiting pod is allowed to be scheduled by the plugin pluginName.
// If this is the last remaining plugin to allow, then a success signal is delivered
// to unblock the pod.
func (w *waitingPod) Allow(pluginName string) {
w.mu.Lock()
defer w.mu.Unlock()
if timer, exist := w.pendingPlugins[pluginName]; exist {
timer.Stop()
delete(w.pendingPlugins, pluginName)
}
// Only signal success status after all plugins have allowed
if len(w.pendingPlugins) != 0 {
return
}
// The select clause works as a non-blocking send.
// If there is no receiver, it's a no-op (default case).
select {
case w.s <- NewStatus(Success, ""):
default:
}
}
// Reject declares the waiting pod unschedulable.
func (w *waitingPod) Reject(msg string) {
w.mu.RLock()
defer w.mu.RUnlock()
for _, timer := range w.pendingPlugins {
timer.Stop()
}
// The select clause works as a non-blocking send.
// If there is no receiver, it's a no-op (default case).
select {
case w.s <- NewStatus(Unschedulable, msg):
default:
}
}
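
An in-package editorial sketch of the lifecycle: a permit plugin returning "Wait" leads the framework to create a waitingPod, and an external actor later allows or rejects it. The plugin name, timeout, and the in-scope pod value are assumptions; real framework code consumes the channel inside WaitOnPermit.

wp := newWaitingPod(pod, map[string]time.Duration{"example-permit": 10 * time.Second})
go func() {
	// e.g. an external approval arrives; since this is the only pending
	// plugin, a Success status is delivered on wp.s.
	wp.Allow("example-permit")
}()
// WaitOnPermit effectively blocks like this until Allow, Reject, or a timeout:
status := <-wp.s
if !status.IsSuccess() {
	// the pod was rejected, either explicitly or by a plugin timeout
}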

View File

@ -1,31 +0,0 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["listers.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/listers",
visibility = ["//visibility:public"],
deps = [
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/client-go/listers/core/v1:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/scheduler/listers/fake:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@ -1,76 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package listers
import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
v1listers "k8s.io/client-go/listers/core/v1"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// PodFilter is a function to filter a pod. It returns true if the pod passes the filter and false otherwise.
type PodFilter func(*v1.Pod) bool
// PodLister interface represents anything that can list pods for a scheduler.
type PodLister interface {
// Returns the list of pods.
List(labels.Selector) ([]*v1.Pod, error)
// This is similar to "List()", but the returned slice does not
// contain pods that don't pass `podFilter`.
FilteredList(podFilter PodFilter, selector labels.Selector) ([]*v1.Pod, error)
}
// NodeInfoLister interface represents anything that can list/get NodeInfo objects from node name.
type NodeInfoLister interface {
// Returns the list of NodeInfos.
List() ([]*schedulernodeinfo.NodeInfo, error)
// Returns the list of NodeInfos of nodes with pods with affinity terms.
HavePodsWithAffinityList() ([]*schedulernodeinfo.NodeInfo, error)
// Returns the NodeInfo of the given node name.
Get(nodeName string) (*schedulernodeinfo.NodeInfo, error)
}
// SharedLister groups scheduler-specific listers.
type SharedLister interface {
Pods() PodLister
NodeInfos() NodeInfoLister
}
// GetPodServices gets the services that have the selector that match the labels on the given pod.
// TODO: this should be moved to ServiceAffinity plugin once that plugin is ready.
func GetPodServices(serviceLister v1listers.ServiceLister, pod *v1.Pod) ([]*v1.Service, error) {
allServices, err := serviceLister.Services(pod.Namespace).List(labels.Everything())
if err != nil {
return nil, err
}
var services []*v1.Service
for i := range allServices {
service := allServices[i]
if service.Spec.Selector == nil {
// services with nil selectors match nothing, not everything.
continue
}
selector := labels.Set(service.Spec.Selector).AsSelectorPreValidated()
if selector.Matches(labels.Set(pod.Labels)) {
services = append(services, service)
}
}
return services, nil
}
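
For illustration, a hedged sketch of a PodFilter in use with FilteredList; sharedLister is an assumed SharedLister instance.

// Keep only pods that are actually running; skip completed or pending ones.
runningOnly := func(p *v1.Pod) bool { return p.Status.Phase == v1.PodRunning }
runningPods, err := sharedLister.Pods().FilteredList(runningOnly, labels.Everything())
if err != nil {
	// handle the lister error
}
_ = runningPods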

View File

@ -1,36 +0,0 @@
package(default_visibility = ["//visibility:public"])
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = [
"metric_recorder.go",
"metrics.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/metrics",
deps = [
"//pkg/controller/volume/scheduling/metrics:go_default_library",
"//staging/src/k8s.io/component-base/metrics:go_default_library",
"//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)
go_test(
name = "go_default_test",
srcs = ["metric_recorder_test.go"],
embed = [":go_default_library"],
)

View File

@ -1,72 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"k8s.io/component-base/metrics"
)
// MetricRecorder represents a metric recorder that exposes Inc(), Dec() and
// Clear() operations on an underlying metric.
type MetricRecorder interface {
Inc()
Dec()
Clear()
}
var _ MetricRecorder = &PendingPodsRecorder{}
// PendingPodsRecorder is an implementation of MetricRecorder
type PendingPodsRecorder struct {
recorder metrics.GaugeMetric
}
// NewActivePodsRecorder returns a recorder backed by the ActivePods Prometheus metric
func NewActivePodsRecorder() *PendingPodsRecorder {
return &PendingPodsRecorder{
recorder: ActivePods(),
}
}
// NewUnschedulablePodsRecorder returns a recorder backed by the UnschedulablePods Prometheus metric
func NewUnschedulablePodsRecorder() *PendingPodsRecorder {
return &PendingPodsRecorder{
recorder: UnschedulablePods(),
}
}
// NewBackoffPodsRecorder returns a recorder backed by the BackoffPods Prometheus metric
func NewBackoffPodsRecorder() *PendingPodsRecorder {
return &PendingPodsRecorder{
recorder: BackoffPods(),
}
}
// Inc increases a metric counter by 1, in an atomic way
func (r *PendingPodsRecorder) Inc() {
r.recorder.Inc()
}
// Dec decreases a metric counter by 1, in an atomic way
func (r *PendingPodsRecorder) Dec() {
r.recorder.Dec()
}
// Clear sets a metric counter to 0, in an atomic way
func (r *PendingPodsRecorder) Clear() {
r.recorder.Set(float64(0))
}
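
A short usage sketch: a scheduling queue would typically own one recorder per queue type and drive it from its add/remove paths.

activeRecorder := NewActivePodsRecorder()
activeRecorder.Inc()   // a pod was added to the active queue
activeRecorder.Dec()   // a pod left the active queue
activeRecorder.Clear() // the queue was flushed; reset the gauge to zero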

View File

@ -1,302 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"time"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
volumeschedulingmetrics "k8s.io/kubernetes/pkg/controller/volume/scheduling/metrics"
)
const (
// SchedulerSubsystem - subsystem name used by scheduler
SchedulerSubsystem = "scheduler"
// DeprecatedSchedulingDurationName - scheduler duration metric name which is deprecated
DeprecatedSchedulingDurationName = "scheduling_duration_seconds"
// OperationLabel - operation label name
OperationLabel = "operation"
// Below are possible values for the operation label. Each represents a substep of e2e scheduling:
// PredicateEvaluation - predicate evaluation operation label value
PredicateEvaluation = "predicate_evaluation"
// PriorityEvaluation - priority evaluation operation label value
PriorityEvaluation = "priority_evaluation"
// PreemptionEvaluation - preemption evaluation operation label value (occurs in case of scheduling fitError).
PreemptionEvaluation = "preemption_evaluation"
// Binding - binding operation label value
Binding = "binding"
// E2eScheduling - e2e scheduling operation label value
E2eScheduling = "e2e_scheduling"
)
// All the histogram-based metrics have 1ms as the size of their smallest bucket.
var (
scheduleAttempts = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: SchedulerSubsystem,
Name: "schedule_attempts_total",
Help: "Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem.",
StabilityLevel: metrics.ALPHA,
}, []string{"result"})
// PodScheduleSuccesses counts how many pods were scheduled.
// This metric will be initialized again in Register() to ensure the metric is not a no-op metric.
PodScheduleSuccesses = scheduleAttempts.With(metrics.Labels{"result": "scheduled"})
// PodScheduleFailures counts how many pods could not be scheduled.
// This metric will be initialized again in Register() to ensure the metric is not a no-op metric.
PodScheduleFailures = scheduleAttempts.With(metrics.Labels{"result": "unschedulable"})
// PodScheduleErrors counts how many pods could not be scheduled due to a scheduler error.
// This metric will be initialized again in Register() to ensure the metric is not a no-op metric.
PodScheduleErrors = scheduleAttempts.With(metrics.Labels{"result": "error"})
DeprecatedSchedulingDuration = metrics.NewSummaryVec(
&metrics.SummaryOpts{
Subsystem: SchedulerSubsystem,
Name: DeprecatedSchedulingDurationName,
Help: "Scheduling latency in seconds split by sub-parts of the scheduling operation",
// Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term).
MaxAge: 5 * time.Hour,
StabilityLevel: metrics.ALPHA,
DeprecatedVersion: "1.19.0",
},
[]string{OperationLabel},
)
E2eSchedulingLatency = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_duration_seconds",
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)",
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
},
)
SchedulingAlgorithmLatency = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_duration_seconds",
Help: "Scheduling algorithm latency in seconds",
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
},
)
DeprecatedSchedulingAlgorithmPredicateEvaluationSecondsDuration = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_predicate_evaluation_seconds",
Help: "Scheduling algorithm predicate evaluation duration in seconds",
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
DeprecatedVersion: "1.19.0",
},
)
DeprecatedSchedulingAlgorithmPriorityEvaluationSecondsDuration = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_priority_evaluation_seconds",
Help: "Scheduling algorithm priority evaluation duration in seconds",
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
DeprecatedVersion: "1.19.0",
},
)
SchedulingAlgorithmPreemptionEvaluationDuration = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_preemption_evaluation_seconds",
Help: "Scheduling algorithm preemption evaluation duration in seconds",
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
},
)
BindingLatency = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "binding_duration_seconds",
Help: "Binding latency in seconds",
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
},
)
PreemptionVictims = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "pod_preemption_victims",
Help: "Number of selected preemption victims",
// we think #victims>50 is pretty rare, therefore [50, +Inf) is considered a single bucket.
Buckets: metrics.LinearBuckets(5, 5, 10),
StabilityLevel: metrics.ALPHA,
})
PreemptionAttempts = metrics.NewCounter(
&metrics.CounterOpts{
Subsystem: SchedulerSubsystem,
Name: "total_preemption_attempts",
Help: "Total preemption attempts in the cluster till now",
StabilityLevel: metrics.ALPHA,
})
pendingPods = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Subsystem: SchedulerSubsystem,
Name: "pending_pods",
Help: "Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.",
StabilityLevel: metrics.ALPHA,
}, []string{"queue"})
SchedulerGoroutines = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduler_goroutines",
Help: "Number of running goroutines split by the work they do such as binding.",
StabilityLevel: metrics.ALPHA,
}, []string{"work"})
PodSchedulingDuration = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "pod_scheduling_duration_seconds",
Help: "E2e latency for a pod being scheduled which may include multiple scheduling attempts.",
// Start with 1ms with the last bucket being [~16s, Inf)
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
})
PodSchedulingAttempts = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "pod_scheduling_attempts",
Help: "Number of attempts to successfully schedule a pod.",
Buckets: metrics.ExponentialBuckets(1, 2, 5),
StabilityLevel: metrics.ALPHA,
})
FrameworkExtensionPointDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "framework_extension_point_duration_seconds",
Help: "Latency for running all plugins of a specific extension point.",
// Start with 0.1ms with the last bucket being [~200ms, Inf)
Buckets: metrics.ExponentialBuckets(0.0001, 2, 12),
StabilityLevel: metrics.ALPHA,
},
[]string{"extension_point", "status"})
PluginExecutionDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "plugin_execution_duration_seconds",
Help: "Duration for running a plugin at a specific extension point.",
// Start with 0.01ms with the last bucket being [~22ms, Inf). We use a small factor (1.5)
// so that we have better granularity since plugin latency is very sensitive.
Buckets: metrics.ExponentialBuckets(0.00001, 1.5, 20),
StabilityLevel: metrics.ALPHA,
},
[]string{"plugin", "extension_point", "status"})
SchedulerQueueIncomingPods = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: SchedulerSubsystem,
Name: "queue_incoming_pods_total",
Help: "Number of pods added to scheduling queues by event and queue type.",
StabilityLevel: metrics.ALPHA,
}, []string{"queue", "event"})
PermitWaitDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "permit_wait_duration_seconds",
Help: "Duration of waiting on permit.",
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
},
[]string{"result"})
CacheSize = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduler_cache_size",
Help: "Number of nodes, pods, and assumed (bound) pods in the scheduler cache.",
StabilityLevel: metrics.ALPHA,
}, []string{"type"})
metricsList = []metrics.Registerable{
scheduleAttempts,
DeprecatedSchedulingDuration,
E2eSchedulingLatency,
SchedulingAlgorithmLatency,
BindingLatency,
DeprecatedSchedulingAlgorithmPredicateEvaluationSecondsDuration,
DeprecatedSchedulingAlgorithmPriorityEvaluationSecondsDuration,
SchedulingAlgorithmPreemptionEvaluationDuration,
PreemptionVictims,
PreemptionAttempts,
pendingPods,
PodSchedulingDuration,
PodSchedulingAttempts,
FrameworkExtensionPointDuration,
PluginExecutionDuration,
SchedulerQueueIncomingPods,
SchedulerGoroutines,
PermitWaitDuration,
CacheSize,
}
)
var registerMetrics sync.Once
// Register all metrics.
func Register() {
// Register the metrics.
registerMetrics.Do(func() {
for _, metric := range metricsList {
legacyregistry.MustRegister(metric)
}
volumeschedulingmetrics.RegisterVolumeSchedulingMetrics()
PodScheduleSuccesses = scheduleAttempts.With(metrics.Labels{"result": "scheduled"})
PodScheduleFailures = scheduleAttempts.With(metrics.Labels{"result": "unschedulable"})
PodScheduleErrors = scheduleAttempts.With(metrics.Labels{"result": "error"})
})
}
// GetGather returns the gatherer. It is used by test cases outside the current package.
func GetGather() metrics.Gatherer {
return legacyregistry.DefaultGatherer
}
// ActivePods returns the pending pods metrics with the label active
func ActivePods() metrics.GaugeMetric {
return pendingPods.With(metrics.Labels{"queue": "active"})
}
// BackoffPods returns the pending pods metrics with the label backoff
func BackoffPods() metrics.GaugeMetric {
return pendingPods.With(metrics.Labels{"queue": "backoff"})
}
// UnschedulablePods returns the pending pods metrics with the label unschedulable
func UnschedulablePods() metrics.GaugeMetric {
return pendingPods.With(metrics.Labels{"queue": "unschedulable"})
}
// Reset resets metrics
func Reset() {
DeprecatedSchedulingDuration.Reset()
}
// SinceInSeconds gets the time since the specified start in seconds.
func SinceInSeconds(start time.Time) float64 {
return time.Since(start).Seconds()
}
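
A brief editorial sketch of the intended call pattern: register once at startup, then observe durations computed with SinceInSeconds.

Register() // safe to call repeatedly; registration happens only once
startTime := time.Now()
// ... run the scheduling algorithm for one pod ...
SchedulingAlgorithmLatency.Observe(SinceInSeconds(startTime))
PodScheduleSuccesses.Inc() // or PodScheduleFailures / PodScheduleErrors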

View File

@ -1,49 +0,0 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = [
"host_ports.go",
"node_info.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/nodeinfo",
visibility = ["//visibility:public"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"host_ports_test.go",
"node_info_test.go",
],
embed = [":go_default_library"],
deps = [
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@ -1,135 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeinfo
import (
"k8s.io/api/core/v1"
)
// DefaultBindAllHostIP defines the default IP address used to bind to all hosts.
const DefaultBindAllHostIP = "0.0.0.0"
// ProtocolPort represents a protocol port pair, e.g. tcp:80.
type ProtocolPort struct {
Protocol string
Port int32
}
// NewProtocolPort creates a ProtocolPort instance.
func NewProtocolPort(protocol string, port int32) *ProtocolPort {
pp := &ProtocolPort{
Protocol: protocol,
Port: port,
}
if len(pp.Protocol) == 0 {
pp.Protocol = string(v1.ProtocolTCP)
}
return pp
}
// HostPortInfo stores mapping from ip to a set of ProtocolPort
type HostPortInfo map[string]map[ProtocolPort]struct{}
// Add adds (ip, protocol, port) to HostPortInfo
func (h HostPortInfo) Add(ip, protocol string, port int32) {
if port <= 0 {
return
}
h.sanitize(&ip, &protocol)
pp := NewProtocolPort(protocol, port)
if _, ok := h[ip]; !ok {
h[ip] = map[ProtocolPort]struct{}{
*pp: {},
}
return
}
h[ip][*pp] = struct{}{}
}
// Remove removes (ip, protocol, port) from HostPortInfo
func (h HostPortInfo) Remove(ip, protocol string, port int32) {
if port <= 0 {
return
}
h.sanitize(&ip, &protocol)
pp := NewProtocolPort(protocol, port)
if m, ok := h[ip]; ok {
delete(m, *pp)
if len(h[ip]) == 0 {
delete(h, ip)
}
}
}
// Len returns the total number of (ip, protocol, port) tuples in HostPortInfo
func (h HostPortInfo) Len() int {
length := 0
for _, m := range h {
length += len(m)
}
return length
}
// CheckConflict checks if the input (ip, protocol, port) conflicts with the existing
// ones in HostPortInfo.
func (h HostPortInfo) CheckConflict(ip, protocol string, port int32) bool {
if port <= 0 {
return false
}
h.sanitize(&ip, &protocol)
pp := NewProtocolPort(protocol, port)
// If ip is 0.0.0.0 check all IP's (protocol, port) pair
if ip == DefaultBindAllHostIP {
for _, m := range h {
if _, ok := m[*pp]; ok {
return true
}
}
return false
}
// If ip isn't 0.0.0.0, only check IP and 0.0.0.0's (protocol, port) pair
for _, key := range []string{DefaultBindAllHostIP, ip} {
if m, ok := h[key]; ok {
if _, ok2 := m[*pp]; ok2 {
return true
}
}
}
return false
}
// sanitize the parameters
func (h HostPortInfo) sanitize(ip, protocol *string) {
if len(*ip) == 0 {
*ip = DefaultBindAllHostIP
}
if len(*protocol) == 0 {
*protocol = string(v1.ProtocolTCP)
}
}
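
A small sketch of the sanitizing behavior documented above: an empty IP and protocol default to 0.0.0.0 and TCP, and a wildcard entry conflicts with every concrete IP.

hpi := make(HostPortInfo)
hpi.Add("", "", 8080) // sanitized to ("0.0.0.0", "TCP", 8080)
conflict := hpi.CheckConflict("127.0.0.1", "TCP", 8080) // true: 0.0.0.0 covers all IPs
hpi.Remove("", "TCP", 8080) // the 0.0.0.0 map entry is cleaned up once empty
_ = conflict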

View File

@ -1,705 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeinfo
import (
"errors"
"fmt"
"sync"
"sync/atomic"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/features"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
var (
emptyResource = Resource{}
generation int64
)
// ImageStateSummary provides summarized information about the state of an image.
type ImageStateSummary struct {
// Size of the image
Size int64
// Used to track how many nodes have this image
NumNodes int
}
// NodeInfo is node level aggregated information.
type NodeInfo struct {
// Overall node information.
node *v1.Node
pods []*v1.Pod
podsWithAffinity []*v1.Pod
usedPorts HostPortInfo
// Total requested resources of all pods on this node. This includes assumed
// pods, which scheduler has sent for binding, but may not be scheduled yet.
requestedResource *Resource
// Total requested resources of all pods on this node with a minimum value
// applied to each container's CPU and memory requests. This does not reflect
// the actual resource requests for this node, but is used to avoid scheduling
// many zero-request pods onto one node.
nonzeroRequest *Resource
// We store allocatedResources (which is Node.Status.Allocatable.*) explicitly
// as int64, to avoid conversions and accessing map.
allocatableResource *Resource
// Cached taints of the node for faster lookup.
taints []v1.Taint
taintsErr error
// imageStates holds the entry of an image if and only if this image is on the node. The entry can be used for
// checking an image's existence and advanced usage (e.g., image locality scheduling policy) based on the image
// state information.
imageStates map[string]*ImageStateSummary
// TransientInfo holds the information pertaining to a scheduling cycle. This will be destructed at the end of
// scheduling cycle.
// TODO: @ravig. Remove this once we have a clear approach for message passing across predicates and priorities.
TransientInfo *TransientSchedulerInfo
// Cached conditions of node for faster lookup.
memoryPressureCondition v1.ConditionStatus
diskPressureCondition v1.ConditionStatus
pidPressureCondition v1.ConditionStatus
// Whenever NodeInfo changes, generation is bumped.
// This is used to avoid cloning it if the object didn't change.
generation int64
}
// initializeNodeTransientInfo initializes transient information pertaining to the node.
func initializeNodeTransientInfo() nodeTransientInfo {
return nodeTransientInfo{AllocatableVolumesCount: 0, RequestedVolumes: 0}
}
// nextGeneration: Let's make sure history never forgets the name...
// Increments the generation number monotonically ensuring that generation numbers never collide.
// Collision of the generation numbers would be particularly problematic if a node was deleted and
// added back with the same name. See issue#63262.
func nextGeneration() int64 {
return atomic.AddInt64(&generation, 1)
}
// nodeTransientInfo contains transient node information while scheduling.
type nodeTransientInfo struct {
// AllocatableVolumesCount contains number of volumes that could be attached to node.
AllocatableVolumesCount int
// Requested number of volumes on a particular node.
RequestedVolumes int
}
// TransientSchedulerInfo is a transient structure which is destructed at the end of each scheduling cycle.
// It consists of items that are valid for a scheduling cycle and is used for message passing across predicates and
// priorities. Some examples which could be used as fields are number of volumes being used on node, current utilization
// on node etc.
// IMPORTANT NOTE: Make sure that each field in this structure is documented along with usage. Expand this structure
// only when absolutely needed as this data structure will be created and destroyed during every scheduling cycle.
type TransientSchedulerInfo struct {
TransientLock sync.Mutex
// NodeTransInfo holds the information related to nodeTransientInformation. NodeName is the key here.
TransNodeInfo nodeTransientInfo
}
// NewTransientSchedulerInfo returns a new scheduler transient structure with initialized values.
func NewTransientSchedulerInfo() *TransientSchedulerInfo {
tsi := &TransientSchedulerInfo{
TransNodeInfo: initializeNodeTransientInfo(),
}
return tsi
}
// ResetTransientSchedulerInfo resets the TransientSchedulerInfo.
func (transientSchedInfo *TransientSchedulerInfo) ResetTransientSchedulerInfo() {
transientSchedInfo.TransientLock.Lock()
defer transientSchedInfo.TransientLock.Unlock()
// Reset TransientNodeInfo.
transientSchedInfo.TransNodeInfo.AllocatableVolumesCount = 0
transientSchedInfo.TransNodeInfo.RequestedVolumes = 0
}
// Resource is a collection of compute resource.
type Resource struct {
MilliCPU int64
Memory int64
EphemeralStorage int64
// We store allowedPodNumber (which is Node.Status.Allocatable.Pods().Value())
// explicitly as int, to avoid conversions and improve performance.
AllowedPodNumber int
// ScalarResources
ScalarResources map[v1.ResourceName]int64
}
// NewResource creates a Resource from ResourceList
func NewResource(rl v1.ResourceList) *Resource {
r := &Resource{}
r.Add(rl)
return r
}
// Add adds ResourceList into Resource.
func (r *Resource) Add(rl v1.ResourceList) {
if r == nil {
return
}
for rName, rQuant := range rl {
switch rName {
case v1.ResourceCPU:
r.MilliCPU += rQuant.MilliValue()
case v1.ResourceMemory:
r.Memory += rQuant.Value()
case v1.ResourcePods:
r.AllowedPodNumber += int(rQuant.Value())
case v1.ResourceEphemeralStorage:
if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
// if the local storage capacity isolation feature gate is disabled, pods request 0 disk.
r.EphemeralStorage += rQuant.Value()
}
default:
if v1helper.IsScalarResourceName(rName) {
r.AddScalar(rName, rQuant.Value())
}
}
}
}
// ResourceList returns a resource list of this resource.
func (r *Resource) ResourceList() v1.ResourceList {
result := v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(r.MilliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(r.Memory, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(int64(r.AllowedPodNumber), resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(r.EphemeralStorage, resource.BinarySI),
}
for rName, rQuant := range r.ScalarResources {
if v1helper.IsHugePageResourceName(rName) {
result[rName] = *resource.NewQuantity(rQuant, resource.BinarySI)
} else {
result[rName] = *resource.NewQuantity(rQuant, resource.DecimalSI)
}
}
return result
}
// Clone returns a copy of this resource.
func (r *Resource) Clone() *Resource {
res := &Resource{
MilliCPU: r.MilliCPU,
Memory: r.Memory,
AllowedPodNumber: r.AllowedPodNumber,
EphemeralStorage: r.EphemeralStorage,
}
if r.ScalarResources != nil {
res.ScalarResources = make(map[v1.ResourceName]int64)
for k, v := range r.ScalarResources {
res.ScalarResources[k] = v
}
}
return res
}
// AddScalar adds a resource by a scalar value of this resource.
func (r *Resource) AddScalar(name v1.ResourceName, quantity int64) {
r.SetScalar(name, r.ScalarResources[name]+quantity)
}
// SetScalar sets a resource by a scalar value of this resource.
func (r *Resource) SetScalar(name v1.ResourceName, quantity int64) {
// Lazily allocate scalar resource map.
if r.ScalarResources == nil {
r.ScalarResources = map[v1.ResourceName]int64{}
}
r.ScalarResources[name] = quantity
}
// SetMaxResource compares with ResourceList and takes max value for each Resource.
func (r *Resource) SetMaxResource(rl v1.ResourceList) {
if r == nil {
return
}
for rName, rQuantity := range rl {
switch rName {
case v1.ResourceMemory:
if mem := rQuantity.Value(); mem > r.Memory {
r.Memory = mem
}
case v1.ResourceCPU:
if cpu := rQuantity.MilliValue(); cpu > r.MilliCPU {
r.MilliCPU = cpu
}
case v1.ResourceEphemeralStorage:
if ephemeralStorage := rQuantity.Value(); ephemeralStorage > r.EphemeralStorage {
r.EphemeralStorage = ephemeralStorage
}
default:
if v1helper.IsScalarResourceName(rName) {
value := rQuantity.Value()
if value > r.ScalarResources[rName] {
r.SetScalar(rName, value)
}
}
}
}
}
// NewNodeInfo returns a ready to use empty NodeInfo object.
// If any pods are given in arguments, their information will be aggregated in
// the returned object.
func NewNodeInfo(pods ...*v1.Pod) *NodeInfo {
ni := &NodeInfo{
requestedResource: &Resource{},
nonzeroRequest: &Resource{},
allocatableResource: &Resource{},
TransientInfo: NewTransientSchedulerInfo(),
generation: nextGeneration(),
usedPorts: make(HostPortInfo),
imageStates: make(map[string]*ImageStateSummary),
}
for _, pod := range pods {
ni.AddPod(pod)
}
return ni
}
// Node returns overall information about this node.
func (n *NodeInfo) Node() *v1.Node {
if n == nil {
return nil
}
return n.node
}
// Pods returns all pods scheduled (including assumed to be) on this node.
func (n *NodeInfo) Pods() []*v1.Pod {
if n == nil {
return nil
}
return n.pods
}
// SetPods sets all pods scheduled (including assumed to be) on this node.
func (n *NodeInfo) SetPods(pods []*v1.Pod) {
n.pods = pods
}
// UsedPorts returns used ports on this node.
func (n *NodeInfo) UsedPorts() HostPortInfo {
if n == nil {
return nil
}
return n.usedPorts
}
// SetUsedPorts sets the used ports on this node.
func (n *NodeInfo) SetUsedPorts(newUsedPorts HostPortInfo) {
n.usedPorts = newUsedPorts
}
// ImageStates returns the state information of all images.
func (n *NodeInfo) ImageStates() map[string]*ImageStateSummary {
if n == nil {
return nil
}
return n.imageStates
}
// SetImageStates sets the state information of all images.
func (n *NodeInfo) SetImageStates(newImageStates map[string]*ImageStateSummary) {
n.imageStates = newImageStates
}
// PodsWithAffinity returns all pods with (anti)affinity constraints on this node.
func (n *NodeInfo) PodsWithAffinity() []*v1.Pod {
if n == nil {
return nil
}
return n.podsWithAffinity
}
// AllowedPodNumber returns the number of the allowed pods on this node.
func (n *NodeInfo) AllowedPodNumber() int {
if n == nil || n.allocatableResource == nil {
return 0
}
return n.allocatableResource.AllowedPodNumber
}
// Taints returns the taints list on this node.
func (n *NodeInfo) Taints() ([]v1.Taint, error) {
if n == nil {
return nil, nil
}
return n.taints, n.taintsErr
}
// SetTaints sets the taints list on this node.
func (n *NodeInfo) SetTaints(newTaints []v1.Taint) {
n.taints = newTaints
}
// RequestedResource returns aggregated resource request of pods on this node.
func (n *NodeInfo) RequestedResource() Resource {
if n == nil {
return emptyResource
}
return *n.requestedResource
}
// SetRequestedResource sets the aggregated resource request of pods on this node.
func (n *NodeInfo) SetRequestedResource(newResource *Resource) {
n.requestedResource = newResource
}
// NonZeroRequest returns aggregated nonzero resource request of pods on this node.
func (n *NodeInfo) NonZeroRequest() Resource {
if n == nil {
return emptyResource
}
return *n.nonzeroRequest
}
// SetNonZeroRequest sets the aggregated nonzero resource request of pods on this node.
func (n *NodeInfo) SetNonZeroRequest(newResource *Resource) {
n.nonzeroRequest = newResource
}
// AllocatableResource returns allocatable resources on a given node.
func (n *NodeInfo) AllocatableResource() Resource {
if n == nil {
return emptyResource
}
return *n.allocatableResource
}
// SetAllocatableResource sets the allocatableResource information of given node.
func (n *NodeInfo) SetAllocatableResource(allocatableResource *Resource) {
n.allocatableResource = allocatableResource
n.generation = nextGeneration()
}
// GetGeneration returns the generation on this node.
func (n *NodeInfo) GetGeneration() int64 {
if n == nil {
return 0
}
return n.generation
}
// SetGeneration sets the generation on this node. This is for testing only.
func (n *NodeInfo) SetGeneration(newGeneration int64) {
n.generation = newGeneration
}
// Clone returns a copy of this node.
func (n *NodeInfo) Clone() *NodeInfo {
clone := &NodeInfo{
node: n.node,
requestedResource: n.requestedResource.Clone(),
nonzeroRequest: n.nonzeroRequest.Clone(),
allocatableResource: n.allocatableResource.Clone(),
taintsErr: n.taintsErr,
TransientInfo: n.TransientInfo,
memoryPressureCondition: n.memoryPressureCondition,
diskPressureCondition: n.diskPressureCondition,
pidPressureCondition: n.pidPressureCondition,
usedPorts: make(HostPortInfo),
imageStates: n.imageStates,
generation: n.generation,
}
if len(n.pods) > 0 {
clone.pods = append([]*v1.Pod(nil), n.pods...)
}
if len(n.usedPorts) > 0 {
// HostPortInfo is a map-in-map struct
// make sure it's deep copied
for ip, portMap := range n.usedPorts {
clone.usedPorts[ip] = make(map[ProtocolPort]struct{})
for protocolPort, v := range portMap {
clone.usedPorts[ip][protocolPort] = v
}
}
}
if len(n.podsWithAffinity) > 0 {
clone.podsWithAffinity = append([]*v1.Pod(nil), n.podsWithAffinity...)
}
if len(n.taints) > 0 {
clone.taints = append([]v1.Taint(nil), n.taints...)
}
return clone
}
// VolumeLimits returns volume limits associated with the node
func (n *NodeInfo) VolumeLimits() map[v1.ResourceName]int64 {
volumeLimits := map[v1.ResourceName]int64{}
for k, v := range n.AllocatableResource().ScalarResources {
if v1helper.IsAttachableVolumeResourceName(k) {
volumeLimits[k] = v
}
}
return volumeLimits
}
// String returns representation of human readable format of this NodeInfo.
func (n *NodeInfo) String() string {
podKeys := make([]string, len(n.pods))
for i, pod := range n.pods {
podKeys[i] = pod.Name
}
return fmt.Sprintf("&NodeInfo{Pods:%v, RequestedResource:%#v, NonZeroRequest: %#v, UsedPort: %#v, AllocatableResource:%#v}",
podKeys, n.requestedResource, n.nonzeroRequest, n.usedPorts, n.allocatableResource)
}
func hasPodAffinityConstraints(pod *v1.Pod) bool {
affinity := pod.Spec.Affinity
return affinity != nil && (affinity.PodAffinity != nil || affinity.PodAntiAffinity != nil)
}
// AddPod adds pod information to this NodeInfo.
func (n *NodeInfo) AddPod(pod *v1.Pod) {
res, non0CPU, non0Mem := calculateResource(pod)
n.requestedResource.MilliCPU += res.MilliCPU
n.requestedResource.Memory += res.Memory
n.requestedResource.EphemeralStorage += res.EphemeralStorage
if n.requestedResource.ScalarResources == nil && len(res.ScalarResources) > 0 {
n.requestedResource.ScalarResources = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.ScalarResources {
n.requestedResource.ScalarResources[rName] += rQuant
}
n.nonzeroRequest.MilliCPU += non0CPU
n.nonzeroRequest.Memory += non0Mem
n.pods = append(n.pods, pod)
if hasPodAffinityConstraints(pod) {
n.podsWithAffinity = append(n.podsWithAffinity, pod)
}
// Consume ports when pods added.
n.UpdateUsedPorts(pod, true)
n.generation = nextGeneration()
}
// RemovePod subtracts pod information from this NodeInfo.
func (n *NodeInfo) RemovePod(pod *v1.Pod) error {
k1, err := GetPodKey(pod)
if err != nil {
return err
}
for i := range n.podsWithAffinity {
k2, err := GetPodKey(n.podsWithAffinity[i])
if err != nil {
klog.Errorf("Cannot get pod key, err: %v", err)
continue
}
if k1 == k2 {
// delete the element
n.podsWithAffinity[i] = n.podsWithAffinity[len(n.podsWithAffinity)-1]
n.podsWithAffinity = n.podsWithAffinity[:len(n.podsWithAffinity)-1]
break
}
}
for i := range n.pods {
k2, err := GetPodKey(n.pods[i])
if err != nil {
klog.Errorf("Cannot get pod key, err: %v", err)
continue
}
if k1 == k2 {
// delete the element
n.pods[i] = n.pods[len(n.pods)-1]
n.pods = n.pods[:len(n.pods)-1]
// reduce the resource data
res, non0CPU, non0Mem := calculateResource(pod)
n.requestedResource.MilliCPU -= res.MilliCPU
n.requestedResource.Memory -= res.Memory
n.requestedResource.EphemeralStorage -= res.EphemeralStorage
if len(res.ScalarResources) > 0 && n.requestedResource.ScalarResources == nil {
n.requestedResource.ScalarResources = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.ScalarResources {
n.requestedResource.ScalarResources[rName] -= rQuant
}
n.nonzeroRequest.MilliCPU -= non0CPU
n.nonzeroRequest.Memory -= non0Mem
// Release ports when remove Pods.
n.UpdateUsedPorts(pod, false)
n.generation = nextGeneration()
n.resetSlicesIfEmpty()
return nil
}
}
return fmt.Errorf("no corresponding pod %s in pods of node %s", pod.Name, n.node.Name)
}
// resetSlicesIfEmpty resets the slices to nil so that we can do DeepEqual in unit tests.
func (n *NodeInfo) resetSlicesIfEmpty() {
if len(n.podsWithAffinity) == 0 {
n.podsWithAffinity = nil
}
if len(n.pods) == 0 {
n.pods = nil
}
}
// resourceRequest = max(sum(podSpec.Containers), podSpec.InitContainers) + overhead
func calculateResource(pod *v1.Pod) (res Resource, non0CPU int64, non0Mem int64) {
resPtr := &res
for _, c := range pod.Spec.Containers {
resPtr.Add(c.Resources.Requests)
non0CPUReq, non0MemReq := schedutil.GetNonzeroRequests(&c.Resources.Requests)
non0CPU += non0CPUReq
non0Mem += non0MemReq
// No non-zero resources for GPUs or opaque resources.
}
for _, ic := range pod.Spec.InitContainers {
resPtr.SetMaxResource(ic.Resources.Requests)
non0CPUReq, non0MemReq := schedutil.GetNonzeroRequests(&ic.Resources.Requests)
if non0CPU < non0CPUReq {
non0CPU = non0CPUReq
}
if non0Mem < non0MemReq {
non0Mem = non0MemReq
}
}
// If Overhead is being utilized, add to the total requests for the pod
if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(features.PodOverhead) {
resPtr.Add(pod.Spec.Overhead)
if _, found := pod.Spec.Overhead[v1.ResourceCPU]; found {
non0CPU += pod.Spec.Overhead.Cpu().MilliValue()
}
if _, found := pod.Spec.Overhead[v1.ResourceMemory]; found {
non0Mem += pod.Spec.Overhead.Memory().Value()
}
}
return
}
// UpdateUsedPorts updates the UsedPorts of NodeInfo.
func (n *NodeInfo) UpdateUsedPorts(pod *v1.Pod, add bool) {
for j := range pod.Spec.Containers {
container := &pod.Spec.Containers[j]
for k := range container.Ports {
podPort := &container.Ports[k]
if add {
n.usedPorts.Add(podPort.HostIP, string(podPort.Protocol), podPort.HostPort)
} else {
n.usedPorts.Remove(podPort.HostIP, string(podPort.Protocol), podPort.HostPort)
}
}
}
}
// SetNode sets the overall node information.
func (n *NodeInfo) SetNode(node *v1.Node) error {
n.node = node
n.allocatableResource = NewResource(node.Status.Allocatable)
n.taints = node.Spec.Taints
for i := range node.Status.Conditions {
cond := &node.Status.Conditions[i]
switch cond.Type {
case v1.NodeMemoryPressure:
n.memoryPressureCondition = cond.Status
case v1.NodeDiskPressure:
n.diskPressureCondition = cond.Status
case v1.NodePIDPressure:
n.pidPressureCondition = cond.Status
default:
// We ignore other conditions.
}
}
n.TransientInfo = NewTransientSchedulerInfo()
n.generation = nextGeneration()
return nil
}
// FilterOutPods receives a list of pods and filters out those whose node names
// are equal to the node of this NodeInfo, but are not found in the pods of this NodeInfo.
//
// Preemption logic simulates removal of pods on a node by removing them from the
// corresponding NodeInfo. In order for the simulation to work, we call this method
// on the pods returned from SchedulerCache, so that predicate functions see
// only the pods that are not removed from the NodeInfo.
func (n *NodeInfo) FilterOutPods(pods []*v1.Pod) []*v1.Pod {
node := n.Node()
if node == nil {
return pods
}
filtered := make([]*v1.Pod, 0, len(pods))
for _, p := range pods {
if p.Spec.NodeName != node.Name {
filtered = append(filtered, p)
continue
}
// If pod is on the given node, add it to 'filtered' only if it is present in nodeInfo.
podKey, err := GetPodKey(p)
if err != nil {
continue
}
for _, np := range n.Pods() {
npodkey, _ := GetPodKey(np)
if npodkey == podKey {
filtered = append(filtered, p)
break
}
}
}
return filtered
}
// GetPodKey returns the string key of a pod.
func GetPodKey(pod *v1.Pod) (string, error) {
uid := string(pod.UID)
if len(uid) == 0 {
return "", errors.New("Cannot get cache key for pod with empty UID")
}
return uid, nil
}
// Filter implements the PodFilter interface. It returns false only if the pod's
// node name matches NodeInfo.node and the pod is not found in the pods list.
// Otherwise, it returns true.
func (n *NodeInfo) Filter(pod *v1.Pod) bool {
if pod.Spec.NodeName != n.node.Name {
return true
}
for _, p := range n.pods {
if p.Name == pod.Name && p.Namespace == pod.Namespace {
return true
}
}
return false
}
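
An in-package sketch of typical NodeInfo handling; node and pod are assumed *v1.Node and *v1.Pod values.

ni := NewNodeInfo()
if err := ni.SetNode(node); err != nil {
	// SetNode currently always returns nil, but callers should still check
}
ni.AddPod(pod) // aggregates requests, affinity, host ports; bumps generation
fmt.Printf("requested=%#v usedPorts=%d\n", ni.RequestedResource(), ni.UsedPorts().Len())
clone := ni.Clone() // deep-copies ports and slices; safe to mutate independently
_ = clone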

View File

@ -1,65 +0,0 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_test(
name = "go_default_test",
srcs = [
"error_channel_test.go",
"non_zero_test.go",
"topologies_test.go",
"utils_test.go",
],
embed = [":go_default_library"],
deps = [
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/selection:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/kube-scheduler/extender/v1:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
],
)
go_library(
name = "go_default_library",
srcs = [
"clock.go",
"error_channel.go",
"non_zero.go",
"topologies.go",
"utils.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/util",
deps = [
"//pkg/api/v1/pod:go_default_library",
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/kube-scheduler/extender/v1:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@ -1,34 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"time"
)
// Clock provides an interface for getting the current time
type Clock interface {
Now() time.Time
}
// RealClock implements Clock using the standard time package.
type RealClock struct{}
// Now returns the current time, as reported by time.Now.
func (RealClock) Now() time.Time {
return time.Now()
}
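The interface exists so the time source can be injected; a fake for deterministic tests (not part of this package) might look like the following sketch:

// FakeClock reports a fixed instant instead of the wall clock.
type FakeClock struct {
	T time.Time
}

// Now returns the configured instant.
func (f FakeClock) Now() time.Time { return f.T }

// A consumer written against Clock accepts either implementation:
//   var c Clock = RealClock{}
//   c = FakeClock{T: time.Date(2020, 12, 17, 0, 0, 0, 0, time.UTC)}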

View File

@ -1,59 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import "context"
// ErrorChannel supports non-blocking send and receive operations for capturing
// errors. At most one error is buffered; any further errors sent are dropped
// until the buffered error is received and the channel becomes empty again.
type ErrorChannel struct {
errCh chan error
}
// SendError sends an error without blocking the sender.
func (e *ErrorChannel) SendError(err error) {
select {
case e.errCh <- err:
default:
}
}
// SendErrorWithCancel sends an error without blocking the sender, then calls
// the given cancel function.
func (e *ErrorChannel) SendErrorWithCancel(err error, cancel context.CancelFunc) {
e.SendError(err)
cancel()
}
// ReceiveError receives an error from the channel without blocking the receiver.
func (e *ErrorChannel) ReceiveError() error {
select {
case err := <-e.errCh:
return err
default:
return nil
}
}
// NewErrorChannel returns a new ErrorChannel.
func NewErrorChannel() *ErrorChannel {
return &ErrorChannel{
errCh: make(chan error, 1),
}
}
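A typical use is fanning work out across goroutines while keeping only the first failure. A hedged sketch (imports of context and sync omitted; doWork is a hypothetical worker, not part of this package):

func runAll(ctx context.Context, doWork func(context.Context, int) error) error {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	errCh := NewErrorChannel()
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			if err := doWork(ctx, i); err != nil { // doWork: hypothetical helper
				// The first error is buffered and cancels the rest; later errors are dropped.
				errCh.SendErrorWithCancel(err, cancel)
			}
		}(i)
	}
	wg.Wait()
	return errCh.ReceiveError() // nil if every worker succeeded
}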

View File

@ -1,85 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
v1 "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/features"
)
// For each of these resources, a pod that doesn't request the resource explicitly
// will be treated as having requested the amount indicated below, for the purpose
// of computing priority only. This ensures that when scheduling zero-request pods, such
// pods will not all be scheduled to the machine with the smallest in-use request,
// and that when scheduling regular pods, such pods will not see zero-request pods as
// consuming no resources whatsoever. We chose these values to be similar to the
// resources that we give to cluster addon pods (#10653). But they are pretty arbitrary.
// As described in #11713, we use request instead of limit to deal with resource requirements.
const (
// DefaultMilliCPURequest defines default milli cpu request number.
DefaultMilliCPURequest int64 = 100 // 0.1 core
// DefaultMemoryRequest defines default memory request size.
DefaultMemoryRequest int64 = 200 * 1024 * 1024 // 200 MB
)
// GetNonzeroRequests returns the CPU (millicores) and memory (bytes) requests
// from the given resource list, substituting the defaults above for any
// resource that is not set.
func GetNonzeroRequests(requests *v1.ResourceList) (int64, int64) {
return GetNonzeroRequestForResource(v1.ResourceCPU, requests),
GetNonzeroRequestForResource(v1.ResourceMemory, requests)
}
// GetNonzeroRequestForResource returns the request for the given resource if it
// is set, and the resource's default request otherwise.
func GetNonzeroRequestForResource(resource v1.ResourceName, requests *v1.ResourceList) int64 {
switch resource {
case v1.ResourceCPU:
// Override if un-set, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceCPU]; !found {
return DefaultMilliCPURequest
}
return requests.Cpu().MilliValue()
case v1.ResourceMemory:
// Override if un-set, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceMemory]; !found {
return DefaultMemoryRequest
}
return requests.Memory().Value()
case v1.ResourceEphemeralStorage:
// If the local storage capacity isolation feature gate is disabled, pods request 0 disk.
if !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
return 0
}
quantity, found := (*requests)[v1.ResourceEphemeralStorage]
if !found {
return 0
}
return quantity.Value()
default:
if v1helper.IsScalarResourceName(resource) {
quantity, found := (*requests)[resource]
if !found {
return 0
}
return quantity.Value()
}
}
return 0
}
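To illustrate the defaulting, a short sketch (quantities illustrative; assumes k8s.io/apimachinery/pkg/api/resource is imported):

// Explicit requests are returned as-is: CPU in millicores, memory in bytes.
reqs := v1.ResourceList{
	v1.ResourceCPU:    resource.MustParse("250m"),
	v1.ResourceMemory: resource.MustParse("64Mi"),
}
cpu, mem := GetNonzeroRequests(&reqs)
// cpu == 250, mem == 64 * 1024 * 1024

// An empty list falls back to the defaults defined above.
empty := v1.ResourceList{}
cpu, mem = GetNonzeroRequests(&empty)
// cpu == DefaultMilliCPURequest (100), mem == DefaultMemoryRequest (200 MB)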

View File

@ -1,81 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
)
// GetNamespacesFromPodAffinityTerm returns the set of namespace names selected
// by podAffinityTerm. If the term's Namespaces list is empty, it falls back to
// the given pod's own namespace.
func GetNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.String {
names := sets.String{}
if len(podAffinityTerm.Namespaces) == 0 {
names.Insert(pod.Namespace)
} else {
names.Insert(podAffinityTerm.Namespaces...)
}
return names
}
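For example (namespace names illustrative):

pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "team-a"}}

// A term with no explicit namespaces defaults to the pod's own namespace.
term := &v1.PodAffinityTerm{}
_ = GetNamespacesFromPodAffinityTerm(pod, term) // {"team-a"}

// Explicit namespaces on the term take precedence over the pod's.
term = &v1.PodAffinityTerm{Namespaces: []string{"team-b", "team-c"}}
_ = GetNamespacesFromPodAffinityTerm(pod, term) // {"team-b", "team-c"}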
// PodMatchesTermsNamespaceAndSelector returns true if the given <pod>
// matches the namespace and selector defined by <affinityPod>'s <term>.
func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.String, selector labels.Selector) bool {
if !namespaces.Has(pod.Namespace) {
return false
}
if !selector.Matches(labels.Set(pod.Labels)) {
return false
}
return true
}
// NodesHaveSameTopologyKey checks whether nodeA and nodeB have the same label
// value for the given topologyKey. It returns false if topologyKey is empty or
// if either node lacks the label.
func NodesHaveSameTopologyKey(nodeA, nodeB *v1.Node, topologyKey string) bool {
if len(topologyKey) == 0 {
return false
}
if nodeA.Labels == nil || nodeB.Labels == nil {
return false
}
nodeALabel, okA := nodeA.Labels[topologyKey]
nodeBLabel, okB := nodeB.Labels[topologyKey]
// Compare the values only when the label is present on both nodes.
if okB && okA {
return nodeALabel == nodeBLabel
}
return false
}
// Topologies contains topologies information of nodes.
type Topologies struct {
DefaultKeys []string
}
// NodesHaveSameTopologyKey checks whether nodeA and nodeB have the same label
// value for the given topologyKey.
func (tps *Topologies) NodesHaveSameTopologyKey(nodeA, nodeB *v1.Node, topologyKey string) bool {
return NodesHaveSameTopologyKey(nodeA, nodeB, topologyKey)
}
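A sketch of the comparison (label key and values illustrative):

zoneKey := "topology.kubernetes.io/zone"
nodeA := &v1.Node{ObjectMeta: metav1.ObjectMeta{
	Labels: map[string]string{zoneKey: "zone-1"},
}}
nodeB := &v1.Node{ObjectMeta: metav1.ObjectMeta{
	Labels: map[string]string{zoneKey: "zone-1"},
}}

_ = NodesHaveSameTopologyKey(nodeA, nodeB, zoneKey)                  // true: same value
_ = NodesHaveSameTopologyKey(nodeA, nodeB, "kubernetes.io/hostname") // false: label absent
_ = NodesHaveSameTopologyKey(nodeA, nodeB, "")                       // false: empty key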

View File

@ -1,111 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"time"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog"
extenderv1 "k8s.io/kube-scheduler/extender/v1"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)
// GetPodFullName returns a name that uniquely identifies a pod.
func GetPodFullName(pod *v1.Pod) string {
// Use underscore as the delimiter because it is not allowed in a pod name
// (DNS subdomain format).
return pod.Name + "_" + pod.Namespace
}
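Note the ordering, name first and namespace second (names illustrative):

pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "web-0", Namespace: "prod"}}
_ = GetPodFullName(pod) // "web-0_prod"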
// GetPodStartTime returns the start time of the given pod, or the current
// timestamp if the pod has not started yet.
func GetPodStartTime(pod *v1.Pod) *metav1.Time {
if pod.Status.StartTime != nil {
return pod.Status.StartTime
}
// Assumed pods and bound pods that haven't started don't have a StartTime yet.
return &metav1.Time{Time: time.Now()}
}
// GetEarliestPodStartTime returns the earliest start time of all pods that
// have the highest priority among all victims.
func GetEarliestPodStartTime(victims *extenderv1.Victims) *metav1.Time {
if len(victims.Pods) == 0 {
// should not reach here.
klog.Errorf("victims.Pods is empty. Should not reach here.")
return nil
}
earliestPodStartTime := GetPodStartTime(victims.Pods[0])
maxPriority := podutil.GetPodPriority(victims.Pods[0])
for _, pod := range victims.Pods {
if podutil.GetPodPriority(pod) == maxPriority {
if GetPodStartTime(pod).Before(earliestPodStartTime) {
earliestPodStartTime = GetPodStartTime(pod)
}
} else if podutil.GetPodPriority(pod) > maxPriority {
maxPriority = podutil.GetPodPriority(pod)
earliestPodStartTime = GetPodStartTime(pod)
}
}
return earliestPodStartTime
}
// MoreImportantPod returns true when the priority of the first pod is higher
// than that of the second. If the two priorities are equal, it compares the
// pods' StartTimes and treats the earlier-started pod as more important.
func MoreImportantPod(pod1, pod2 *v1.Pod) bool {
p1 := podutil.GetPodPriority(pod1)
p2 := podutil.GetPodPriority(pod2)
if p1 != p2 {
return p1 > p2
}
return GetPodStartTime(pod1).Before(GetPodStartTime(pod2))
}
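A hedged sketch of the ordering (priorities and times illustrative; assumes the time package is imported):

hi, lo := int32(1000), int32(10)
p1 := &v1.Pod{Spec: v1.PodSpec{Priority: &hi}}
p2 := &v1.Pod{Spec: v1.PodSpec{Priority: &lo}}
_ = MoreImportantPod(p1, p2) // true: higher priority wins

// With equal priorities, the pod that started earlier is more important;
// p1 has no StartTime and therefore falls back to "now".
started := metav1.NewTime(time.Now().Add(-time.Hour))
p3 := &v1.Pod{
	Spec:   v1.PodSpec{Priority: &hi},
	Status: v1.PodStatus{StartTime: &started},
}
_ = MoreImportantPod(p3, p1) // true: p3 started an hour earlier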
// GetPodAffinityTerms gets pod affinity terms by a pod affinity object.
func GetPodAffinityTerms(podAffinity *v1.PodAffinity) (terms []v1.PodAffinityTerm) {
if podAffinity != nil {
if len(podAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
terms = podAffinity.RequiredDuringSchedulingIgnoredDuringExecution
}
// TODO: Uncomment this block when implementing RequiredDuringSchedulingRequiredDuringExecution.
//if len(podAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
// terms = append(terms, podAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
//}
}
return terms
}
// GetPodAntiAffinityTerms gets pod affinity terms by a pod anti-affinity.
func GetPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) {
if podAntiAffinity != nil {
if len(podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
terms = podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
}
// TODO: Uncomment this block when implementing RequiredDuringSchedulingRequiredDuringExecution.
//if len(podAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
// terms = append(terms, podAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
//}
}
return terms
}
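And a short sketch of the term extraction (selector contents illustrative):

affinity := &v1.PodAffinity{
	RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{{
		TopologyKey: "kubernetes.io/hostname",
		LabelSelector: &metav1.LabelSelector{
			MatchLabels: map[string]string{"app": "cache"},
		},
	}},
}
_ = GetPodAffinityTerms(affinity) // the single required term above
_ = GetPodAffinityTerms(nil)      // nil input: no terms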