mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-06-14 18:53:35 +00:00
rebase: update K8s packages to v0.32.1
Update K8s packages in go.mod to v0.32.1 Signed-off-by: Praveen M <m.praveen@ibm.com>
This commit is contained in:
8
vendor/k8s.io/kubernetes/pkg/scheduler/OWNERS
generated
vendored
Normal file
8
vendor/k8s.io/kubernetes/pkg/scheduler/OWNERS
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
# See the OWNERS docs at https://go.k8s.io/owners
|
||||
|
||||
approvers:
|
||||
- sig-scheduling-maintainers
|
||||
reviewers:
|
||||
- sig-scheduling
|
||||
labels:
|
||||
- sig/scheduling
|
11
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/OWNERS
generated
vendored
Normal file
11
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/OWNERS
generated
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
# See the OWNERS docs at https://go.k8s.io/owners
|
||||
|
||||
approvers:
|
||||
- api-approvers
|
||||
reviewers:
|
||||
- api-reviewers
|
||||
- sig-scheduling-api-reviewers
|
||||
- sig-scheduling-api-approvers
|
||||
labels:
|
||||
- kind/api-change
|
||||
- sig/scheduling
|
20
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/doc.go
generated
vendored
Normal file
20
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/doc.go
generated
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// +k8s:deepcopy-gen=package
|
||||
// +groupName=kubescheduler.config.k8s.io
|
||||
|
||||
package config // import "k8s.io/kubernetes/pkg/scheduler/apis/config"
|
50
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/register.go
generated
vendored
Normal file
50
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/register.go
generated
vendored
Normal file
@ -0,0 +1,50 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/runtime/schema"
|
||||
)
|
||||
|
||||
// GroupName is the group name used in this package
|
||||
const GroupName = "kubescheduler.config.k8s.io"
|
||||
|
||||
// SchemeGroupVersion is group version used to register these objects
|
||||
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal}
|
||||
|
||||
var (
|
||||
// SchemeBuilder is the scheme builder with scheme init functions to run for this API package
|
||||
SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
|
||||
// AddToScheme is a global function that registers this API group & version to a scheme
|
||||
AddToScheme = SchemeBuilder.AddToScheme
|
||||
)
|
||||
|
||||
// addKnownTypes registers known types to the given scheme
|
||||
func addKnownTypes(scheme *runtime.Scheme) error {
|
||||
scheme.AddKnownTypes(SchemeGroupVersion,
|
||||
&KubeSchedulerConfiguration{},
|
||||
&DefaultPreemptionArgs{},
|
||||
&InterPodAffinityArgs{},
|
||||
&NodeResourcesFitArgs{},
|
||||
&PodTopologySpreadArgs{},
|
||||
&VolumeBindingArgs{},
|
||||
&NodeResourcesBalancedAllocationArgs{},
|
||||
&NodeAffinityArgs{},
|
||||
)
|
||||
return nil
|
||||
}
|
46
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/scheme/scheme.go
generated
vendored
Normal file
46
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/scheme/scheme.go
generated
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package scheme
|
||||
|
||||
import (
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/runtime/serializer"
|
||||
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
||||
config "k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
configv1 "k8s.io/kubernetes/pkg/scheduler/apis/config/v1"
|
||||
)
|
||||
|
||||
var (
|
||||
// Scheme is the runtime.Scheme to which all kubescheduler api types are registered.
|
||||
Scheme = runtime.NewScheme()
|
||||
|
||||
// Codecs provides access to encoding and decoding for the scheme.
|
||||
Codecs = serializer.NewCodecFactory(Scheme, serializer.EnableStrict)
|
||||
)
|
||||
|
||||
func init() {
|
||||
AddToScheme(Scheme)
|
||||
}
|
||||
|
||||
// AddToScheme builds the kubescheduler scheme using all known versions of the kubescheduler api.
|
||||
func AddToScheme(scheme *runtime.Scheme) {
|
||||
utilruntime.Must(config.AddToScheme(scheme))
|
||||
utilruntime.Must(configv1.AddToScheme(scheme))
|
||||
utilruntime.Must(scheme.SetVersionPriority(
|
||||
configv1.SchemeGroupVersion,
|
||||
))
|
||||
}
|
336
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/types.go
generated
vendored
Normal file
336
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/types.go
generated
vendored
Normal file
@ -0,0 +1,336 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
componentbaseconfig "k8s.io/component-base/config"
|
||||
)
|
||||
|
||||
const (
|
||||
// DefaultKubeSchedulerPort is the default port for the scheduler status server.
|
||||
// May be overridden by a flag at startup.
|
||||
DefaultKubeSchedulerPort = 10259
|
||||
)
|
||||
|
||||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
|
||||
|
||||
// KubeSchedulerConfiguration configures a scheduler
|
||||
type KubeSchedulerConfiguration struct {
|
||||
// TypeMeta contains the API version and kind. In kube-scheduler, after
|
||||
// conversion from the versioned KubeSchedulerConfiguration type to this
|
||||
// internal type, we set the APIVersion field to the scheme group/version of
|
||||
// the type we converted from. This is done in cmd/kube-scheduler in two
|
||||
// places: (1) when loading config from a file, (2) generating the default
|
||||
// config. Based on the versioned type set in this field, we make decisions;
|
||||
// for example (1) during validation to check for usage of removed plugins,
|
||||
// (2) writing config to a file, (3) initialising the scheduler.
|
||||
metav1.TypeMeta
|
||||
|
||||
// Parallelism defines the amount of parallelism in algorithms for scheduling a Pods. Must be greater than 0. Defaults to 16
|
||||
Parallelism int32
|
||||
|
||||
// LeaderElection defines the configuration of leader election client.
|
||||
LeaderElection componentbaseconfig.LeaderElectionConfiguration
|
||||
|
||||
// ClientConnection specifies the kubeconfig file and client connection
|
||||
// settings for the proxy server to use when communicating with the apiserver.
|
||||
ClientConnection componentbaseconfig.ClientConnectionConfiguration
|
||||
|
||||
// DebuggingConfiguration holds configuration for Debugging related features
|
||||
// TODO: We might wanna make this a substruct like Debugging componentbaseconfig.DebuggingConfiguration
|
||||
componentbaseconfig.DebuggingConfiguration
|
||||
|
||||
// PercentageOfNodesToScore is the percentage of all nodes that once found feasible
|
||||
// for running a pod, the scheduler stops its search for more feasible nodes in
|
||||
// the cluster. This helps improve scheduler's performance. Scheduler always tries to find
|
||||
// at least "minFeasibleNodesToFind" feasible nodes no matter what the value of this flag is.
|
||||
// Example: if the cluster size is 500 nodes and the value of this flag is 30,
|
||||
// then scheduler stops finding further feasible nodes once it finds 150 feasible ones.
|
||||
// When the value is 0, default percentage (5%--50% based on the size of the cluster) of the
|
||||
// nodes will be scored. It is overridden by profile level PercentageOfNodesToScore.
|
||||
PercentageOfNodesToScore *int32
|
||||
|
||||
// PodInitialBackoffSeconds is the initial backoff for unschedulable pods.
|
||||
// If specified, it must be greater than 0. If this value is null, the default value (1s)
|
||||
// will be used.
|
||||
PodInitialBackoffSeconds int64
|
||||
|
||||
// PodMaxBackoffSeconds is the max backoff for unschedulable pods.
|
||||
// If specified, it must be greater than or equal to podInitialBackoffSeconds. If this value is null,
|
||||
// the default value (10s) will be used.
|
||||
PodMaxBackoffSeconds int64
|
||||
|
||||
// Profiles are scheduling profiles that kube-scheduler supports. Pods can
|
||||
// choose to be scheduled under a particular profile by setting its associated
|
||||
// scheduler name. Pods that don't specify any scheduler name are scheduled
|
||||
// with the "default-scheduler" profile, if present here.
|
||||
Profiles []KubeSchedulerProfile
|
||||
|
||||
// Extenders are the list of scheduler extenders, each holding the values of how to communicate
|
||||
// with the extender. These extenders are shared by all scheduler profiles.
|
||||
Extenders []Extender
|
||||
|
||||
// DelayCacheUntilActive specifies when to start caching. If this is true and leader election is enabled,
|
||||
// the scheduler will wait to fill informer caches until it is the leader. Doing so will have slower
|
||||
// failover with the benefit of lower memory overhead while waiting to become leader.
|
||||
// Defaults to false.
|
||||
DelayCacheUntilActive bool
|
||||
}
|
||||
|
||||
// KubeSchedulerProfile is a scheduling profile.
|
||||
type KubeSchedulerProfile struct {
|
||||
// SchedulerName is the name of the scheduler associated to this profile.
|
||||
// If SchedulerName matches with the pod's "spec.schedulerName", then the pod
|
||||
// is scheduled with this profile.
|
||||
SchedulerName string
|
||||
|
||||
// PercentageOfNodesToScore is the percentage of all nodes that once found feasible
|
||||
// for running a pod, the scheduler stops its search for more feasible nodes in
|
||||
// the cluster. This helps improve scheduler's performance. Scheduler always tries to find
|
||||
// at least "minFeasibleNodesToFind" feasible nodes no matter what the value of this flag is.
|
||||
// Example: if the cluster size is 500 nodes and the value of this flag is 30,
|
||||
// then scheduler stops finding further feasible nodes once it finds 150 feasible ones.
|
||||
// When the value is 0, default percentage (5%--50% based on the size of the cluster) of the
|
||||
// nodes will be scored. It will override global PercentageOfNodesToScore. If it is empty,
|
||||
// global PercentageOfNodesToScore will be used.
|
||||
PercentageOfNodesToScore *int32
|
||||
|
||||
// Plugins specify the set of plugins that should be enabled or disabled.
|
||||
// Enabled plugins are the ones that should be enabled in addition to the
|
||||
// default plugins. Disabled plugins are any of the default plugins that
|
||||
// should be disabled.
|
||||
// When no enabled or disabled plugin is specified for an extension point,
|
||||
// default plugins for that extension point will be used if there is any.
|
||||
// If a QueueSort plugin is specified, the same QueueSort Plugin and
|
||||
// PluginConfig must be specified for all profiles.
|
||||
Plugins *Plugins
|
||||
|
||||
// PluginConfig is an optional set of custom plugin arguments for each plugin.
|
||||
// Omitting config args for a plugin is equivalent to using the default config
|
||||
// for that plugin.
|
||||
PluginConfig []PluginConfig
|
||||
}
|
||||
|
||||
// Plugins include multiple extension points. When specified, the list of plugins for
|
||||
// a particular extension point are the only ones enabled. If an extension point is
|
||||
// omitted from the config, then the default set of plugins is used for that extension point.
|
||||
// Enabled plugins are called in the order specified here, after default plugins. If they need to
|
||||
// be invoked before default plugins, default plugins must be disabled and re-enabled here in desired order.
|
||||
type Plugins struct {
|
||||
// PreEnqueue is a list of plugins that should be invoked before adding pods to the scheduling queue.
|
||||
PreEnqueue PluginSet
|
||||
|
||||
// QueueSort is a list of plugins that should be invoked when sorting pods in the scheduling queue.
|
||||
QueueSort PluginSet
|
||||
|
||||
// PreFilter is a list of plugins that should be invoked at "PreFilter" extension point of the scheduling framework.
|
||||
PreFilter PluginSet
|
||||
|
||||
// Filter is a list of plugins that should be invoked when filtering out nodes that cannot run the Pod.
|
||||
Filter PluginSet
|
||||
|
||||
// PostFilter is a list of plugins that are invoked after filtering phase, but only when no feasible nodes were found for the pod.
|
||||
PostFilter PluginSet
|
||||
|
||||
// PreScore is a list of plugins that are invoked before scoring.
|
||||
PreScore PluginSet
|
||||
|
||||
// Score is a list of plugins that should be invoked when ranking nodes that have passed the filtering phase.
|
||||
Score PluginSet
|
||||
|
||||
// Reserve is a list of plugins invoked when reserving/unreserving resources
|
||||
// after a node is assigned to run the pod.
|
||||
Reserve PluginSet
|
||||
|
||||
// Permit is a list of plugins that control binding of a Pod. These plugins can prevent or delay binding of a Pod.
|
||||
Permit PluginSet
|
||||
|
||||
// PreBind is a list of plugins that should be invoked before a pod is bound.
|
||||
PreBind PluginSet
|
||||
|
||||
// Bind is a list of plugins that should be invoked at "Bind" extension point of the scheduling framework.
|
||||
// The scheduler call these plugins in order. Scheduler skips the rest of these plugins as soon as one returns success.
|
||||
Bind PluginSet
|
||||
|
||||
// PostBind is a list of plugins that should be invoked after a pod is successfully bound.
|
||||
PostBind PluginSet
|
||||
|
||||
// MultiPoint is a simplified config field for enabling plugins for all valid extension points
|
||||
MultiPoint PluginSet
|
||||
}
|
||||
|
||||
// PluginSet specifies enabled and disabled plugins for an extension point.
|
||||
// If an array is empty, missing, or nil, default plugins at that extension point will be used.
|
||||
type PluginSet struct {
|
||||
// Enabled specifies plugins that should be enabled in addition to default plugins.
|
||||
// These are called after default plugins and in the same order specified here.
|
||||
Enabled []Plugin
|
||||
// Disabled specifies default plugins that should be disabled.
|
||||
// When all default plugins need to be disabled, an array containing only one "*" should be provided.
|
||||
Disabled []Plugin
|
||||
}
|
||||
|
||||
// Plugin specifies a plugin name and its weight when applicable. Weight is used only for Score plugins.
|
||||
type Plugin struct {
|
||||
// Name defines the name of plugin
|
||||
Name string
|
||||
// Weight defines the weight of plugin, only used for Score plugins.
|
||||
Weight int32
|
||||
}
|
||||
|
||||
// PluginConfig specifies arguments that should be passed to a plugin at the time of initialization.
|
||||
// A plugin that is invoked at multiple extension points is initialized once. Args can have arbitrary structure.
|
||||
// It is up to the plugin to process these Args.
|
||||
type PluginConfig struct {
|
||||
// Name defines the name of plugin being configured
|
||||
Name string
|
||||
// Args defines the arguments passed to the plugins at the time of initialization. Args can have arbitrary structure.
|
||||
Args runtime.Object
|
||||
}
|
||||
|
||||
/*
|
||||
* NOTE: The following variables and methods are intentionally left out of the staging mirror.
|
||||
*/
|
||||
const (
|
||||
// DefaultPercentageOfNodesToScore defines the percentage of nodes of all nodes
|
||||
// that once found feasible, the scheduler stops looking for more nodes.
|
||||
// A value of 0 means adaptive, meaning the scheduler figures out a proper default.
|
||||
DefaultPercentageOfNodesToScore = 0
|
||||
|
||||
// MaxCustomPriorityScore is the max score UtilizationShapePoint expects.
|
||||
MaxCustomPriorityScore int64 = 10
|
||||
|
||||
// MaxTotalScore is the maximum total score.
|
||||
MaxTotalScore int64 = math.MaxInt64
|
||||
|
||||
// MaxWeight defines the max weight value allowed for custom PriorityPolicy
|
||||
MaxWeight = MaxTotalScore / MaxCustomPriorityScore
|
||||
)
|
||||
|
||||
// Names returns the list of enabled plugin names.
|
||||
func (p *Plugins) Names() []string {
|
||||
if p == nil {
|
||||
return nil
|
||||
}
|
||||
extensions := []PluginSet{
|
||||
p.PreEnqueue,
|
||||
p.PreFilter,
|
||||
p.Filter,
|
||||
p.PostFilter,
|
||||
p.Reserve,
|
||||
p.PreScore,
|
||||
p.Score,
|
||||
p.PreBind,
|
||||
p.Bind,
|
||||
p.PostBind,
|
||||
p.Permit,
|
||||
p.QueueSort,
|
||||
}
|
||||
n := sets.New[string]()
|
||||
for _, e := range extensions {
|
||||
for _, pg := range e.Enabled {
|
||||
n.Insert(pg.Name)
|
||||
}
|
||||
}
|
||||
return sets.List(n)
|
||||
}
|
||||
|
||||
// Extender holds the parameters used to communicate with the extender. If a verb is unspecified/empty,
|
||||
// it is assumed that the extender chose not to provide that extension.
|
||||
type Extender struct {
|
||||
// URLPrefix at which the extender is available
|
||||
URLPrefix string
|
||||
// Verb for the filter call, empty if not supported. This verb is appended to the URLPrefix when issuing the filter call to extender.
|
||||
FilterVerb string
|
||||
// Verb for the preempt call, empty if not supported. This verb is appended to the URLPrefix when issuing the preempt call to extender.
|
||||
PreemptVerb string
|
||||
// Verb for the prioritize call, empty if not supported. This verb is appended to the URLPrefix when issuing the prioritize call to extender.
|
||||
PrioritizeVerb string
|
||||
// The numeric multiplier for the node scores that the prioritize call generates.
|
||||
// The weight should be a positive integer
|
||||
Weight int64
|
||||
// Verb for the bind call, empty if not supported. This verb is appended to the URLPrefix when issuing the bind call to extender.
|
||||
// If this method is implemented by the extender, it is the extender's responsibility to bind the pod to apiserver. Only one extender
|
||||
// can implement this function.
|
||||
BindVerb string
|
||||
// EnableHTTPS specifies whether https should be used to communicate with the extender
|
||||
EnableHTTPS bool
|
||||
// TLSConfig specifies the transport layer security config
|
||||
TLSConfig *ExtenderTLSConfig
|
||||
// HTTPTimeout specifies the timeout duration for a call to the extender. Filter timeout fails the scheduling of the pod. Prioritize
|
||||
// timeout is ignored, k8s/other extenders priorities are used to select the node.
|
||||
HTTPTimeout metav1.Duration
|
||||
// NodeCacheCapable specifies that the extender is capable of caching node information,
|
||||
// so the scheduler should only send minimal information about the eligible nodes
|
||||
// assuming that the extender already cached full details of all nodes in the cluster
|
||||
NodeCacheCapable bool
|
||||
// ManagedResources is a list of extended resources that are managed by
|
||||
// this extender.
|
||||
// - A pod will be sent to the extender on the Filter, Prioritize and Bind
|
||||
// (if the extender is the binder) phases iff the pod requests at least
|
||||
// one of the extended resources in this list. If empty or unspecified,
|
||||
// all pods will be sent to this extender.
|
||||
// - If IgnoredByScheduler is set to true for a resource, kube-scheduler
|
||||
// will skip checking the resource in predicates.
|
||||
// +optional
|
||||
ManagedResources []ExtenderManagedResource
|
||||
// Ignorable specifies if the extender is ignorable, i.e. scheduling should not
|
||||
// fail when the extender returns an error or is not reachable.
|
||||
Ignorable bool
|
||||
}
|
||||
|
||||
// ExtenderManagedResource describes the arguments of extended resources
|
||||
// managed by an extender.
|
||||
type ExtenderManagedResource struct {
|
||||
// Name is the extended resource name.
|
||||
Name string
|
||||
// IgnoredByScheduler indicates whether kube-scheduler should ignore this
|
||||
// resource when applying predicates.
|
||||
IgnoredByScheduler bool
|
||||
}
|
||||
|
||||
// ExtenderTLSConfig contains settings to enable TLS with extender
|
||||
type ExtenderTLSConfig struct {
|
||||
// Server should be accessed without verifying the TLS certificate. For testing only.
|
||||
Insecure bool
|
||||
// ServerName is passed to the server for SNI and is used in the client to check server
|
||||
// certificates against. If ServerName is empty, the hostname used to contact the
|
||||
// server is used.
|
||||
ServerName string
|
||||
|
||||
// Server requires TLS client certificate authentication
|
||||
CertFile string
|
||||
// Server requires TLS client certificate authentication
|
||||
KeyFile string
|
||||
// Trusted root certificates for server
|
||||
CAFile string
|
||||
|
||||
// CertData holds PEM-encoded bytes (typically read from a client certificate file).
|
||||
// CertData takes precedence over CertFile
|
||||
CertData []byte
|
||||
// KeyData holds PEM-encoded bytes (typically read from a client certificate key file).
|
||||
// KeyData takes precedence over KeyFile
|
||||
KeyData []byte `datapolicy:"security-key"`
|
||||
// CAData holds PEM-encoded bytes (typically read from a root certificates bundle).
|
||||
// CAData takes precedence over CAFile
|
||||
CAData []byte
|
||||
}
|
218
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/types_pluginargs.go
generated
vendored
Normal file
218
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/types_pluginargs.go
generated
vendored
Normal file
@ -0,0 +1,218 @@
|
||||
/*
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
|
||||
|
||||
// DefaultPreemptionArgs holds arguments used to configure the
|
||||
// DefaultPreemption plugin.
|
||||
type DefaultPreemptionArgs struct {
|
||||
metav1.TypeMeta
|
||||
|
||||
// MinCandidateNodesPercentage is the minimum number of candidates to
|
||||
// shortlist when dry running preemption as a percentage of number of nodes.
|
||||
// Must be in the range [0, 100]. Defaults to 10% of the cluster size if
|
||||
// unspecified.
|
||||
MinCandidateNodesPercentage int32
|
||||
// MinCandidateNodesAbsolute is the absolute minimum number of candidates to
|
||||
// shortlist. The likely number of candidates enumerated for dry running
|
||||
// preemption is given by the formula:
|
||||
// numCandidates = max(numNodes * minCandidateNodesPercentage, minCandidateNodesAbsolute)
|
||||
// We say "likely" because there are other factors such as PDB violations
|
||||
// that play a role in the number of candidates shortlisted. Must be at least
|
||||
// 0 nodes. Defaults to 100 nodes if unspecified.
|
||||
MinCandidateNodesAbsolute int32
|
||||
}
|
||||
|
||||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
|
||||
|
||||
// InterPodAffinityArgs holds arguments used to configure the InterPodAffinity plugin.
|
||||
type InterPodAffinityArgs struct {
|
||||
metav1.TypeMeta
|
||||
|
||||
// HardPodAffinityWeight is the scoring weight for existing pods with a
|
||||
// matching hard affinity to the incoming pod.
|
||||
HardPodAffinityWeight int32
|
||||
|
||||
// IgnorePreferredTermsOfExistingPods configures the scheduler to ignore existing pods' preferred affinity
|
||||
// rules when scoring candidate nodes, unless the incoming pod has inter-pod affinities.
|
||||
IgnorePreferredTermsOfExistingPods bool
|
||||
}
|
||||
|
||||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
|
||||
|
||||
// NodeResourcesFitArgs holds arguments used to configure the NodeResourcesFit plugin.
|
||||
type NodeResourcesFitArgs struct {
|
||||
metav1.TypeMeta
|
||||
|
||||
// IgnoredResources is the list of resources that NodeResources fit filter
|
||||
// should ignore.
|
||||
IgnoredResources []string
|
||||
// IgnoredResourceGroups defines the list of resource groups that NodeResources fit filter should ignore.
|
||||
// e.g. if group is ["example.com"], it will ignore all resource names that begin
|
||||
// with "example.com", such as "example.com/aaa" and "example.com/bbb".
|
||||
// A resource group name can't contain '/'.
|
||||
IgnoredResourceGroups []string
|
||||
|
||||
// ScoringStrategy selects the node resource scoring strategy.
|
||||
ScoringStrategy *ScoringStrategy
|
||||
}
|
||||
|
||||
// PodTopologySpreadConstraintsDefaulting defines how to set default constraints
|
||||
// for the PodTopologySpread plugin.
|
||||
type PodTopologySpreadConstraintsDefaulting string
|
||||
|
||||
const (
|
||||
// SystemDefaulting instructs to use the kubernetes defined default.
|
||||
SystemDefaulting PodTopologySpreadConstraintsDefaulting = "System"
|
||||
// ListDefaulting instructs to use the config provided default.
|
||||
ListDefaulting PodTopologySpreadConstraintsDefaulting = "List"
|
||||
)
|
||||
|
||||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
|
||||
|
||||
// PodTopologySpreadArgs holds arguments used to configure the PodTopologySpread plugin.
|
||||
type PodTopologySpreadArgs struct {
|
||||
metav1.TypeMeta
|
||||
|
||||
// DefaultConstraints defines topology spread constraints to be applied to
|
||||
// Pods that don't define any in `pod.spec.topologySpreadConstraints`.
|
||||
// `.defaultConstraints[*].labelSelectors` must be empty, as they are
|
||||
// deduced from the Pod's membership to Services, ReplicationControllers,
|
||||
// ReplicaSets or StatefulSets.
|
||||
// When not empty, .defaultingType must be "List".
|
||||
DefaultConstraints []v1.TopologySpreadConstraint
|
||||
|
||||
// DefaultingType determines how .defaultConstraints are deduced. Can be one
|
||||
// of "System" or "List".
|
||||
//
|
||||
// - "System": Use kubernetes defined constraints that spread Pods among
|
||||
// Nodes and Zones.
|
||||
// - "List": Use constraints defined in .defaultConstraints.
|
||||
//
|
||||
// Defaults to "System".
|
||||
// +optional
|
||||
DefaultingType PodTopologySpreadConstraintsDefaulting
|
||||
}
|
||||
|
||||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
|
||||
|
||||
// NodeResourcesBalancedAllocationArgs holds arguments used to configure NodeResourcesBalancedAllocation plugin.
|
||||
type NodeResourcesBalancedAllocationArgs struct {
|
||||
metav1.TypeMeta
|
||||
|
||||
// Resources to be considered when scoring.
|
||||
// The default resource set includes "cpu" and "memory", only valid weight is 1.
|
||||
Resources []ResourceSpec
|
||||
}
|
||||
|
||||
// UtilizationShapePoint represents a single point of a priority function shape.
|
||||
type UtilizationShapePoint struct {
|
||||
// Utilization (x axis). Valid values are 0 to 100. Fully utilized node maps to 100.
|
||||
Utilization int32
|
||||
// Score assigned to a given utilization (y axis). Valid values are 0 to 10.
|
||||
Score int32
|
||||
}
|
||||
|
||||
// ResourceSpec represents single resource.
|
||||
type ResourceSpec struct {
|
||||
// Name of the resource.
|
||||
Name string
|
||||
// Weight of the resource.
|
||||
Weight int64
|
||||
}
|
||||
|
||||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
|
||||
|
||||
// VolumeBindingArgs holds arguments used to configure the VolumeBinding plugin.
|
||||
type VolumeBindingArgs struct {
|
||||
metav1.TypeMeta
|
||||
|
||||
// BindTimeoutSeconds is the timeout in seconds in volume binding operation.
|
||||
// Value must be non-negative integer. The value zero indicates no waiting.
|
||||
// If this value is nil, the default value will be used.
|
||||
BindTimeoutSeconds int64
|
||||
|
||||
// Shape specifies the points defining the score function shape, which is
|
||||
// used to score nodes based on the utilization of statically provisioned
|
||||
// PVs. The utilization is calculated by dividing the total requested
|
||||
// storage of the pod by the total capacity of feasible PVs on each node.
|
||||
// Each point contains utilization (ranges from 0 to 100) and its
|
||||
// associated score (ranges from 0 to 10). You can turn the priority by
|
||||
// specifying different scores for different utilization numbers.
|
||||
// The default shape points are:
|
||||
// 1) 0 for 0 utilization
|
||||
// 2) 10 for 100 utilization
|
||||
// All points must be sorted in increasing order by utilization.
|
||||
// +featureGate=VolumeCapacityPriority
|
||||
// +optional
|
||||
Shape []UtilizationShapePoint
|
||||
}
|
||||
|
||||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
|
||||
|
||||
// NodeAffinityArgs holds arguments to configure the NodeAffinity plugin.
|
||||
type NodeAffinityArgs struct {
|
||||
metav1.TypeMeta
|
||||
|
||||
// AddedAffinity is applied to all Pods additionally to the NodeAffinity
|
||||
// specified in the PodSpec. That is, Nodes need to satisfy AddedAffinity
|
||||
// AND .spec.NodeAffinity. AddedAffinity is empty by default (all Nodes
|
||||
// match).
|
||||
// When AddedAffinity is used, some Pods with affinity requirements that match
|
||||
// a specific Node (such as Daemonset Pods) might remain unschedulable.
|
||||
AddedAffinity *v1.NodeAffinity
|
||||
}
|
||||
|
||||
// ScoringStrategyType the type of scoring strategy used in NodeResourcesFit plugin.
|
||||
type ScoringStrategyType string
|
||||
|
||||
const (
|
||||
// LeastAllocated strategy prioritizes nodes with least allocated resources.
|
||||
LeastAllocated ScoringStrategyType = "LeastAllocated"
|
||||
// MostAllocated strategy prioritizes nodes with most allocated resources.
|
||||
MostAllocated ScoringStrategyType = "MostAllocated"
|
||||
// RequestedToCapacityRatio strategy allows specifying a custom shape function
|
||||
// to score nodes based on the request to capacity ratio.
|
||||
RequestedToCapacityRatio ScoringStrategyType = "RequestedToCapacityRatio"
|
||||
)
|
||||
|
||||
// ScoringStrategy define ScoringStrategyType for node resource plugin
|
||||
type ScoringStrategy struct {
|
||||
// Type selects which strategy to run.
|
||||
Type ScoringStrategyType
|
||||
|
||||
// Resources to consider when scoring.
|
||||
// The default resource set includes "cpu" and "memory" with an equal weight.
|
||||
// Allowed weights go from 1 to 100.
|
||||
// Weight defaults to 1 if not specified or explicitly set to 0.
|
||||
Resources []ResourceSpec
|
||||
|
||||
// Arguments specific to RequestedToCapacityRatio strategy.
|
||||
RequestedToCapacityRatio *RequestedToCapacityRatioParam
|
||||
}
|
||||
|
||||
// RequestedToCapacityRatioParam define RequestedToCapacityRatio parameters
|
||||
type RequestedToCapacityRatioParam struct {
|
||||
// Shape is a list of points defining the scoring function shape.
|
||||
Shape []UtilizationShapePoint
|
||||
}
|
107
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/conversion.go
generated
vendored
Normal file
107
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/conversion.go
generated
vendored
Normal file
@ -0,0 +1,107 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"k8s.io/apimachinery/pkg/conversion"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
||||
v1 "k8s.io/kube-scheduler/config/v1"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
)
|
||||
|
||||
var (
|
||||
// pluginArgConversionScheme is a scheme with internal and v1 registered,
|
||||
// used for defaulting/converting typed PluginConfig Args.
|
||||
// Access via getPluginArgConversionScheme()
|
||||
pluginArgConversionScheme *runtime.Scheme
|
||||
initPluginArgConversionScheme sync.Once
|
||||
)
|
||||
|
||||
func GetPluginArgConversionScheme() *runtime.Scheme {
|
||||
initPluginArgConversionScheme.Do(func() {
|
||||
// set up the scheme used for plugin arg conversion
|
||||
pluginArgConversionScheme = runtime.NewScheme()
|
||||
utilruntime.Must(AddToScheme(pluginArgConversionScheme))
|
||||
utilruntime.Must(config.AddToScheme(pluginArgConversionScheme))
|
||||
})
|
||||
return pluginArgConversionScheme
|
||||
}
|
||||
|
||||
func Convert_v1_KubeSchedulerConfiguration_To_config_KubeSchedulerConfiguration(in *v1.KubeSchedulerConfiguration, out *config.KubeSchedulerConfiguration, s conversion.Scope) error {
|
||||
if err := autoConvert_v1_KubeSchedulerConfiguration_To_config_KubeSchedulerConfiguration(in, out, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return convertToInternalPluginConfigArgs(out)
|
||||
}
|
||||
|
||||
// convertToInternalPluginConfigArgs converts PluginConfig#Args into internal
|
||||
// types using a scheme, after applying defaults.
|
||||
func convertToInternalPluginConfigArgs(out *config.KubeSchedulerConfiguration) error {
|
||||
scheme := GetPluginArgConversionScheme()
|
||||
for i := range out.Profiles {
|
||||
prof := &out.Profiles[i]
|
||||
for j := range prof.PluginConfig {
|
||||
args := prof.PluginConfig[j].Args
|
||||
if args == nil {
|
||||
continue
|
||||
}
|
||||
if _, isUnknown := args.(*runtime.Unknown); isUnknown {
|
||||
continue
|
||||
}
|
||||
internalArgs, err := scheme.ConvertToVersion(args, config.SchemeGroupVersion)
|
||||
if err != nil {
|
||||
return fmt.Errorf("converting .Profiles[%d].PluginConfig[%d].Args into internal type: %w", i, j, err)
|
||||
}
|
||||
prof.PluginConfig[j].Args = internalArgs
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func Convert_config_KubeSchedulerConfiguration_To_v1_KubeSchedulerConfiguration(in *config.KubeSchedulerConfiguration, out *v1.KubeSchedulerConfiguration, s conversion.Scope) error {
|
||||
if err := autoConvert_config_KubeSchedulerConfiguration_To_v1_KubeSchedulerConfiguration(in, out, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return convertToExternalPluginConfigArgs(out)
|
||||
}
|
||||
|
||||
// convertToExternalPluginConfigArgs converts PluginConfig#Args into
|
||||
// external (versioned) types using a scheme.
|
||||
func convertToExternalPluginConfigArgs(out *v1.KubeSchedulerConfiguration) error {
|
||||
scheme := GetPluginArgConversionScheme()
|
||||
for i := range out.Profiles {
|
||||
for j := range out.Profiles[i].PluginConfig {
|
||||
args := out.Profiles[i].PluginConfig[j].Args
|
||||
if args.Object == nil {
|
||||
continue
|
||||
}
|
||||
if _, isUnknown := args.Object.(*runtime.Unknown); isUnknown {
|
||||
continue
|
||||
}
|
||||
externalArgs, err := scheme.ConvertToVersion(args.Object, SchemeGroupVersion)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
out.Profiles[i].PluginConfig[j].Args.Object = externalArgs
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
157
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/default_plugins.go
generated
vendored
Normal file
157
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/default_plugins.go
generated
vendored
Normal file
@ -0,0 +1,157 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/klog/v2"
|
||||
v1 "k8s.io/kube-scheduler/config/v1"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
"k8s.io/utils/ptr"
|
||||
)
|
||||
|
||||
// getDefaultPlugins returns the default set of plugins.
|
||||
func getDefaultPlugins() *v1.Plugins {
|
||||
plugins := &v1.Plugins{
|
||||
MultiPoint: v1.PluginSet{
|
||||
Enabled: []v1.Plugin{
|
||||
{Name: names.SchedulingGates},
|
||||
{Name: names.PrioritySort},
|
||||
{Name: names.NodeUnschedulable},
|
||||
{Name: names.NodeName},
|
||||
{Name: names.TaintToleration, Weight: ptr.To[int32](3)},
|
||||
{Name: names.NodeAffinity, Weight: ptr.To[int32](2)},
|
||||
{Name: names.NodePorts},
|
||||
{Name: names.NodeResourcesFit, Weight: ptr.To[int32](1)},
|
||||
{Name: names.VolumeRestrictions},
|
||||
{Name: names.NodeVolumeLimits},
|
||||
{Name: names.VolumeBinding},
|
||||
{Name: names.VolumeZone},
|
||||
{Name: names.PodTopologySpread, Weight: ptr.To[int32](2)},
|
||||
{Name: names.InterPodAffinity, Weight: ptr.To[int32](2)},
|
||||
{Name: names.DefaultPreemption},
|
||||
{Name: names.NodeResourcesBalancedAllocation, Weight: ptr.To[int32](1)},
|
||||
{Name: names.ImageLocality, Weight: ptr.To[int32](1)},
|
||||
{Name: names.DefaultBinder},
|
||||
},
|
||||
},
|
||||
}
|
||||
applyFeatureGates(plugins)
|
||||
|
||||
return plugins
|
||||
}
|
||||
|
||||
func applyFeatureGates(config *v1.Plugins) {
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
|
||||
// This plugin should come before DefaultPreemption because if
|
||||
// there is a problem with a Pod and PostFilter gets called to
|
||||
// resolve the problem, it is better to first deallocate an
|
||||
// idle ResourceClaim than it is to evict some Pod that might
|
||||
// be doing useful work.
|
||||
for i := range config.MultiPoint.Enabled {
|
||||
if config.MultiPoint.Enabled[i].Name == names.DefaultPreemption {
|
||||
extended := make([]v1.Plugin, 0, len(config.MultiPoint.Enabled)+1)
|
||||
extended = append(extended, config.MultiPoint.Enabled[:i]...)
|
||||
extended = append(extended, v1.Plugin{Name: names.DynamicResources})
|
||||
extended = append(extended, config.MultiPoint.Enabled[i:]...)
|
||||
config.MultiPoint.Enabled = extended
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// mergePlugins merges the custom set into the given default one, handling disabled sets.
|
||||
func mergePlugins(logger klog.Logger, defaultPlugins, customPlugins *v1.Plugins) *v1.Plugins {
|
||||
if customPlugins == nil {
|
||||
return defaultPlugins
|
||||
}
|
||||
|
||||
defaultPlugins.MultiPoint = mergePluginSet(logger, defaultPlugins.MultiPoint, customPlugins.MultiPoint)
|
||||
defaultPlugins.PreEnqueue = mergePluginSet(logger, defaultPlugins.PreEnqueue, customPlugins.PreEnqueue)
|
||||
defaultPlugins.QueueSort = mergePluginSet(logger, defaultPlugins.QueueSort, customPlugins.QueueSort)
|
||||
defaultPlugins.PreFilter = mergePluginSet(logger, defaultPlugins.PreFilter, customPlugins.PreFilter)
|
||||
defaultPlugins.Filter = mergePluginSet(logger, defaultPlugins.Filter, customPlugins.Filter)
|
||||
defaultPlugins.PostFilter = mergePluginSet(logger, defaultPlugins.PostFilter, customPlugins.PostFilter)
|
||||
defaultPlugins.PreScore = mergePluginSet(logger, defaultPlugins.PreScore, customPlugins.PreScore)
|
||||
defaultPlugins.Score = mergePluginSet(logger, defaultPlugins.Score, customPlugins.Score)
|
||||
defaultPlugins.Reserve = mergePluginSet(logger, defaultPlugins.Reserve, customPlugins.Reserve)
|
||||
defaultPlugins.Permit = mergePluginSet(logger, defaultPlugins.Permit, customPlugins.Permit)
|
||||
defaultPlugins.PreBind = mergePluginSet(logger, defaultPlugins.PreBind, customPlugins.PreBind)
|
||||
defaultPlugins.Bind = mergePluginSet(logger, defaultPlugins.Bind, customPlugins.Bind)
|
||||
defaultPlugins.PostBind = mergePluginSet(logger, defaultPlugins.PostBind, customPlugins.PostBind)
|
||||
return defaultPlugins
|
||||
}
|
||||
|
||||
type pluginIndex struct {
|
||||
index int
|
||||
plugin v1.Plugin
|
||||
}
|
||||
|
||||
func mergePluginSet(logger klog.Logger, defaultPluginSet, customPluginSet v1.PluginSet) v1.PluginSet {
|
||||
disabledPlugins := sets.New[string]()
|
||||
enabledCustomPlugins := make(map[string]pluginIndex)
|
||||
// replacedPluginIndex is a set of index of plugins, which have replaced the default plugins.
|
||||
replacedPluginIndex := sets.New[int]()
|
||||
var disabled []v1.Plugin
|
||||
for _, disabledPlugin := range customPluginSet.Disabled {
|
||||
// if the user is manually disabling any (or all, with "*") default plugins for an extension point,
|
||||
// we need to track that so that the MultiPoint extension logic in the framework can know to skip
|
||||
// inserting unspecified default plugins to this point.
|
||||
disabled = append(disabled, v1.Plugin{Name: disabledPlugin.Name})
|
||||
disabledPlugins.Insert(disabledPlugin.Name)
|
||||
}
|
||||
|
||||
// With MultiPoint, we may now have some disabledPlugins in the default registry
|
||||
// For example, we enable PluginX with Filter+Score through MultiPoint but disable its Score plugin by default.
|
||||
for _, disabledPlugin := range defaultPluginSet.Disabled {
|
||||
disabled = append(disabled, v1.Plugin{Name: disabledPlugin.Name})
|
||||
disabledPlugins.Insert(disabledPlugin.Name)
|
||||
}
|
||||
|
||||
for index, enabledPlugin := range customPluginSet.Enabled {
|
||||
enabledCustomPlugins[enabledPlugin.Name] = pluginIndex{index, enabledPlugin}
|
||||
}
|
||||
var enabledPlugins []v1.Plugin
|
||||
if !disabledPlugins.Has("*") {
|
||||
for _, defaultEnabledPlugin := range defaultPluginSet.Enabled {
|
||||
if disabledPlugins.Has(defaultEnabledPlugin.Name) {
|
||||
continue
|
||||
}
|
||||
// The default plugin is explicitly re-configured, update the default plugin accordingly.
|
||||
if customPlugin, ok := enabledCustomPlugins[defaultEnabledPlugin.Name]; ok {
|
||||
logger.Info("Default plugin is explicitly re-configured; overriding", "plugin", defaultEnabledPlugin.Name)
|
||||
// Update the default plugin in place to preserve order.
|
||||
defaultEnabledPlugin = customPlugin.plugin
|
||||
replacedPluginIndex.Insert(customPlugin.index)
|
||||
}
|
||||
enabledPlugins = append(enabledPlugins, defaultEnabledPlugin)
|
||||
}
|
||||
}
|
||||
|
||||
// Append all the custom plugins which haven't replaced any default plugins.
|
||||
// Note: duplicated custom plugins will still be appended here.
|
||||
// If so, the instantiation of scheduler framework will detect it and abort.
|
||||
for index, plugin := range customPluginSet.Enabled {
|
||||
if !replacedPluginIndex.Has(index) {
|
||||
enabledPlugins = append(enabledPlugins, plugin)
|
||||
}
|
||||
}
|
||||
return v1.PluginSet{Enabled: enabledPlugins, Disabled: disabled}
|
||||
}
|
244
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/defaults.go
generated
vendored
Normal file
244
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/defaults.go
generated
vendored
Normal file
@ -0,0 +1,244 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/apiserver/pkg/util/feature"
|
||||
componentbaseconfigv1alpha1 "k8s.io/component-base/config/v1alpha1"
|
||||
"k8s.io/klog/v2"
|
||||
configv1 "k8s.io/kube-scheduler/config/v1"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
"k8s.io/utils/ptr"
|
||||
)
|
||||
|
||||
var defaultResourceSpec = []configv1.ResourceSpec{
|
||||
{Name: string(v1.ResourceCPU), Weight: 1},
|
||||
{Name: string(v1.ResourceMemory), Weight: 1},
|
||||
}
|
||||
|
||||
func addDefaultingFuncs(scheme *runtime.Scheme) error {
|
||||
return RegisterDefaults(scheme)
|
||||
}
|
||||
|
||||
func pluginsNames(p *configv1.Plugins) []string {
|
||||
if p == nil {
|
||||
return nil
|
||||
}
|
||||
extensions := []configv1.PluginSet{
|
||||
p.MultiPoint,
|
||||
p.PreFilter,
|
||||
p.Filter,
|
||||
p.PostFilter,
|
||||
p.Reserve,
|
||||
p.PreScore,
|
||||
p.Score,
|
||||
p.PreBind,
|
||||
p.Bind,
|
||||
p.PostBind,
|
||||
p.Permit,
|
||||
p.PreEnqueue,
|
||||
p.QueueSort,
|
||||
}
|
||||
n := sets.New[string]()
|
||||
for _, e := range extensions {
|
||||
for _, pg := range e.Enabled {
|
||||
n.Insert(pg.Name)
|
||||
}
|
||||
}
|
||||
return sets.List(n)
|
||||
}
|
||||
|
||||
func setDefaults_KubeSchedulerProfile(logger klog.Logger, prof *configv1.KubeSchedulerProfile) {
|
||||
// Set default plugins.
|
||||
prof.Plugins = mergePlugins(logger, getDefaultPlugins(), prof.Plugins)
|
||||
// Set default plugin configs.
|
||||
scheme := GetPluginArgConversionScheme()
|
||||
existingConfigs := sets.New[string]()
|
||||
for j := range prof.PluginConfig {
|
||||
existingConfigs.Insert(prof.PluginConfig[j].Name)
|
||||
args := prof.PluginConfig[j].Args.Object
|
||||
if _, isUnknown := args.(*runtime.Unknown); isUnknown {
|
||||
continue
|
||||
}
|
||||
scheme.Default(args)
|
||||
}
|
||||
|
||||
// Append default configs for plugins that didn't have one explicitly set.
|
||||
for _, name := range pluginsNames(prof.Plugins) {
|
||||
if existingConfigs.Has(name) {
|
||||
continue
|
||||
}
|
||||
gvk := configv1.SchemeGroupVersion.WithKind(name + "Args")
|
||||
args, err := scheme.New(gvk)
|
||||
if err != nil {
|
||||
// This plugin is out-of-tree or doesn't require configuration.
|
||||
continue
|
||||
}
|
||||
scheme.Default(args)
|
||||
args.GetObjectKind().SetGroupVersionKind(gvk)
|
||||
prof.PluginConfig = append(prof.PluginConfig, configv1.PluginConfig{
|
||||
Name: name,
|
||||
Args: runtime.RawExtension{Object: args},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// SetDefaults_KubeSchedulerConfiguration sets additional defaults
|
||||
func SetDefaults_KubeSchedulerConfiguration(obj *configv1.KubeSchedulerConfiguration) {
|
||||
logger := klog.TODO() // called by generated code that doesn't pass a logger. See #115724
|
||||
if obj.Parallelism == nil {
|
||||
obj.Parallelism = ptr.To[int32](16)
|
||||
}
|
||||
|
||||
if len(obj.Profiles) == 0 {
|
||||
obj.Profiles = append(obj.Profiles, configv1.KubeSchedulerProfile{})
|
||||
}
|
||||
// Only apply a default scheduler name when there is a single profile.
|
||||
// Validation will ensure that every profile has a non-empty unique name.
|
||||
if len(obj.Profiles) == 1 && obj.Profiles[0].SchedulerName == nil {
|
||||
obj.Profiles[0].SchedulerName = ptr.To(v1.DefaultSchedulerName)
|
||||
}
|
||||
|
||||
// Add the default set of plugins and apply the configuration.
|
||||
for i := range obj.Profiles {
|
||||
prof := &obj.Profiles[i]
|
||||
setDefaults_KubeSchedulerProfile(logger, prof)
|
||||
}
|
||||
|
||||
if obj.PercentageOfNodesToScore == nil {
|
||||
obj.PercentageOfNodesToScore = ptr.To[int32](config.DefaultPercentageOfNodesToScore)
|
||||
}
|
||||
|
||||
if len(obj.LeaderElection.ResourceLock) == 0 {
|
||||
// Use lease-based leader election to reduce cost.
|
||||
// We migrated for EndpointsLease lock in 1.17 and starting in 1.20 we
|
||||
// migrated to Lease lock.
|
||||
obj.LeaderElection.ResourceLock = "leases"
|
||||
}
|
||||
if len(obj.LeaderElection.ResourceNamespace) == 0 {
|
||||
obj.LeaderElection.ResourceNamespace = configv1.SchedulerDefaultLockObjectNamespace
|
||||
}
|
||||
if len(obj.LeaderElection.ResourceName) == 0 {
|
||||
obj.LeaderElection.ResourceName = configv1.SchedulerDefaultLockObjectName
|
||||
}
|
||||
|
||||
if len(obj.ClientConnection.ContentType) == 0 {
|
||||
obj.ClientConnection.ContentType = "application/vnd.kubernetes.protobuf"
|
||||
}
|
||||
// Scheduler has an opinion about QPS/Burst, setting specific defaults for itself, instead of generic settings.
|
||||
if obj.ClientConnection.QPS == 0.0 {
|
||||
obj.ClientConnection.QPS = 50.0
|
||||
}
|
||||
if obj.ClientConnection.Burst == 0 {
|
||||
obj.ClientConnection.Burst = 100
|
||||
}
|
||||
|
||||
// Use the default LeaderElectionConfiguration options
|
||||
componentbaseconfigv1alpha1.RecommendedDefaultLeaderElectionConfiguration(&obj.LeaderElection)
|
||||
|
||||
if obj.PodInitialBackoffSeconds == nil {
|
||||
obj.PodInitialBackoffSeconds = ptr.To[int64](1)
|
||||
}
|
||||
|
||||
if obj.PodMaxBackoffSeconds == nil {
|
||||
obj.PodMaxBackoffSeconds = ptr.To[int64](10)
|
||||
}
|
||||
|
||||
// Enable profiling by default in the scheduler
|
||||
if obj.EnableProfiling == nil {
|
||||
obj.EnableProfiling = ptr.To(true)
|
||||
}
|
||||
|
||||
// Enable contention profiling by default if profiling is enabled
|
||||
if *obj.EnableProfiling && obj.EnableContentionProfiling == nil {
|
||||
obj.EnableContentionProfiling = ptr.To(true)
|
||||
}
|
||||
}
|
||||
|
||||
func SetDefaults_DefaultPreemptionArgs(obj *configv1.DefaultPreemptionArgs) {
|
||||
if obj.MinCandidateNodesPercentage == nil {
|
||||
obj.MinCandidateNodesPercentage = ptr.To[int32](10)
|
||||
}
|
||||
if obj.MinCandidateNodesAbsolute == nil {
|
||||
obj.MinCandidateNodesAbsolute = ptr.To[int32](100)
|
||||
}
|
||||
}
|
||||
|
||||
func SetDefaults_InterPodAffinityArgs(obj *configv1.InterPodAffinityArgs) {
|
||||
if obj.HardPodAffinityWeight == nil {
|
||||
obj.HardPodAffinityWeight = ptr.To[int32](1)
|
||||
}
|
||||
}
|
||||
|
||||
func SetDefaults_VolumeBindingArgs(obj *configv1.VolumeBindingArgs) {
|
||||
if obj.BindTimeoutSeconds == nil {
|
||||
obj.BindTimeoutSeconds = ptr.To[int64](600)
|
||||
}
|
||||
if len(obj.Shape) == 0 && feature.DefaultFeatureGate.Enabled(features.VolumeCapacityPriority) {
|
||||
obj.Shape = []configv1.UtilizationShapePoint{
|
||||
{
|
||||
Utilization: 0,
|
||||
Score: 0,
|
||||
},
|
||||
{
|
||||
Utilization: 100,
|
||||
Score: int32(config.MaxCustomPriorityScore),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func SetDefaults_NodeResourcesBalancedAllocationArgs(obj *configv1.NodeResourcesBalancedAllocationArgs) {
|
||||
if len(obj.Resources) == 0 {
|
||||
obj.Resources = defaultResourceSpec
|
||||
return
|
||||
}
|
||||
// If the weight is not set or it is explicitly set to 0, then apply the default weight(1) instead.
|
||||
for i := range obj.Resources {
|
||||
if obj.Resources[i].Weight == 0 {
|
||||
obj.Resources[i].Weight = 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func SetDefaults_PodTopologySpreadArgs(obj *configv1.PodTopologySpreadArgs) {
|
||||
if obj.DefaultingType == "" {
|
||||
obj.DefaultingType = configv1.SystemDefaulting
|
||||
}
|
||||
}
|
||||
|
||||
func SetDefaults_NodeResourcesFitArgs(obj *configv1.NodeResourcesFitArgs) {
|
||||
if obj.ScoringStrategy == nil {
|
||||
obj.ScoringStrategy = &configv1.ScoringStrategy{
|
||||
Type: configv1.ScoringStrategyType(config.LeastAllocated),
|
||||
Resources: defaultResourceSpec,
|
||||
}
|
||||
}
|
||||
if len(obj.ScoringStrategy.Resources) == 0 {
|
||||
// If no resources specified, use the default set.
|
||||
obj.ScoringStrategy.Resources = append(obj.ScoringStrategy.Resources, defaultResourceSpec...)
|
||||
}
|
||||
for i := range obj.ScoringStrategy.Resources {
|
||||
if obj.ScoringStrategy.Resources[i].Weight == 0 {
|
||||
obj.ScoringStrategy.Resources[i].Weight = 1
|
||||
}
|
||||
}
|
||||
}
|
24
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/doc.go
generated
vendored
Normal file
24
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/doc.go
generated
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// +k8s:deepcopy-gen=package
|
||||
// +k8s:conversion-gen=k8s.io/kubernetes/pkg/scheduler/apis/config
|
||||
// +k8s:conversion-gen-external-types=k8s.io/kube-scheduler/config/v1
|
||||
// +k8s:defaulter-gen=TypeMeta
|
||||
// +k8s:defaulter-gen-input=k8s.io/kube-scheduler/config/v1
|
||||
// +groupName=kubescheduler.config.k8s.io
|
||||
|
||||
package v1 // import "k8s.io/kubernetes/pkg/scheduler/apis/config/v1"
|
42
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/register.go
generated
vendored
Normal file
42
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/register.go
generated
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
v1 "k8s.io/kube-scheduler/config/v1"
|
||||
)
|
||||
|
||||
// GroupName is the group name used in this package
|
||||
const GroupName = v1.GroupName
|
||||
|
||||
// SchemeGroupVersion is group version used to register these objects
|
||||
var SchemeGroupVersion = v1.SchemeGroupVersion
|
||||
|
||||
var (
|
||||
// localSchemeBuilder extends the SchemeBuilder instance with the external types. In this package,
|
||||
// defaulting and conversion init funcs are registered as well.
|
||||
localSchemeBuilder = &v1.SchemeBuilder
|
||||
// AddToScheme is a global function that registers this API group & version to a scheme
|
||||
AddToScheme = localSchemeBuilder.AddToScheme
|
||||
)
|
||||
|
||||
func init() {
|
||||
// We only register manually written functions here. The registration of the
|
||||
// generated functions takes place in the generated files. The separation
|
||||
// makes the code compile even when the generated files are missing.
|
||||
localSchemeBuilder.Register(addDefaultingFuncs)
|
||||
}
|
946
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/zz_generated.conversion.go
generated
vendored
Normal file
946
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/zz_generated.conversion.go
generated
vendored
Normal file
@ -0,0 +1,946 @@
|
||||
//go:build !ignore_autogenerated
|
||||
// +build !ignore_autogenerated
|
||||
|
||||
/*
|
||||
Copyright The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Code generated by conversion-gen. DO NOT EDIT.
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
unsafe "unsafe"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
conversion "k8s.io/apimachinery/pkg/conversion"
|
||||
runtime "k8s.io/apimachinery/pkg/runtime"
|
||||
v1alpha1 "k8s.io/component-base/config/v1alpha1"
|
||||
configv1 "k8s.io/kube-scheduler/config/v1"
|
||||
config "k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
)
|
||||
|
||||
func init() {
|
||||
localSchemeBuilder.Register(RegisterConversions)
|
||||
}
|
||||
|
||||
// RegisterConversions adds conversion functions to the given scheme.
|
||||
// Public to allow building arbitrary schemes.
|
||||
func RegisterConversions(s *runtime.Scheme) error {
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.DefaultPreemptionArgs)(nil), (*config.DefaultPreemptionArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_DefaultPreemptionArgs_To_config_DefaultPreemptionArgs(a.(*configv1.DefaultPreemptionArgs), b.(*config.DefaultPreemptionArgs), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.DefaultPreemptionArgs)(nil), (*configv1.DefaultPreemptionArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_DefaultPreemptionArgs_To_v1_DefaultPreemptionArgs(a.(*config.DefaultPreemptionArgs), b.(*configv1.DefaultPreemptionArgs), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.Extender)(nil), (*config.Extender)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_Extender_To_config_Extender(a.(*configv1.Extender), b.(*config.Extender), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.Extender)(nil), (*configv1.Extender)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_Extender_To_v1_Extender(a.(*config.Extender), b.(*configv1.Extender), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.ExtenderManagedResource)(nil), (*config.ExtenderManagedResource)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_ExtenderManagedResource_To_config_ExtenderManagedResource(a.(*configv1.ExtenderManagedResource), b.(*config.ExtenderManagedResource), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.ExtenderManagedResource)(nil), (*configv1.ExtenderManagedResource)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_ExtenderManagedResource_To_v1_ExtenderManagedResource(a.(*config.ExtenderManagedResource), b.(*configv1.ExtenderManagedResource), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.ExtenderTLSConfig)(nil), (*config.ExtenderTLSConfig)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_ExtenderTLSConfig_To_config_ExtenderTLSConfig(a.(*configv1.ExtenderTLSConfig), b.(*config.ExtenderTLSConfig), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.ExtenderTLSConfig)(nil), (*configv1.ExtenderTLSConfig)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_ExtenderTLSConfig_To_v1_ExtenderTLSConfig(a.(*config.ExtenderTLSConfig), b.(*configv1.ExtenderTLSConfig), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.InterPodAffinityArgs)(nil), (*config.InterPodAffinityArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_InterPodAffinityArgs_To_config_InterPodAffinityArgs(a.(*configv1.InterPodAffinityArgs), b.(*config.InterPodAffinityArgs), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.InterPodAffinityArgs)(nil), (*configv1.InterPodAffinityArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_InterPodAffinityArgs_To_v1_InterPodAffinityArgs(a.(*config.InterPodAffinityArgs), b.(*configv1.InterPodAffinityArgs), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.KubeSchedulerProfile)(nil), (*config.KubeSchedulerProfile)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_KubeSchedulerProfile_To_config_KubeSchedulerProfile(a.(*configv1.KubeSchedulerProfile), b.(*config.KubeSchedulerProfile), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.KubeSchedulerProfile)(nil), (*configv1.KubeSchedulerProfile)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_KubeSchedulerProfile_To_v1_KubeSchedulerProfile(a.(*config.KubeSchedulerProfile), b.(*configv1.KubeSchedulerProfile), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.NodeAffinityArgs)(nil), (*config.NodeAffinityArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_NodeAffinityArgs_To_config_NodeAffinityArgs(a.(*configv1.NodeAffinityArgs), b.(*config.NodeAffinityArgs), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.NodeAffinityArgs)(nil), (*configv1.NodeAffinityArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_NodeAffinityArgs_To_v1_NodeAffinityArgs(a.(*config.NodeAffinityArgs), b.(*configv1.NodeAffinityArgs), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.NodeResourcesBalancedAllocationArgs)(nil), (*config.NodeResourcesBalancedAllocationArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_NodeResourcesBalancedAllocationArgs_To_config_NodeResourcesBalancedAllocationArgs(a.(*configv1.NodeResourcesBalancedAllocationArgs), b.(*config.NodeResourcesBalancedAllocationArgs), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.NodeResourcesBalancedAllocationArgs)(nil), (*configv1.NodeResourcesBalancedAllocationArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_NodeResourcesBalancedAllocationArgs_To_v1_NodeResourcesBalancedAllocationArgs(a.(*config.NodeResourcesBalancedAllocationArgs), b.(*configv1.NodeResourcesBalancedAllocationArgs), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.NodeResourcesFitArgs)(nil), (*config.NodeResourcesFitArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_NodeResourcesFitArgs_To_config_NodeResourcesFitArgs(a.(*configv1.NodeResourcesFitArgs), b.(*config.NodeResourcesFitArgs), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.NodeResourcesFitArgs)(nil), (*configv1.NodeResourcesFitArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_NodeResourcesFitArgs_To_v1_NodeResourcesFitArgs(a.(*config.NodeResourcesFitArgs), b.(*configv1.NodeResourcesFitArgs), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.Plugin)(nil), (*config.Plugin)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_Plugin_To_config_Plugin(a.(*configv1.Plugin), b.(*config.Plugin), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.Plugin)(nil), (*configv1.Plugin)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_Plugin_To_v1_Plugin(a.(*config.Plugin), b.(*configv1.Plugin), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.PluginConfig)(nil), (*config.PluginConfig)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_PluginConfig_To_config_PluginConfig(a.(*configv1.PluginConfig), b.(*config.PluginConfig), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.PluginConfig)(nil), (*configv1.PluginConfig)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_PluginConfig_To_v1_PluginConfig(a.(*config.PluginConfig), b.(*configv1.PluginConfig), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.PluginSet)(nil), (*config.PluginSet)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_PluginSet_To_config_PluginSet(a.(*configv1.PluginSet), b.(*config.PluginSet), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.PluginSet)(nil), (*configv1.PluginSet)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_PluginSet_To_v1_PluginSet(a.(*config.PluginSet), b.(*configv1.PluginSet), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.Plugins)(nil), (*config.Plugins)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_Plugins_To_config_Plugins(a.(*configv1.Plugins), b.(*config.Plugins), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.Plugins)(nil), (*configv1.Plugins)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_Plugins_To_v1_Plugins(a.(*config.Plugins), b.(*configv1.Plugins), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.PodTopologySpreadArgs)(nil), (*config.PodTopologySpreadArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_PodTopologySpreadArgs_To_config_PodTopologySpreadArgs(a.(*configv1.PodTopologySpreadArgs), b.(*config.PodTopologySpreadArgs), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.PodTopologySpreadArgs)(nil), (*configv1.PodTopologySpreadArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_PodTopologySpreadArgs_To_v1_PodTopologySpreadArgs(a.(*config.PodTopologySpreadArgs), b.(*configv1.PodTopologySpreadArgs), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.RequestedToCapacityRatioParam)(nil), (*config.RequestedToCapacityRatioParam)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_RequestedToCapacityRatioParam_To_config_RequestedToCapacityRatioParam(a.(*configv1.RequestedToCapacityRatioParam), b.(*config.RequestedToCapacityRatioParam), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.RequestedToCapacityRatioParam)(nil), (*configv1.RequestedToCapacityRatioParam)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_RequestedToCapacityRatioParam_To_v1_RequestedToCapacityRatioParam(a.(*config.RequestedToCapacityRatioParam), b.(*configv1.RequestedToCapacityRatioParam), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.ResourceSpec)(nil), (*config.ResourceSpec)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_ResourceSpec_To_config_ResourceSpec(a.(*configv1.ResourceSpec), b.(*config.ResourceSpec), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.ResourceSpec)(nil), (*configv1.ResourceSpec)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_ResourceSpec_To_v1_ResourceSpec(a.(*config.ResourceSpec), b.(*configv1.ResourceSpec), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.ScoringStrategy)(nil), (*config.ScoringStrategy)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_ScoringStrategy_To_config_ScoringStrategy(a.(*configv1.ScoringStrategy), b.(*config.ScoringStrategy), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.ScoringStrategy)(nil), (*configv1.ScoringStrategy)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_ScoringStrategy_To_v1_ScoringStrategy(a.(*config.ScoringStrategy), b.(*configv1.ScoringStrategy), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.UtilizationShapePoint)(nil), (*config.UtilizationShapePoint)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_UtilizationShapePoint_To_config_UtilizationShapePoint(a.(*configv1.UtilizationShapePoint), b.(*config.UtilizationShapePoint), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.UtilizationShapePoint)(nil), (*configv1.UtilizationShapePoint)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_UtilizationShapePoint_To_v1_UtilizationShapePoint(a.(*config.UtilizationShapePoint), b.(*configv1.UtilizationShapePoint), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*configv1.VolumeBindingArgs)(nil), (*config.VolumeBindingArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_VolumeBindingArgs_To_config_VolumeBindingArgs(a.(*configv1.VolumeBindingArgs), b.(*config.VolumeBindingArgs), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*config.VolumeBindingArgs)(nil), (*configv1.VolumeBindingArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_VolumeBindingArgs_To_v1_VolumeBindingArgs(a.(*config.VolumeBindingArgs), b.(*configv1.VolumeBindingArgs), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddConversionFunc((*config.KubeSchedulerConfiguration)(nil), (*configv1.KubeSchedulerConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_config_KubeSchedulerConfiguration_To_v1_KubeSchedulerConfiguration(a.(*config.KubeSchedulerConfiguration), b.(*configv1.KubeSchedulerConfiguration), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddConversionFunc((*configv1.KubeSchedulerConfiguration)(nil), (*config.KubeSchedulerConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1_KubeSchedulerConfiguration_To_config_KubeSchedulerConfiguration(a.(*configv1.KubeSchedulerConfiguration), b.(*config.KubeSchedulerConfiguration), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func autoConvert_v1_DefaultPreemptionArgs_To_config_DefaultPreemptionArgs(in *configv1.DefaultPreemptionArgs, out *config.DefaultPreemptionArgs, s conversion.Scope) error {
|
||||
if err := metav1.Convert_Pointer_int32_To_int32(&in.MinCandidateNodesPercentage, &out.MinCandidateNodesPercentage, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := metav1.Convert_Pointer_int32_To_int32(&in.MinCandidateNodesAbsolute, &out.MinCandidateNodesAbsolute, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_DefaultPreemptionArgs_To_config_DefaultPreemptionArgs is an autogenerated conversion function.
|
||||
func Convert_v1_DefaultPreemptionArgs_To_config_DefaultPreemptionArgs(in *configv1.DefaultPreemptionArgs, out *config.DefaultPreemptionArgs, s conversion.Scope) error {
|
||||
return autoConvert_v1_DefaultPreemptionArgs_To_config_DefaultPreemptionArgs(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_DefaultPreemptionArgs_To_v1_DefaultPreemptionArgs(in *config.DefaultPreemptionArgs, out *configv1.DefaultPreemptionArgs, s conversion.Scope) error {
|
||||
if err := metav1.Convert_int32_To_Pointer_int32(&in.MinCandidateNodesPercentage, &out.MinCandidateNodesPercentage, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := metav1.Convert_int32_To_Pointer_int32(&in.MinCandidateNodesAbsolute, &out.MinCandidateNodesAbsolute, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_DefaultPreemptionArgs_To_v1_DefaultPreemptionArgs is an autogenerated conversion function.
|
||||
func Convert_config_DefaultPreemptionArgs_To_v1_DefaultPreemptionArgs(in *config.DefaultPreemptionArgs, out *configv1.DefaultPreemptionArgs, s conversion.Scope) error {
|
||||
return autoConvert_config_DefaultPreemptionArgs_To_v1_DefaultPreemptionArgs(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_Extender_To_config_Extender(in *configv1.Extender, out *config.Extender, s conversion.Scope) error {
|
||||
out.URLPrefix = in.URLPrefix
|
||||
out.FilterVerb = in.FilterVerb
|
||||
out.PreemptVerb = in.PreemptVerb
|
||||
out.PrioritizeVerb = in.PrioritizeVerb
|
||||
out.Weight = in.Weight
|
||||
out.BindVerb = in.BindVerb
|
||||
out.EnableHTTPS = in.EnableHTTPS
|
||||
out.TLSConfig = (*config.ExtenderTLSConfig)(unsafe.Pointer(in.TLSConfig))
|
||||
out.HTTPTimeout = in.HTTPTimeout
|
||||
out.NodeCacheCapable = in.NodeCacheCapable
|
||||
out.ManagedResources = *(*[]config.ExtenderManagedResource)(unsafe.Pointer(&in.ManagedResources))
|
||||
out.Ignorable = in.Ignorable
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_Extender_To_config_Extender is an autogenerated conversion function.
|
||||
func Convert_v1_Extender_To_config_Extender(in *configv1.Extender, out *config.Extender, s conversion.Scope) error {
|
||||
return autoConvert_v1_Extender_To_config_Extender(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_Extender_To_v1_Extender(in *config.Extender, out *configv1.Extender, s conversion.Scope) error {
|
||||
out.URLPrefix = in.URLPrefix
|
||||
out.FilterVerb = in.FilterVerb
|
||||
out.PreemptVerb = in.PreemptVerb
|
||||
out.PrioritizeVerb = in.PrioritizeVerb
|
||||
out.Weight = in.Weight
|
||||
out.BindVerb = in.BindVerb
|
||||
out.EnableHTTPS = in.EnableHTTPS
|
||||
out.TLSConfig = (*configv1.ExtenderTLSConfig)(unsafe.Pointer(in.TLSConfig))
|
||||
out.HTTPTimeout = in.HTTPTimeout
|
||||
out.NodeCacheCapable = in.NodeCacheCapable
|
||||
out.ManagedResources = *(*[]configv1.ExtenderManagedResource)(unsafe.Pointer(&in.ManagedResources))
|
||||
out.Ignorable = in.Ignorable
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_Extender_To_v1_Extender is an autogenerated conversion function.
|
||||
func Convert_config_Extender_To_v1_Extender(in *config.Extender, out *configv1.Extender, s conversion.Scope) error {
|
||||
return autoConvert_config_Extender_To_v1_Extender(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_ExtenderManagedResource_To_config_ExtenderManagedResource(in *configv1.ExtenderManagedResource, out *config.ExtenderManagedResource, s conversion.Scope) error {
|
||||
out.Name = in.Name
|
||||
out.IgnoredByScheduler = in.IgnoredByScheduler
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_ExtenderManagedResource_To_config_ExtenderManagedResource is an autogenerated conversion function.
|
||||
func Convert_v1_ExtenderManagedResource_To_config_ExtenderManagedResource(in *configv1.ExtenderManagedResource, out *config.ExtenderManagedResource, s conversion.Scope) error {
|
||||
return autoConvert_v1_ExtenderManagedResource_To_config_ExtenderManagedResource(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_ExtenderManagedResource_To_v1_ExtenderManagedResource(in *config.ExtenderManagedResource, out *configv1.ExtenderManagedResource, s conversion.Scope) error {
|
||||
out.Name = in.Name
|
||||
out.IgnoredByScheduler = in.IgnoredByScheduler
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_ExtenderManagedResource_To_v1_ExtenderManagedResource is an autogenerated conversion function.
|
||||
func Convert_config_ExtenderManagedResource_To_v1_ExtenderManagedResource(in *config.ExtenderManagedResource, out *configv1.ExtenderManagedResource, s conversion.Scope) error {
|
||||
return autoConvert_config_ExtenderManagedResource_To_v1_ExtenderManagedResource(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_ExtenderTLSConfig_To_config_ExtenderTLSConfig(in *configv1.ExtenderTLSConfig, out *config.ExtenderTLSConfig, s conversion.Scope) error {
|
||||
out.Insecure = in.Insecure
|
||||
out.ServerName = in.ServerName
|
||||
out.CertFile = in.CertFile
|
||||
out.KeyFile = in.KeyFile
|
||||
out.CAFile = in.CAFile
|
||||
out.CertData = *(*[]byte)(unsafe.Pointer(&in.CertData))
|
||||
out.KeyData = *(*[]byte)(unsafe.Pointer(&in.KeyData))
|
||||
out.CAData = *(*[]byte)(unsafe.Pointer(&in.CAData))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_ExtenderTLSConfig_To_config_ExtenderTLSConfig is an autogenerated conversion function.
|
||||
func Convert_v1_ExtenderTLSConfig_To_config_ExtenderTLSConfig(in *configv1.ExtenderTLSConfig, out *config.ExtenderTLSConfig, s conversion.Scope) error {
|
||||
return autoConvert_v1_ExtenderTLSConfig_To_config_ExtenderTLSConfig(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_ExtenderTLSConfig_To_v1_ExtenderTLSConfig(in *config.ExtenderTLSConfig, out *configv1.ExtenderTLSConfig, s conversion.Scope) error {
|
||||
out.Insecure = in.Insecure
|
||||
out.ServerName = in.ServerName
|
||||
out.CertFile = in.CertFile
|
||||
out.KeyFile = in.KeyFile
|
||||
out.CAFile = in.CAFile
|
||||
out.CertData = *(*[]byte)(unsafe.Pointer(&in.CertData))
|
||||
out.KeyData = *(*[]byte)(unsafe.Pointer(&in.KeyData))
|
||||
out.CAData = *(*[]byte)(unsafe.Pointer(&in.CAData))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_ExtenderTLSConfig_To_v1_ExtenderTLSConfig is an autogenerated conversion function.
|
||||
func Convert_config_ExtenderTLSConfig_To_v1_ExtenderTLSConfig(in *config.ExtenderTLSConfig, out *configv1.ExtenderTLSConfig, s conversion.Scope) error {
|
||||
return autoConvert_config_ExtenderTLSConfig_To_v1_ExtenderTLSConfig(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_InterPodAffinityArgs_To_config_InterPodAffinityArgs(in *configv1.InterPodAffinityArgs, out *config.InterPodAffinityArgs, s conversion.Scope) error {
|
||||
if err := metav1.Convert_Pointer_int32_To_int32(&in.HardPodAffinityWeight, &out.HardPodAffinityWeight, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.IgnorePreferredTermsOfExistingPods = in.IgnorePreferredTermsOfExistingPods
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_InterPodAffinityArgs_To_config_InterPodAffinityArgs is an autogenerated conversion function.
|
||||
func Convert_v1_InterPodAffinityArgs_To_config_InterPodAffinityArgs(in *configv1.InterPodAffinityArgs, out *config.InterPodAffinityArgs, s conversion.Scope) error {
|
||||
return autoConvert_v1_InterPodAffinityArgs_To_config_InterPodAffinityArgs(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_InterPodAffinityArgs_To_v1_InterPodAffinityArgs(in *config.InterPodAffinityArgs, out *configv1.InterPodAffinityArgs, s conversion.Scope) error {
|
||||
if err := metav1.Convert_int32_To_Pointer_int32(&in.HardPodAffinityWeight, &out.HardPodAffinityWeight, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.IgnorePreferredTermsOfExistingPods = in.IgnorePreferredTermsOfExistingPods
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_InterPodAffinityArgs_To_v1_InterPodAffinityArgs is an autogenerated conversion function.
|
||||
func Convert_config_InterPodAffinityArgs_To_v1_InterPodAffinityArgs(in *config.InterPodAffinityArgs, out *configv1.InterPodAffinityArgs, s conversion.Scope) error {
|
||||
return autoConvert_config_InterPodAffinityArgs_To_v1_InterPodAffinityArgs(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_KubeSchedulerConfiguration_To_config_KubeSchedulerConfiguration(in *configv1.KubeSchedulerConfiguration, out *config.KubeSchedulerConfiguration, s conversion.Scope) error {
|
||||
if err := metav1.Convert_Pointer_int32_To_int32(&in.Parallelism, &out.Parallelism, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v1alpha1.Convert_v1alpha1_LeaderElectionConfiguration_To_config_LeaderElectionConfiguration(&in.LeaderElection, &out.LeaderElection, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v1alpha1.Convert_v1alpha1_ClientConnectionConfiguration_To_config_ClientConnectionConfiguration(&in.ClientConnection, &out.ClientConnection, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v1alpha1.Convert_v1alpha1_DebuggingConfiguration_To_config_DebuggingConfiguration(&in.DebuggingConfiguration, &out.DebuggingConfiguration, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.PercentageOfNodesToScore = (*int32)(unsafe.Pointer(in.PercentageOfNodesToScore))
|
||||
if err := metav1.Convert_Pointer_int64_To_int64(&in.PodInitialBackoffSeconds, &out.PodInitialBackoffSeconds, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := metav1.Convert_Pointer_int64_To_int64(&in.PodMaxBackoffSeconds, &out.PodMaxBackoffSeconds, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if in.Profiles != nil {
|
||||
in, out := &in.Profiles, &out.Profiles
|
||||
*out = make([]config.KubeSchedulerProfile, len(*in))
|
||||
for i := range *in {
|
||||
if err := Convert_v1_KubeSchedulerProfile_To_config_KubeSchedulerProfile(&(*in)[i], &(*out)[i], s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
out.Profiles = nil
|
||||
}
|
||||
out.Extenders = *(*[]config.Extender)(unsafe.Pointer(&in.Extenders))
|
||||
out.DelayCacheUntilActive = in.DelayCacheUntilActive
|
||||
return nil
|
||||
}
|
||||
|
||||
func autoConvert_config_KubeSchedulerConfiguration_To_v1_KubeSchedulerConfiguration(in *config.KubeSchedulerConfiguration, out *configv1.KubeSchedulerConfiguration, s conversion.Scope) error {
|
||||
if err := metav1.Convert_int32_To_Pointer_int32(&in.Parallelism, &out.Parallelism, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v1alpha1.Convert_config_LeaderElectionConfiguration_To_v1alpha1_LeaderElectionConfiguration(&in.LeaderElection, &out.LeaderElection, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v1alpha1.Convert_config_ClientConnectionConfiguration_To_v1alpha1_ClientConnectionConfiguration(&in.ClientConnection, &out.ClientConnection, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v1alpha1.Convert_config_DebuggingConfiguration_To_v1alpha1_DebuggingConfiguration(&in.DebuggingConfiguration, &out.DebuggingConfiguration, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.PercentageOfNodesToScore = (*int32)(unsafe.Pointer(in.PercentageOfNodesToScore))
|
||||
if err := metav1.Convert_int64_To_Pointer_int64(&in.PodInitialBackoffSeconds, &out.PodInitialBackoffSeconds, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := metav1.Convert_int64_To_Pointer_int64(&in.PodMaxBackoffSeconds, &out.PodMaxBackoffSeconds, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if in.Profiles != nil {
|
||||
in, out := &in.Profiles, &out.Profiles
|
||||
*out = make([]configv1.KubeSchedulerProfile, len(*in))
|
||||
for i := range *in {
|
||||
if err := Convert_config_KubeSchedulerProfile_To_v1_KubeSchedulerProfile(&(*in)[i], &(*out)[i], s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
out.Profiles = nil
|
||||
}
|
||||
out.Extenders = *(*[]configv1.Extender)(unsafe.Pointer(&in.Extenders))
|
||||
out.DelayCacheUntilActive = in.DelayCacheUntilActive
|
||||
return nil
|
||||
}
|
||||
|
||||
func autoConvert_v1_KubeSchedulerProfile_To_config_KubeSchedulerProfile(in *configv1.KubeSchedulerProfile, out *config.KubeSchedulerProfile, s conversion.Scope) error {
|
||||
if err := metav1.Convert_Pointer_string_To_string(&in.SchedulerName, &out.SchedulerName, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.PercentageOfNodesToScore = (*int32)(unsafe.Pointer(in.PercentageOfNodesToScore))
|
||||
if in.Plugins != nil {
|
||||
in, out := &in.Plugins, &out.Plugins
|
||||
*out = new(config.Plugins)
|
||||
if err := Convert_v1_Plugins_To_config_Plugins(*in, *out, s); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
out.Plugins = nil
|
||||
}
|
||||
if in.PluginConfig != nil {
|
||||
in, out := &in.PluginConfig, &out.PluginConfig
|
||||
*out = make([]config.PluginConfig, len(*in))
|
||||
for i := range *in {
|
||||
if err := Convert_v1_PluginConfig_To_config_PluginConfig(&(*in)[i], &(*out)[i], s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
out.PluginConfig = nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_KubeSchedulerProfile_To_config_KubeSchedulerProfile is an autogenerated conversion function.
|
||||
func Convert_v1_KubeSchedulerProfile_To_config_KubeSchedulerProfile(in *configv1.KubeSchedulerProfile, out *config.KubeSchedulerProfile, s conversion.Scope) error {
|
||||
return autoConvert_v1_KubeSchedulerProfile_To_config_KubeSchedulerProfile(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_KubeSchedulerProfile_To_v1_KubeSchedulerProfile(in *config.KubeSchedulerProfile, out *configv1.KubeSchedulerProfile, s conversion.Scope) error {
|
||||
if err := metav1.Convert_string_To_Pointer_string(&in.SchedulerName, &out.SchedulerName, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.PercentageOfNodesToScore = (*int32)(unsafe.Pointer(in.PercentageOfNodesToScore))
|
||||
if in.Plugins != nil {
|
||||
in, out := &in.Plugins, &out.Plugins
|
||||
*out = new(configv1.Plugins)
|
||||
if err := Convert_config_Plugins_To_v1_Plugins(*in, *out, s); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
out.Plugins = nil
|
||||
}
|
||||
if in.PluginConfig != nil {
|
||||
in, out := &in.PluginConfig, &out.PluginConfig
|
||||
*out = make([]configv1.PluginConfig, len(*in))
|
||||
for i := range *in {
|
||||
if err := Convert_config_PluginConfig_To_v1_PluginConfig(&(*in)[i], &(*out)[i], s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
out.PluginConfig = nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_KubeSchedulerProfile_To_v1_KubeSchedulerProfile is an autogenerated conversion function.
|
||||
func Convert_config_KubeSchedulerProfile_To_v1_KubeSchedulerProfile(in *config.KubeSchedulerProfile, out *configv1.KubeSchedulerProfile, s conversion.Scope) error {
|
||||
return autoConvert_config_KubeSchedulerProfile_To_v1_KubeSchedulerProfile(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_NodeAffinityArgs_To_config_NodeAffinityArgs(in *configv1.NodeAffinityArgs, out *config.NodeAffinityArgs, s conversion.Scope) error {
|
||||
out.AddedAffinity = (*corev1.NodeAffinity)(unsafe.Pointer(in.AddedAffinity))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_NodeAffinityArgs_To_config_NodeAffinityArgs is an autogenerated conversion function.
|
||||
func Convert_v1_NodeAffinityArgs_To_config_NodeAffinityArgs(in *configv1.NodeAffinityArgs, out *config.NodeAffinityArgs, s conversion.Scope) error {
|
||||
return autoConvert_v1_NodeAffinityArgs_To_config_NodeAffinityArgs(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_NodeAffinityArgs_To_v1_NodeAffinityArgs(in *config.NodeAffinityArgs, out *configv1.NodeAffinityArgs, s conversion.Scope) error {
|
||||
out.AddedAffinity = (*corev1.NodeAffinity)(unsafe.Pointer(in.AddedAffinity))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_NodeAffinityArgs_To_v1_NodeAffinityArgs is an autogenerated conversion function.
|
||||
func Convert_config_NodeAffinityArgs_To_v1_NodeAffinityArgs(in *config.NodeAffinityArgs, out *configv1.NodeAffinityArgs, s conversion.Scope) error {
|
||||
return autoConvert_config_NodeAffinityArgs_To_v1_NodeAffinityArgs(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_NodeResourcesBalancedAllocationArgs_To_config_NodeResourcesBalancedAllocationArgs(in *configv1.NodeResourcesBalancedAllocationArgs, out *config.NodeResourcesBalancedAllocationArgs, s conversion.Scope) error {
|
||||
out.Resources = *(*[]config.ResourceSpec)(unsafe.Pointer(&in.Resources))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_NodeResourcesBalancedAllocationArgs_To_config_NodeResourcesBalancedAllocationArgs is an autogenerated conversion function.
|
||||
func Convert_v1_NodeResourcesBalancedAllocationArgs_To_config_NodeResourcesBalancedAllocationArgs(in *configv1.NodeResourcesBalancedAllocationArgs, out *config.NodeResourcesBalancedAllocationArgs, s conversion.Scope) error {
|
||||
return autoConvert_v1_NodeResourcesBalancedAllocationArgs_To_config_NodeResourcesBalancedAllocationArgs(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_NodeResourcesBalancedAllocationArgs_To_v1_NodeResourcesBalancedAllocationArgs(in *config.NodeResourcesBalancedAllocationArgs, out *configv1.NodeResourcesBalancedAllocationArgs, s conversion.Scope) error {
|
||||
out.Resources = *(*[]configv1.ResourceSpec)(unsafe.Pointer(&in.Resources))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_NodeResourcesBalancedAllocationArgs_To_v1_NodeResourcesBalancedAllocationArgs is an autogenerated conversion function.
|
||||
func Convert_config_NodeResourcesBalancedAllocationArgs_To_v1_NodeResourcesBalancedAllocationArgs(in *config.NodeResourcesBalancedAllocationArgs, out *configv1.NodeResourcesBalancedAllocationArgs, s conversion.Scope) error {
|
||||
return autoConvert_config_NodeResourcesBalancedAllocationArgs_To_v1_NodeResourcesBalancedAllocationArgs(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_NodeResourcesFitArgs_To_config_NodeResourcesFitArgs(in *configv1.NodeResourcesFitArgs, out *config.NodeResourcesFitArgs, s conversion.Scope) error {
|
||||
out.IgnoredResources = *(*[]string)(unsafe.Pointer(&in.IgnoredResources))
|
||||
out.IgnoredResourceGroups = *(*[]string)(unsafe.Pointer(&in.IgnoredResourceGroups))
|
||||
out.ScoringStrategy = (*config.ScoringStrategy)(unsafe.Pointer(in.ScoringStrategy))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_NodeResourcesFitArgs_To_config_NodeResourcesFitArgs is an autogenerated conversion function.
|
||||
func Convert_v1_NodeResourcesFitArgs_To_config_NodeResourcesFitArgs(in *configv1.NodeResourcesFitArgs, out *config.NodeResourcesFitArgs, s conversion.Scope) error {
|
||||
return autoConvert_v1_NodeResourcesFitArgs_To_config_NodeResourcesFitArgs(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_NodeResourcesFitArgs_To_v1_NodeResourcesFitArgs(in *config.NodeResourcesFitArgs, out *configv1.NodeResourcesFitArgs, s conversion.Scope) error {
|
||||
out.IgnoredResources = *(*[]string)(unsafe.Pointer(&in.IgnoredResources))
|
||||
out.IgnoredResourceGroups = *(*[]string)(unsafe.Pointer(&in.IgnoredResourceGroups))
|
||||
out.ScoringStrategy = (*configv1.ScoringStrategy)(unsafe.Pointer(in.ScoringStrategy))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_NodeResourcesFitArgs_To_v1_NodeResourcesFitArgs is an autogenerated conversion function.
|
||||
func Convert_config_NodeResourcesFitArgs_To_v1_NodeResourcesFitArgs(in *config.NodeResourcesFitArgs, out *configv1.NodeResourcesFitArgs, s conversion.Scope) error {
|
||||
return autoConvert_config_NodeResourcesFitArgs_To_v1_NodeResourcesFitArgs(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_Plugin_To_config_Plugin(in *configv1.Plugin, out *config.Plugin, s conversion.Scope) error {
|
||||
out.Name = in.Name
|
||||
if err := metav1.Convert_Pointer_int32_To_int32(&in.Weight, &out.Weight, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_Plugin_To_config_Plugin is an autogenerated conversion function.
|
||||
func Convert_v1_Plugin_To_config_Plugin(in *configv1.Plugin, out *config.Plugin, s conversion.Scope) error {
|
||||
return autoConvert_v1_Plugin_To_config_Plugin(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_Plugin_To_v1_Plugin(in *config.Plugin, out *configv1.Plugin, s conversion.Scope) error {
|
||||
out.Name = in.Name
|
||||
if err := metav1.Convert_int32_To_Pointer_int32(&in.Weight, &out.Weight, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_Plugin_To_v1_Plugin is an autogenerated conversion function.
|
||||
func Convert_config_Plugin_To_v1_Plugin(in *config.Plugin, out *configv1.Plugin, s conversion.Scope) error {
|
||||
return autoConvert_config_Plugin_To_v1_Plugin(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_PluginConfig_To_config_PluginConfig(in *configv1.PluginConfig, out *config.PluginConfig, s conversion.Scope) error {
|
||||
out.Name = in.Name
|
||||
if err := runtime.Convert_runtime_RawExtension_To_runtime_Object(&in.Args, &out.Args, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_PluginConfig_To_config_PluginConfig is an autogenerated conversion function.
|
||||
func Convert_v1_PluginConfig_To_config_PluginConfig(in *configv1.PluginConfig, out *config.PluginConfig, s conversion.Scope) error {
|
||||
return autoConvert_v1_PluginConfig_To_config_PluginConfig(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_PluginConfig_To_v1_PluginConfig(in *config.PluginConfig, out *configv1.PluginConfig, s conversion.Scope) error {
|
||||
out.Name = in.Name
|
||||
if err := runtime.Convert_runtime_Object_To_runtime_RawExtension(&in.Args, &out.Args, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_PluginConfig_To_v1_PluginConfig is an autogenerated conversion function.
|
||||
func Convert_config_PluginConfig_To_v1_PluginConfig(in *config.PluginConfig, out *configv1.PluginConfig, s conversion.Scope) error {
|
||||
return autoConvert_config_PluginConfig_To_v1_PluginConfig(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_PluginSet_To_config_PluginSet(in *configv1.PluginSet, out *config.PluginSet, s conversion.Scope) error {
|
||||
if in.Enabled != nil {
|
||||
in, out := &in.Enabled, &out.Enabled
|
||||
*out = make([]config.Plugin, len(*in))
|
||||
for i := range *in {
|
||||
if err := Convert_v1_Plugin_To_config_Plugin(&(*in)[i], &(*out)[i], s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
out.Enabled = nil
|
||||
}
|
||||
if in.Disabled != nil {
|
||||
in, out := &in.Disabled, &out.Disabled
|
||||
*out = make([]config.Plugin, len(*in))
|
||||
for i := range *in {
|
||||
if err := Convert_v1_Plugin_To_config_Plugin(&(*in)[i], &(*out)[i], s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
out.Disabled = nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_PluginSet_To_config_PluginSet is an autogenerated conversion function.
|
||||
func Convert_v1_PluginSet_To_config_PluginSet(in *configv1.PluginSet, out *config.PluginSet, s conversion.Scope) error {
|
||||
return autoConvert_v1_PluginSet_To_config_PluginSet(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_PluginSet_To_v1_PluginSet(in *config.PluginSet, out *configv1.PluginSet, s conversion.Scope) error {
|
||||
if in.Enabled != nil {
|
||||
in, out := &in.Enabled, &out.Enabled
|
||||
*out = make([]configv1.Plugin, len(*in))
|
||||
for i := range *in {
|
||||
if err := Convert_config_Plugin_To_v1_Plugin(&(*in)[i], &(*out)[i], s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
out.Enabled = nil
|
||||
}
|
||||
if in.Disabled != nil {
|
||||
in, out := &in.Disabled, &out.Disabled
|
||||
*out = make([]configv1.Plugin, len(*in))
|
||||
for i := range *in {
|
||||
if err := Convert_config_Plugin_To_v1_Plugin(&(*in)[i], &(*out)[i], s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
out.Disabled = nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_PluginSet_To_v1_PluginSet is an autogenerated conversion function.
|
||||
func Convert_config_PluginSet_To_v1_PluginSet(in *config.PluginSet, out *configv1.PluginSet, s conversion.Scope) error {
|
||||
return autoConvert_config_PluginSet_To_v1_PluginSet(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_Plugins_To_config_Plugins(in *configv1.Plugins, out *config.Plugins, s conversion.Scope) error {
|
||||
if err := Convert_v1_PluginSet_To_config_PluginSet(&in.PreEnqueue, &out.PreEnqueue, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_v1_PluginSet_To_config_PluginSet(&in.QueueSort, &out.QueueSort, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_v1_PluginSet_To_config_PluginSet(&in.PreFilter, &out.PreFilter, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_v1_PluginSet_To_config_PluginSet(&in.Filter, &out.Filter, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_v1_PluginSet_To_config_PluginSet(&in.PostFilter, &out.PostFilter, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_v1_PluginSet_To_config_PluginSet(&in.PreScore, &out.PreScore, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_v1_PluginSet_To_config_PluginSet(&in.Score, &out.Score, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_v1_PluginSet_To_config_PluginSet(&in.Reserve, &out.Reserve, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_v1_PluginSet_To_config_PluginSet(&in.Permit, &out.Permit, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_v1_PluginSet_To_config_PluginSet(&in.PreBind, &out.PreBind, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_v1_PluginSet_To_config_PluginSet(&in.Bind, &out.Bind, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_v1_PluginSet_To_config_PluginSet(&in.PostBind, &out.PostBind, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_v1_PluginSet_To_config_PluginSet(&in.MultiPoint, &out.MultiPoint, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_Plugins_To_config_Plugins is an autogenerated conversion function.
|
||||
func Convert_v1_Plugins_To_config_Plugins(in *configv1.Plugins, out *config.Plugins, s conversion.Scope) error {
|
||||
return autoConvert_v1_Plugins_To_config_Plugins(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_Plugins_To_v1_Plugins(in *config.Plugins, out *configv1.Plugins, s conversion.Scope) error {
|
||||
if err := Convert_config_PluginSet_To_v1_PluginSet(&in.PreEnqueue, &out.PreEnqueue, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_config_PluginSet_To_v1_PluginSet(&in.QueueSort, &out.QueueSort, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_config_PluginSet_To_v1_PluginSet(&in.PreFilter, &out.PreFilter, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_config_PluginSet_To_v1_PluginSet(&in.Filter, &out.Filter, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_config_PluginSet_To_v1_PluginSet(&in.PostFilter, &out.PostFilter, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_config_PluginSet_To_v1_PluginSet(&in.PreScore, &out.PreScore, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_config_PluginSet_To_v1_PluginSet(&in.Score, &out.Score, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_config_PluginSet_To_v1_PluginSet(&in.Reserve, &out.Reserve, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_config_PluginSet_To_v1_PluginSet(&in.Permit, &out.Permit, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_config_PluginSet_To_v1_PluginSet(&in.PreBind, &out.PreBind, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_config_PluginSet_To_v1_PluginSet(&in.Bind, &out.Bind, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_config_PluginSet_To_v1_PluginSet(&in.PostBind, &out.PostBind, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := Convert_config_PluginSet_To_v1_PluginSet(&in.MultiPoint, &out.MultiPoint, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_Plugins_To_v1_Plugins is an autogenerated conversion function.
|
||||
func Convert_config_Plugins_To_v1_Plugins(in *config.Plugins, out *configv1.Plugins, s conversion.Scope) error {
|
||||
return autoConvert_config_Plugins_To_v1_Plugins(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_PodTopologySpreadArgs_To_config_PodTopologySpreadArgs(in *configv1.PodTopologySpreadArgs, out *config.PodTopologySpreadArgs, s conversion.Scope) error {
|
||||
out.DefaultConstraints = *(*[]corev1.TopologySpreadConstraint)(unsafe.Pointer(&in.DefaultConstraints))
|
||||
out.DefaultingType = config.PodTopologySpreadConstraintsDefaulting(in.DefaultingType)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_PodTopologySpreadArgs_To_config_PodTopologySpreadArgs is an autogenerated conversion function.
|
||||
func Convert_v1_PodTopologySpreadArgs_To_config_PodTopologySpreadArgs(in *configv1.PodTopologySpreadArgs, out *config.PodTopologySpreadArgs, s conversion.Scope) error {
|
||||
return autoConvert_v1_PodTopologySpreadArgs_To_config_PodTopologySpreadArgs(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_PodTopologySpreadArgs_To_v1_PodTopologySpreadArgs(in *config.PodTopologySpreadArgs, out *configv1.PodTopologySpreadArgs, s conversion.Scope) error {
|
||||
out.DefaultConstraints = *(*[]corev1.TopologySpreadConstraint)(unsafe.Pointer(&in.DefaultConstraints))
|
||||
out.DefaultingType = configv1.PodTopologySpreadConstraintsDefaulting(in.DefaultingType)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_PodTopologySpreadArgs_To_v1_PodTopologySpreadArgs is an autogenerated conversion function.
|
||||
func Convert_config_PodTopologySpreadArgs_To_v1_PodTopologySpreadArgs(in *config.PodTopologySpreadArgs, out *configv1.PodTopologySpreadArgs, s conversion.Scope) error {
|
||||
return autoConvert_config_PodTopologySpreadArgs_To_v1_PodTopologySpreadArgs(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_RequestedToCapacityRatioParam_To_config_RequestedToCapacityRatioParam(in *configv1.RequestedToCapacityRatioParam, out *config.RequestedToCapacityRatioParam, s conversion.Scope) error {
|
||||
out.Shape = *(*[]config.UtilizationShapePoint)(unsafe.Pointer(&in.Shape))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_RequestedToCapacityRatioParam_To_config_RequestedToCapacityRatioParam is an autogenerated conversion function.
|
||||
func Convert_v1_RequestedToCapacityRatioParam_To_config_RequestedToCapacityRatioParam(in *configv1.RequestedToCapacityRatioParam, out *config.RequestedToCapacityRatioParam, s conversion.Scope) error {
|
||||
return autoConvert_v1_RequestedToCapacityRatioParam_To_config_RequestedToCapacityRatioParam(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_RequestedToCapacityRatioParam_To_v1_RequestedToCapacityRatioParam(in *config.RequestedToCapacityRatioParam, out *configv1.RequestedToCapacityRatioParam, s conversion.Scope) error {
|
||||
out.Shape = *(*[]configv1.UtilizationShapePoint)(unsafe.Pointer(&in.Shape))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_RequestedToCapacityRatioParam_To_v1_RequestedToCapacityRatioParam is an autogenerated conversion function.
|
||||
func Convert_config_RequestedToCapacityRatioParam_To_v1_RequestedToCapacityRatioParam(in *config.RequestedToCapacityRatioParam, out *configv1.RequestedToCapacityRatioParam, s conversion.Scope) error {
|
||||
return autoConvert_config_RequestedToCapacityRatioParam_To_v1_RequestedToCapacityRatioParam(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_ResourceSpec_To_config_ResourceSpec(in *configv1.ResourceSpec, out *config.ResourceSpec, s conversion.Scope) error {
|
||||
out.Name = in.Name
|
||||
out.Weight = in.Weight
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_ResourceSpec_To_config_ResourceSpec is an autogenerated conversion function.
|
||||
func Convert_v1_ResourceSpec_To_config_ResourceSpec(in *configv1.ResourceSpec, out *config.ResourceSpec, s conversion.Scope) error {
|
||||
return autoConvert_v1_ResourceSpec_To_config_ResourceSpec(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_ResourceSpec_To_v1_ResourceSpec(in *config.ResourceSpec, out *configv1.ResourceSpec, s conversion.Scope) error {
|
||||
out.Name = in.Name
|
||||
out.Weight = in.Weight
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_ResourceSpec_To_v1_ResourceSpec is an autogenerated conversion function.
|
||||
func Convert_config_ResourceSpec_To_v1_ResourceSpec(in *config.ResourceSpec, out *configv1.ResourceSpec, s conversion.Scope) error {
|
||||
return autoConvert_config_ResourceSpec_To_v1_ResourceSpec(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_ScoringStrategy_To_config_ScoringStrategy(in *configv1.ScoringStrategy, out *config.ScoringStrategy, s conversion.Scope) error {
|
||||
out.Type = config.ScoringStrategyType(in.Type)
|
||||
out.Resources = *(*[]config.ResourceSpec)(unsafe.Pointer(&in.Resources))
|
||||
out.RequestedToCapacityRatio = (*config.RequestedToCapacityRatioParam)(unsafe.Pointer(in.RequestedToCapacityRatio))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_ScoringStrategy_To_config_ScoringStrategy is an autogenerated conversion function.
|
||||
func Convert_v1_ScoringStrategy_To_config_ScoringStrategy(in *configv1.ScoringStrategy, out *config.ScoringStrategy, s conversion.Scope) error {
|
||||
return autoConvert_v1_ScoringStrategy_To_config_ScoringStrategy(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_ScoringStrategy_To_v1_ScoringStrategy(in *config.ScoringStrategy, out *configv1.ScoringStrategy, s conversion.Scope) error {
|
||||
out.Type = configv1.ScoringStrategyType(in.Type)
|
||||
out.Resources = *(*[]configv1.ResourceSpec)(unsafe.Pointer(&in.Resources))
|
||||
out.RequestedToCapacityRatio = (*configv1.RequestedToCapacityRatioParam)(unsafe.Pointer(in.RequestedToCapacityRatio))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_ScoringStrategy_To_v1_ScoringStrategy is an autogenerated conversion function.
|
||||
func Convert_config_ScoringStrategy_To_v1_ScoringStrategy(in *config.ScoringStrategy, out *configv1.ScoringStrategy, s conversion.Scope) error {
|
||||
return autoConvert_config_ScoringStrategy_To_v1_ScoringStrategy(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_UtilizationShapePoint_To_config_UtilizationShapePoint(in *configv1.UtilizationShapePoint, out *config.UtilizationShapePoint, s conversion.Scope) error {
|
||||
out.Utilization = in.Utilization
|
||||
out.Score = in.Score
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_UtilizationShapePoint_To_config_UtilizationShapePoint is an autogenerated conversion function.
|
||||
func Convert_v1_UtilizationShapePoint_To_config_UtilizationShapePoint(in *configv1.UtilizationShapePoint, out *config.UtilizationShapePoint, s conversion.Scope) error {
|
||||
return autoConvert_v1_UtilizationShapePoint_To_config_UtilizationShapePoint(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_UtilizationShapePoint_To_v1_UtilizationShapePoint(in *config.UtilizationShapePoint, out *configv1.UtilizationShapePoint, s conversion.Scope) error {
|
||||
out.Utilization = in.Utilization
|
||||
out.Score = in.Score
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_UtilizationShapePoint_To_v1_UtilizationShapePoint is an autogenerated conversion function.
|
||||
func Convert_config_UtilizationShapePoint_To_v1_UtilizationShapePoint(in *config.UtilizationShapePoint, out *configv1.UtilizationShapePoint, s conversion.Scope) error {
|
||||
return autoConvert_config_UtilizationShapePoint_To_v1_UtilizationShapePoint(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1_VolumeBindingArgs_To_config_VolumeBindingArgs(in *configv1.VolumeBindingArgs, out *config.VolumeBindingArgs, s conversion.Scope) error {
|
||||
if err := metav1.Convert_Pointer_int64_To_int64(&in.BindTimeoutSeconds, &out.BindTimeoutSeconds, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.Shape = *(*[]config.UtilizationShapePoint)(unsafe.Pointer(&in.Shape))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1_VolumeBindingArgs_To_config_VolumeBindingArgs is an autogenerated conversion function.
|
||||
func Convert_v1_VolumeBindingArgs_To_config_VolumeBindingArgs(in *configv1.VolumeBindingArgs, out *config.VolumeBindingArgs, s conversion.Scope) error {
|
||||
return autoConvert_v1_VolumeBindingArgs_To_config_VolumeBindingArgs(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_config_VolumeBindingArgs_To_v1_VolumeBindingArgs(in *config.VolumeBindingArgs, out *configv1.VolumeBindingArgs, s conversion.Scope) error {
|
||||
if err := metav1.Convert_int64_To_Pointer_int64(&in.BindTimeoutSeconds, &out.BindTimeoutSeconds, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.Shape = *(*[]configv1.UtilizationShapePoint)(unsafe.Pointer(&in.Shape))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_config_VolumeBindingArgs_To_v1_VolumeBindingArgs is an autogenerated conversion function.
|
||||
func Convert_config_VolumeBindingArgs_To_v1_VolumeBindingArgs(in *config.VolumeBindingArgs, out *configv1.VolumeBindingArgs, s conversion.Scope) error {
|
||||
return autoConvert_config_VolumeBindingArgs_To_v1_VolumeBindingArgs(in, out, s)
|
||||
}
|
22
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/zz_generated.deepcopy.go
generated
vendored
Normal file
22
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/zz_generated.deepcopy.go
generated
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
//go:build !ignore_autogenerated
// +build !ignore_autogenerated

/*
Copyright The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Code generated by deepcopy-gen. DO NOT EDIT.

package v1
73
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/v1/zz_generated.defaults.go
generated
vendored
Normal file
@ -0,0 +1,73 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated

/*
Copyright The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Code generated by defaulter-gen. DO NOT EDIT.

package v1

import (
    runtime "k8s.io/apimachinery/pkg/runtime"
    configv1 "k8s.io/kube-scheduler/config/v1"
)

// RegisterDefaults adds defaulters functions to the given scheme.
// Public to allow building arbitrary schemes.
// All generated defaulters are covering - they call all nested defaulters.
func RegisterDefaults(scheme *runtime.Scheme) error {
    scheme.AddTypeDefaultingFunc(&configv1.DefaultPreemptionArgs{}, func(obj interface{}) { SetObjectDefaults_DefaultPreemptionArgs(obj.(*configv1.DefaultPreemptionArgs)) })
    scheme.AddTypeDefaultingFunc(&configv1.InterPodAffinityArgs{}, func(obj interface{}) { SetObjectDefaults_InterPodAffinityArgs(obj.(*configv1.InterPodAffinityArgs)) })
    scheme.AddTypeDefaultingFunc(&configv1.KubeSchedulerConfiguration{}, func(obj interface{}) {
        SetObjectDefaults_KubeSchedulerConfiguration(obj.(*configv1.KubeSchedulerConfiguration))
    })
    scheme.AddTypeDefaultingFunc(&configv1.NodeResourcesBalancedAllocationArgs{}, func(obj interface{}) {
        SetObjectDefaults_NodeResourcesBalancedAllocationArgs(obj.(*configv1.NodeResourcesBalancedAllocationArgs))
    })
    scheme.AddTypeDefaultingFunc(&configv1.NodeResourcesFitArgs{}, func(obj interface{}) { SetObjectDefaults_NodeResourcesFitArgs(obj.(*configv1.NodeResourcesFitArgs)) })
    scheme.AddTypeDefaultingFunc(&configv1.PodTopologySpreadArgs{}, func(obj interface{}) { SetObjectDefaults_PodTopologySpreadArgs(obj.(*configv1.PodTopologySpreadArgs)) })
    scheme.AddTypeDefaultingFunc(&configv1.VolumeBindingArgs{}, func(obj interface{}) { SetObjectDefaults_VolumeBindingArgs(obj.(*configv1.VolumeBindingArgs)) })
    return nil
}

func SetObjectDefaults_DefaultPreemptionArgs(in *configv1.DefaultPreemptionArgs) {
    SetDefaults_DefaultPreemptionArgs(in)
}

func SetObjectDefaults_InterPodAffinityArgs(in *configv1.InterPodAffinityArgs) {
    SetDefaults_InterPodAffinityArgs(in)
}

func SetObjectDefaults_KubeSchedulerConfiguration(in *configv1.KubeSchedulerConfiguration) {
    SetDefaults_KubeSchedulerConfiguration(in)
}

func SetObjectDefaults_NodeResourcesBalancedAllocationArgs(in *configv1.NodeResourcesBalancedAllocationArgs) {
    SetDefaults_NodeResourcesBalancedAllocationArgs(in)
}

func SetObjectDefaults_NodeResourcesFitArgs(in *configv1.NodeResourcesFitArgs) {
    SetDefaults_NodeResourcesFitArgs(in)
}

func SetObjectDefaults_PodTopologySpreadArgs(in *configv1.PodTopologySpreadArgs) {
    SetDefaults_PodTopologySpreadArgs(in)
}

func SetObjectDefaults_VolumeBindingArgs(in *configv1.VolumeBindingArgs) {
    SetDefaults_VolumeBindingArgs(in)
}
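RegisterDefaults above registers one defaulting function per top-level configuration type; the scheme later looks the function up by the object's concrete type and applies it when defaults are requested for that object. The sketch below is a minimal, hypothetical type-keyed registry that mirrors that idea; the type, field, and the 600-second value are assumptions used only for illustration, not the vendored API.

// Illustrative sketch (not vendored code): a type-keyed defaulting registry.
package main

import (
    "fmt"
    "reflect"
)

type defaultingRegistry map[reflect.Type]func(obj interface{})

// add registers a defaulting function for the concrete type of obj.
func (r defaultingRegistry) add(obj interface{}, fn func(obj interface{})) {
    r[reflect.TypeOf(obj)] = fn
}

// applyDefaults looks up the function by concrete type and applies it, if registered.
func (r defaultingRegistry) applyDefaults(obj interface{}) {
    if fn, ok := r[reflect.TypeOf(obj)]; ok {
        fn(obj)
    }
}

type volumeBindingArgs struct{ BindTimeoutSeconds *int64 } // hypothetical stand-in type

func main() {
    reg := defaultingRegistry{}
    reg.add(&volumeBindingArgs{}, func(obj interface{}) {
        args := obj.(*volumeBindingArgs)
        if args.BindTimeoutSeconds == nil {
            v := int64(600) // assumed default, for illustration only
            args.BindTimeoutSeconds = &v
        }
    })

    args := &volumeBindingArgs{}
    reg.applyDefaults(args)
    fmt.Println(*args.BindTimeoutSeconds) // 600
}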
296
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/validation/validation.go
generated
vendored
Normal file
@ -0,0 +1,296 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package validation

import (
    "fmt"
    "reflect"

    v1 "k8s.io/api/core/v1"
    apiequality "k8s.io/apimachinery/pkg/api/equality"
    "k8s.io/apimachinery/pkg/runtime"
    utilerrors "k8s.io/apimachinery/pkg/util/errors"
    "k8s.io/apimachinery/pkg/util/sets"
    "k8s.io/apimachinery/pkg/util/validation"
    "k8s.io/apimachinery/pkg/util/validation/field"
    componentbasevalidation "k8s.io/component-base/config/validation"
    v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
    "k8s.io/kubernetes/pkg/scheduler/apis/config"
)

// ValidateKubeSchedulerConfiguration ensures validation of the KubeSchedulerConfiguration struct
func ValidateKubeSchedulerConfiguration(cc *config.KubeSchedulerConfiguration) utilerrors.Aggregate {
    var errs []error
    errs = append(errs, componentbasevalidation.ValidateClientConnectionConfiguration(&cc.ClientConnection, field.NewPath("clientConnection")).ToAggregate())
    errs = append(errs, componentbasevalidation.ValidateLeaderElectionConfiguration(&cc.LeaderElection, field.NewPath("leaderElection")).ToAggregate())

    // TODO: This can be removed when ResourceLock is not available
    // Only ResourceLock values with leases are allowed
    if cc.LeaderElection.LeaderElect && cc.LeaderElection.ResourceLock != "leases" {
        leaderElectionPath := field.NewPath("leaderElection")
        errs = append(errs, field.Invalid(leaderElectionPath.Child("resourceLock"), cc.LeaderElection.ResourceLock, `resourceLock value must be "leases"`))
    }

    profilesPath := field.NewPath("profiles")
    if cc.Parallelism <= 0 {
        errs = append(errs, field.Invalid(field.NewPath("parallelism"), cc.Parallelism, "should be an integer value greater than zero"))
    }

    if len(cc.Profiles) == 0 {
        errs = append(errs, field.Required(profilesPath, ""))
    } else {
        existingProfiles := make(map[string]int, len(cc.Profiles))
        for i := range cc.Profiles {
            profile := &cc.Profiles[i]
            path := profilesPath.Index(i)
            errs = append(errs, validateKubeSchedulerProfile(path, cc.APIVersion, profile)...)
            if idx, ok := existingProfiles[profile.SchedulerName]; ok {
                errs = append(errs, field.Duplicate(path.Child("schedulerName"), profilesPath.Index(idx).Child("schedulerName")))
            }
            existingProfiles[profile.SchedulerName] = i
        }
        errs = append(errs, validateCommonQueueSort(profilesPath, cc.Profiles)...)
    }

    errs = append(errs, validatePercentageOfNodesToScore(field.NewPath("percentageOfNodesToScore"), cc.PercentageOfNodesToScore))

    if cc.PodInitialBackoffSeconds <= 0 {
        errs = append(errs, field.Invalid(field.NewPath("podInitialBackoffSeconds"),
            cc.PodInitialBackoffSeconds, "must be greater than 0"))
    }
    if cc.PodMaxBackoffSeconds < cc.PodInitialBackoffSeconds {
        errs = append(errs, field.Invalid(field.NewPath("podMaxBackoffSeconds"),
            cc.PodMaxBackoffSeconds, "must be greater than or equal to PodInitialBackoffSeconds"))
    }

    errs = append(errs, validateExtenders(field.NewPath("extenders"), cc.Extenders)...)
    return utilerrors.Flatten(utilerrors.NewAggregate(errs))
}

func validatePercentageOfNodesToScore(path *field.Path, percentageOfNodesToScore *int32) error {
    if percentageOfNodesToScore != nil {
        if *percentageOfNodesToScore < 0 || *percentageOfNodesToScore > 100 {
            return field.Invalid(path, *percentageOfNodesToScore, "not in valid range [0-100]")
        }
    }
    return nil
}

type invalidPlugins struct {
    schemeGroupVersion string
    plugins            []string
}

// invalidPluginsByVersion maintains a list of removed/deprecated plugins in each version.
// Remember to add an entry to that list when creating a new component config
// version (even if the list of invalid plugins is empty).
var invalidPluginsByVersion = []invalidPlugins{
    {
        schemeGroupVersion: v1.SchemeGroupVersion.String(),
        plugins: []string{
            "AzureDiskLimits",
            "CinderLimits",
            "EBSLimits",
            "GCEPDLimits",
        },
    },
}

// isPluginInvalid checks if a given plugin was removed/deprecated in the given component
// config version or earlier.
func isPluginInvalid(apiVersion string, name string) (bool, string) {
    for _, dp := range invalidPluginsByVersion {
        for _, plugin := range dp.plugins {
            if name == plugin {
                return true, dp.schemeGroupVersion
            }
        }
        if apiVersion == dp.schemeGroupVersion {
            break
        }
    }
    return false, ""
}

func validatePluginSetForInvalidPlugins(path *field.Path, apiVersion string, ps config.PluginSet) []error {
    var errs []error
    for i, plugin := range ps.Enabled {
        if invalid, invalidVersion := isPluginInvalid(apiVersion, plugin.Name); invalid {
            errs = append(errs, field.Invalid(path.Child("enabled").Index(i), plugin.Name, fmt.Sprintf("was invalid in version %q (KubeSchedulerConfiguration is version %q)", invalidVersion, apiVersion)))
        }
    }
    return errs
}

func validateKubeSchedulerProfile(path *field.Path, apiVersion string, profile *config.KubeSchedulerProfile) []error {
    var errs []error
    if len(profile.SchedulerName) == 0 {
        errs = append(errs, field.Required(path.Child("schedulerName"), ""))
    }
    errs = append(errs, validatePercentageOfNodesToScore(path.Child("percentageOfNodesToScore"), profile.PercentageOfNodesToScore))
    errs = append(errs, validatePluginConfig(path, apiVersion, profile)...)
    return errs
}

func validatePluginConfig(path *field.Path, apiVersion string, profile *config.KubeSchedulerProfile) []error {
    var errs []error
    m := map[string]interface{}{
        "DefaultPreemption":               ValidateDefaultPreemptionArgs,
        "InterPodAffinity":                ValidateInterPodAffinityArgs,
        "NodeAffinity":                    ValidateNodeAffinityArgs,
        "NodeResourcesBalancedAllocation": ValidateNodeResourcesBalancedAllocationArgs,
        "NodeResourcesFit":                ValidateNodeResourcesFitArgs,
        "PodTopologySpread":               ValidatePodTopologySpreadArgs,
        "VolumeBinding":                   ValidateVolumeBindingArgs,
    }

    if profile.Plugins != nil {
        stagesToPluginSet := map[string]config.PluginSet{
            "preEnqueue": profile.Plugins.PreEnqueue,
            "queueSort":  profile.Plugins.QueueSort,
            "preFilter":  profile.Plugins.PreFilter,
            "filter":     profile.Plugins.Filter,
            "postFilter": profile.Plugins.PostFilter,
            "preScore":   profile.Plugins.PreScore,
            "score":      profile.Plugins.Score,
            "reserve":    profile.Plugins.Reserve,
            "permit":     profile.Plugins.Permit,
            "preBind":    profile.Plugins.PreBind,
            "bind":       profile.Plugins.Bind,
            "postBind":   profile.Plugins.PostBind,
        }

        pluginsPath := path.Child("plugins")
        for s, p := range stagesToPluginSet {
            errs = append(errs, validatePluginSetForInvalidPlugins(
                pluginsPath.Child(s), apiVersion, p)...)
        }
    }

    seenPluginConfig := sets.New[string]()

    for i := range profile.PluginConfig {
        pluginConfigPath := path.Child("pluginConfig").Index(i)
        name := profile.PluginConfig[i].Name
        args := profile.PluginConfig[i].Args
        if seenPluginConfig.Has(name) {
            errs = append(errs, field.Duplicate(pluginConfigPath, name))
        } else {
            seenPluginConfig.Insert(name)
        }
        if invalid, invalidVersion := isPluginInvalid(apiVersion, name); invalid {
            errs = append(errs, field.Invalid(pluginConfigPath, name, fmt.Sprintf("was invalid in version %q (KubeSchedulerConfiguration is version %q)", invalidVersion, apiVersion)))
        } else if validateFunc, ok := m[name]; ok {
            // type mismatch, no need to validate the `args`.
            if reflect.TypeOf(args) != reflect.ValueOf(validateFunc).Type().In(1) {
                errs = append(errs, field.Invalid(pluginConfigPath.Child("args"), args, "has to match plugin args"))
            } else {
                in := []reflect.Value{reflect.ValueOf(pluginConfigPath.Child("args")), reflect.ValueOf(args)}
                res := reflect.ValueOf(validateFunc).Call(in)
                // It's possible that the validation function returns an Aggregate; just append it here and it will be flattened at the end of CC validation.
                if res[0].Interface() != nil {
                    errs = append(errs, res[0].Interface().(error))
                }
            }
        }
    }
    return errs
}

func validateCommonQueueSort(path *field.Path, profiles []config.KubeSchedulerProfile) []error {
    var errs []error
    var canon config.PluginSet
    var queueSortName string
    var queueSortArgs runtime.Object
    if profiles[0].Plugins != nil {
        canon = profiles[0].Plugins.QueueSort
        if len(profiles[0].Plugins.QueueSort.Enabled) != 0 {
            queueSortName = profiles[0].Plugins.QueueSort.Enabled[0].Name
        }
        length := len(profiles[0].Plugins.QueueSort.Enabled)
        if length > 1 {
            errs = append(errs, field.Invalid(path.Index(0).Child("plugins", "queueSort", "Enabled"), length, "only one queue sort plugin can be enabled"))
        }
    }
    for _, cfg := range profiles[0].PluginConfig {
        if len(queueSortName) > 0 && cfg.Name == queueSortName {
            queueSortArgs = cfg.Args
        }
    }
    for i := 1; i < len(profiles); i++ {
        var curr config.PluginSet
        if profiles[i].Plugins != nil {
            curr = profiles[i].Plugins.QueueSort
        }
        if !apiequality.Semantic.DeepEqual(canon, curr) {
            errs = append(errs, field.Invalid(path.Index(i).Child("plugins", "queueSort"), curr, "queueSort must be the same for all profiles"))
        }
        for _, cfg := range profiles[i].PluginConfig {
            if cfg.Name == queueSortName && !apiequality.Semantic.DeepEqual(queueSortArgs, cfg.Args) {
                errs = append(errs, field.Invalid(path.Index(i).Child("pluginConfig", "args"), cfg.Args, "queueSort must be the same for all profiles"))
            }
        }
    }
    return errs
}

// validateExtenders validates the configured extenders for the Scheduler
func validateExtenders(fldPath *field.Path, extenders []config.Extender) []error {
    var errs []error
    binders := 0
    extenderManagedResources := sets.New[string]()
    for i, extender := range extenders {
        path := fldPath.Index(i)
        if len(extender.PrioritizeVerb) > 0 && extender.Weight <= 0 {
            errs = append(errs, field.Invalid(path.Child("weight"),
                extender.Weight, "must have a positive weight applied to it"))
        }
        if extender.BindVerb != "" {
            binders++
        }
        for j, resource := range extender.ManagedResources {
            managedResourcesPath := path.Child("managedResources").Index(j)
            validationErrors := validateExtendedResourceName(managedResourcesPath.Child("name"), v1.ResourceName(resource.Name))
            errs = append(errs, validationErrors...)
            if extenderManagedResources.Has(resource.Name) {
                errs = append(errs, field.Invalid(managedResourcesPath.Child("name"),
                    resource.Name, "duplicate extender managed resource name"))
            }
            extenderManagedResources.Insert(resource.Name)
        }
    }
    if binders > 1 {
        errs = append(errs, field.Invalid(fldPath, fmt.Sprintf("found %d extenders implementing bind", binders), "only one extender can implement bind"))
    }
    return errs
}

// validateExtendedResourceName checks whether the specified name is a valid
// extended resource name.
func validateExtendedResourceName(path *field.Path, name v1.ResourceName) []error {
    var validationErrors []error
    for _, msg := range validation.IsQualifiedName(string(name)) {
        validationErrors = append(validationErrors, field.Invalid(path, name, msg))
    }
    if len(validationErrors) != 0 {
        return validationErrors
    }
    if !v1helper.IsExtendedResourceName(name) {
        validationErrors = append(validationErrors, field.Invalid(path, string(name), "is an invalid extended resource name"))
    }
    return validationErrors
}
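validatePluginConfig above dispatches to per-plugin validators through a map keyed by plugin name and invokes them via reflection, after first checking that the supplied args value matches the validator's second parameter type. The reduced, self-contained sketch below shows the same dispatch technique with a hypothetical plugin and validator; the names are assumptions for illustration and not part of the vendored package.

// Illustrative sketch (not vendored code): reflection-based validator dispatch.
package main

import (
    "fmt"
    "reflect"
)

type fooArgs struct{ Weight int32 } // hypothetical plugin args

func validateFooArgs(path string, args *fooArgs) error {
    if args.Weight < 0 {
        return fmt.Errorf("%s: weight must be non-negative", path)
    }
    return nil
}

func main() {
    // Validators are keyed by plugin name, mirroring the map in validatePluginConfig.
    validators := map[string]interface{}{"Foo": validateFooArgs}

    var args interface{} = &fooArgs{Weight: -1}
    if fn, ok := validators["Foo"]; ok {
        fnVal := reflect.ValueOf(fn)
        // Reject a type mismatch up front, just like the vendored code does.
        if reflect.TypeOf(args) != fnVal.Type().In(1) {
            fmt.Println("args type does not match validator")
            return
        }
        out := fnVal.Call([]reflect.Value{reflect.ValueOf("profiles[0].pluginConfig"), reflect.ValueOf(args)})
        if err, _ := out[0].Interface().(error); err != nil {
            fmt.Println("validation error:", err)
        }
    }
}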
329
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/validation/validation_pluginargs.go
generated
vendored
Normal file
@ -0,0 +1,329 @@
/*
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package validation
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1validation "k8s.io/apimachinery/pkg/apis/meta/v1/validation"
|
||||
"k8s.io/apimachinery/pkg/util/errors"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/apimachinery/pkg/util/validation/field"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
)
|
||||
|
||||
// supportedScoringStrategyTypes has to be a set of strings for use with field.Unsupported
|
||||
var supportedScoringStrategyTypes = sets.New(
|
||||
string(config.LeastAllocated),
|
||||
string(config.MostAllocated),
|
||||
string(config.RequestedToCapacityRatio),
|
||||
)
|
||||
|
||||
// ValidateDefaultPreemptionArgs validates that DefaultPreemptionArgs are correct.
|
||||
func ValidateDefaultPreemptionArgs(path *field.Path, args *config.DefaultPreemptionArgs) error {
|
||||
var allErrs field.ErrorList
|
||||
percentagePath := path.Child("minCandidateNodesPercentage")
|
||||
absolutePath := path.Child("minCandidateNodesAbsolute")
|
||||
if err := validateMinCandidateNodesPercentage(args.MinCandidateNodesPercentage, percentagePath); err != nil {
|
||||
allErrs = append(allErrs, err)
|
||||
}
|
||||
if err := validateMinCandidateNodesAbsolute(args.MinCandidateNodesAbsolute, absolutePath); err != nil {
|
||||
allErrs = append(allErrs, err)
|
||||
}
|
||||
if args.MinCandidateNodesPercentage == 0 && args.MinCandidateNodesAbsolute == 0 {
|
||||
allErrs = append(allErrs,
|
||||
field.Invalid(percentagePath, args.MinCandidateNodesPercentage, "cannot be zero at the same time as minCandidateNodesAbsolute"),
|
||||
field.Invalid(absolutePath, args.MinCandidateNodesAbsolute, "cannot be zero at the same time as minCandidateNodesPercentage"))
|
||||
}
|
||||
return allErrs.ToAggregate()
|
||||
}
|
||||
|
||||
// validateMinCandidateNodesPercentage validates that
|
||||
// minCandidateNodesPercentage is within the allowed range.
|
||||
func validateMinCandidateNodesPercentage(minCandidateNodesPercentage int32, p *field.Path) *field.Error {
|
||||
if minCandidateNodesPercentage < 0 || minCandidateNodesPercentage > 100 {
|
||||
return field.Invalid(p, minCandidateNodesPercentage, "not in valid range [0, 100]")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// validateMinCandidateNodesAbsolute validates that minCandidateNodesAbsolute
|
||||
// is within the allowed range.
|
||||
func validateMinCandidateNodesAbsolute(minCandidateNodesAbsolute int32, p *field.Path) *field.Error {
|
||||
if minCandidateNodesAbsolute < 0 {
|
||||
return field.Invalid(p, minCandidateNodesAbsolute, "not in valid range [0, inf)")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ValidateInterPodAffinityArgs validates that InterPodAffinityArgs are correct.
|
||||
func ValidateInterPodAffinityArgs(path *field.Path, args *config.InterPodAffinityArgs) error {
|
||||
return validateHardPodAffinityWeight(path.Child("hardPodAffinityWeight"), args.HardPodAffinityWeight)
|
||||
}
|
||||
|
||||
// validateHardPodAffinityWeight validates that weight is within allowed range.
|
||||
func validateHardPodAffinityWeight(path *field.Path, w int32) error {
|
||||
const (
|
||||
minHardPodAffinityWeight = 0
|
||||
maxHardPodAffinityWeight = 100
|
||||
)
|
||||
|
||||
if w < minHardPodAffinityWeight || w > maxHardPodAffinityWeight {
|
||||
msg := fmt.Sprintf("not in valid range [%d, %d]", minHardPodAffinityWeight, maxHardPodAffinityWeight)
|
||||
return field.Invalid(path, w, msg)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ValidatePodTopologySpreadArgs validates that PodTopologySpreadArgs are correct.
|
||||
// It replicates the validation from pkg/apis/core/validation.validateTopologySpreadConstraints
|
||||
// with an additional check for .labelSelector to be nil.
|
||||
func ValidatePodTopologySpreadArgs(path *field.Path, args *config.PodTopologySpreadArgs) error {
|
||||
var allErrs field.ErrorList
|
||||
if err := validateDefaultingType(path.Child("defaultingType"), args.DefaultingType, args.DefaultConstraints); err != nil {
|
||||
allErrs = append(allErrs, err)
|
||||
}
|
||||
|
||||
defaultConstraintsPath := path.Child("defaultConstraints")
|
||||
for i, c := range args.DefaultConstraints {
|
||||
p := defaultConstraintsPath.Index(i)
|
||||
if c.MaxSkew <= 0 {
|
||||
f := p.Child("maxSkew")
|
||||
allErrs = append(allErrs, field.Invalid(f, c.MaxSkew, "not in valid range (0, inf)"))
|
||||
}
|
||||
allErrs = append(allErrs, validateTopologyKey(p.Child("topologyKey"), c.TopologyKey)...)
|
||||
if err := validateWhenUnsatisfiable(p.Child("whenUnsatisfiable"), c.WhenUnsatisfiable); err != nil {
|
||||
allErrs = append(allErrs, err)
|
||||
}
|
||||
if c.LabelSelector != nil {
|
||||
f := field.Forbidden(p.Child("labelSelector"), "constraint must not define a selector, as they deduced for each pod")
|
||||
allErrs = append(allErrs, f)
|
||||
}
|
||||
if err := validateConstraintNotRepeat(defaultConstraintsPath, args.DefaultConstraints, i); err != nil {
|
||||
allErrs = append(allErrs, err)
|
||||
}
|
||||
}
|
||||
if len(allErrs) == 0 {
|
||||
return nil
|
||||
}
|
||||
return allErrs.ToAggregate()
|
||||
}
|
||||
|
||||
func validateDefaultingType(p *field.Path, v config.PodTopologySpreadConstraintsDefaulting, constraints []v1.TopologySpreadConstraint) *field.Error {
|
||||
if v != config.SystemDefaulting && v != config.ListDefaulting {
|
||||
return field.NotSupported(p, v, []string{string(config.SystemDefaulting), string(config.ListDefaulting)})
|
||||
}
|
||||
if v == config.SystemDefaulting && len(constraints) > 0 {
|
||||
return field.Invalid(p, v, "when .defaultConstraints are not empty")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateTopologyKey(p *field.Path, v string) field.ErrorList {
|
||||
var allErrs field.ErrorList
|
||||
if len(v) == 0 {
|
||||
allErrs = append(allErrs, field.Required(p, "can not be empty"))
|
||||
} else {
|
||||
allErrs = append(allErrs, metav1validation.ValidateLabelName(v, p)...)
|
||||
}
|
||||
return allErrs
|
||||
}
|
||||
|
||||
func validateWhenUnsatisfiable(p *field.Path, v v1.UnsatisfiableConstraintAction) *field.Error {
|
||||
supportedScheduleActions := sets.New(string(v1.DoNotSchedule), string(v1.ScheduleAnyway))
|
||||
|
||||
if len(v) == 0 {
|
||||
return field.Required(p, "can not be empty")
|
||||
}
|
||||
if !supportedScheduleActions.Has(string(v)) {
|
||||
return field.NotSupported(p, v, sets.List(supportedScheduleActions))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateConstraintNotRepeat(path *field.Path, constraints []v1.TopologySpreadConstraint, idx int) *field.Error {
|
||||
c := &constraints[idx]
|
||||
for i := range constraints[:idx] {
|
||||
other := &constraints[i]
|
||||
if c.TopologyKey == other.TopologyKey && c.WhenUnsatisfiable == other.WhenUnsatisfiable {
|
||||
return field.Duplicate(path.Index(idx), fmt.Sprintf("{%v, %v}", c.TopologyKey, c.WhenUnsatisfiable))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateFunctionShape(shape []config.UtilizationShapePoint, path *field.Path) field.ErrorList {
|
||||
const (
|
||||
minUtilization = 0
|
||||
maxUtilization = 100
|
||||
minScore = 0
|
||||
maxScore = int32(config.MaxCustomPriorityScore)
|
||||
)
|
||||
|
||||
var allErrs field.ErrorList
|
||||
|
||||
if len(shape) == 0 {
|
||||
allErrs = append(allErrs, field.Required(path, "at least one point must be specified"))
|
||||
return allErrs
|
||||
}
|
||||
|
||||
for i := 1; i < len(shape); i++ {
|
||||
if shape[i-1].Utilization >= shape[i].Utilization {
|
||||
allErrs = append(allErrs, field.Invalid(path.Index(i).Child("utilization"), shape[i].Utilization, "utilization values must be sorted in increasing order"))
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
for i, point := range shape {
|
||||
if point.Utilization < minUtilization || point.Utilization > maxUtilization {
|
||||
msg := fmt.Sprintf("not in valid range [%d, %d]", minUtilization, maxUtilization)
|
||||
allErrs = append(allErrs, field.Invalid(path.Index(i).Child("utilization"), point.Utilization, msg))
|
||||
}
|
||||
|
||||
if point.Score < minScore || point.Score > maxScore {
|
||||
msg := fmt.Sprintf("not in valid range [%d, %d]", minScore, maxScore)
|
||||
allErrs = append(allErrs, field.Invalid(path.Index(i).Child("score"), point.Score, msg))
|
||||
}
|
||||
}
|
||||
|
||||
return allErrs
|
||||
}
|
||||
|
||||
func validateResources(resources []config.ResourceSpec, p *field.Path) field.ErrorList {
|
||||
var allErrs field.ErrorList
|
||||
for i, resource := range resources {
|
||||
if resource.Weight <= 0 || resource.Weight > 100 {
|
||||
msg := fmt.Sprintf("resource weight of %v not in valid range (0, 100]", resource.Name)
|
||||
allErrs = append(allErrs, field.Invalid(p.Index(i).Child("weight"), resource.Weight, msg))
|
||||
}
|
||||
}
|
||||
return allErrs
|
||||
}
|
||||
|
||||
// ValidateNodeResourcesBalancedAllocationArgs validates that NodeResourcesBalancedAllocationArgs are set correctly.
|
||||
func ValidateNodeResourcesBalancedAllocationArgs(path *field.Path, args *config.NodeResourcesBalancedAllocationArgs) error {
|
||||
var allErrs field.ErrorList
|
||||
seenResources := sets.New[string]()
|
||||
for i, resource := range args.Resources {
|
||||
if seenResources.Has(resource.Name) {
|
||||
allErrs = append(allErrs, field.Duplicate(path.Child("resources").Index(i).Child("name"), resource.Name))
|
||||
} else {
|
||||
seenResources.Insert(resource.Name)
|
||||
}
|
||||
if resource.Weight != 1 {
|
||||
allErrs = append(allErrs, field.Invalid(path.Child("resources").Index(i).Child("weight"), resource.Weight, "must be 1"))
|
||||
}
|
||||
}
|
||||
return allErrs.ToAggregate()
|
||||
}
|
||||
|
||||
// ValidateNodeAffinityArgs validates that NodeAffinityArgs are correct.
|
||||
func ValidateNodeAffinityArgs(path *field.Path, args *config.NodeAffinityArgs) error {
|
||||
if args.AddedAffinity == nil {
|
||||
return nil
|
||||
}
|
||||
affinity := args.AddedAffinity
|
||||
var errs []error
|
||||
if ns := affinity.RequiredDuringSchedulingIgnoredDuringExecution; ns != nil {
|
||||
_, err := nodeaffinity.NewNodeSelector(ns, field.WithPath(path.Child("addedAffinity", "requiredDuringSchedulingIgnoredDuringExecution")))
|
||||
if err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// TODO: Add validation for requiredDuringSchedulingRequiredDuringExecution when it gets added to the API.
|
||||
if terms := affinity.PreferredDuringSchedulingIgnoredDuringExecution; len(terms) != 0 {
|
||||
_, err := nodeaffinity.NewPreferredSchedulingTerms(terms, field.WithPath(path.Child("addedAffinity", "preferredDuringSchedulingIgnoredDuringExecution")))
|
||||
if err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
return errors.Flatten(errors.NewAggregate(errs))
|
||||
}
|
||||
|
||||
// VolumeBindingArgsValidationOptions contains the different settings for validation.
|
||||
type VolumeBindingArgsValidationOptions struct {
|
||||
AllowVolumeCapacityPriority bool
|
||||
}
|
||||
|
||||
// ValidateVolumeBindingArgs validates that VolumeBindingArgs are set correctly.
|
||||
func ValidateVolumeBindingArgs(path *field.Path, args *config.VolumeBindingArgs) error {
|
||||
return ValidateVolumeBindingArgsWithOptions(path, args, VolumeBindingArgsValidationOptions{
|
||||
AllowVolumeCapacityPriority: utilfeature.DefaultFeatureGate.Enabled(features.VolumeCapacityPriority),
|
||||
})
|
||||
}
|
||||
|
||||
// ValidateVolumeBindingArgsWithOptions validates that VolumeBindingArgs and VolumeBindingArgsValidationOptions with scheduler features.
|
||||
func ValidateVolumeBindingArgsWithOptions(path *field.Path, args *config.VolumeBindingArgs, opts VolumeBindingArgsValidationOptions) error {
|
||||
var allErrs field.ErrorList
|
||||
|
||||
if args.BindTimeoutSeconds < 0 {
|
||||
allErrs = append(allErrs, field.Invalid(path.Child("bindTimeoutSeconds"), args.BindTimeoutSeconds, "invalid BindTimeoutSeconds, should not be a negative value"))
|
||||
}
|
||||
|
||||
if opts.AllowVolumeCapacityPriority {
|
||||
allErrs = append(allErrs, validateFunctionShape(args.Shape, path.Child("shape"))...)
|
||||
} else if args.Shape != nil {
|
||||
// When the feature is off, return an error if the config is not nil.
|
||||
// This prevents unexpected configuration from taking effect when the
|
||||
// feature turns on in the future.
|
||||
allErrs = append(allErrs, field.Invalid(path.Child("shape"), args.Shape, "unexpected field `shape`, remove it or turn on the feature gate VolumeCapacityPriority"))
|
||||
}
|
||||
return allErrs.ToAggregate()
|
||||
}
|
||||
|
||||
func ValidateNodeResourcesFitArgs(path *field.Path, args *config.NodeResourcesFitArgs) error {
|
||||
var allErrs field.ErrorList
|
||||
resPath := path.Child("ignoredResources")
|
||||
for i, res := range args.IgnoredResources {
|
||||
path := resPath.Index(i)
|
||||
if errs := metav1validation.ValidateLabelName(res, path); len(errs) != 0 {
|
||||
allErrs = append(allErrs, errs...)
|
||||
}
|
||||
}
|
||||
|
||||
groupPath := path.Child("ignoredResourceGroups")
|
||||
for i, group := range args.IgnoredResourceGroups {
|
||||
path := groupPath.Index(i)
|
||||
if strings.Contains(group, "/") {
|
||||
allErrs = append(allErrs, field.Invalid(path, group, "resource group name can't contain '/'"))
|
||||
}
|
||||
if errs := metav1validation.ValidateLabelName(group, path); len(errs) != 0 {
|
||||
allErrs = append(allErrs, errs...)
|
||||
}
|
||||
}
|
||||
|
||||
strategyPath := path.Child("scoringStrategy")
|
||||
if args.ScoringStrategy != nil {
|
||||
if !supportedScoringStrategyTypes.Has(string(args.ScoringStrategy.Type)) {
|
||||
allErrs = append(allErrs, field.NotSupported(strategyPath.Child("type"), args.ScoringStrategy.Type, sets.List(supportedScoringStrategyTypes)))
|
||||
}
|
||||
allErrs = append(allErrs, validateResources(args.ScoringStrategy.Resources, strategyPath.Child("resources"))...)
|
||||
if args.ScoringStrategy.RequestedToCapacityRatio != nil {
|
||||
allErrs = append(allErrs, validateFunctionShape(args.ScoringStrategy.RequestedToCapacityRatio.Shape, strategyPath.Child("shape"))...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(allErrs) == 0 {
|
||||
return nil
|
||||
}
|
||||
return allErrs.ToAggregate()
|
||||
}
|
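validateFunctionShape in the file above only checks that the utilization/score points are non-empty, sorted by utilization, and within range; the points themselves describe a piecewise-linear scoring curve. The hypothetical helper below sketches how such a shape is typically evaluated once it has passed validation; it is illustrative only and not part of the vendored code.

// Illustrative sketch (assumption): evaluating a validated utilization/score shape.
package main

import "fmt"

type shapePoint struct{ Utilization, Score int64 }

// scoreForUtilization interpolates linearly between the surrounding shape points.
func scoreForUtilization(shape []shapePoint, utilization int64) int64 {
    if len(shape) == 0 {
        return 0
    }
    if utilization <= shape[0].Utilization {
        return shape[0].Score
    }
    for i := 1; i < len(shape); i++ {
        if utilization <= shape[i].Utilization {
            lo, hi := shape[i-1], shape[i]
            return lo.Score + (hi.Score-lo.Score)*(utilization-lo.Utilization)/(hi.Utilization-lo.Utilization)
        }
    }
    return shape[len(shape)-1].Score
}

func main() {
    shape := []shapePoint{{Utilization: 0, Score: 0}, {Utilization: 100, Score: 10}}
    fmt.Println(scoreForUtilization(shape, 50)) // 5
}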
562
vendor/k8s.io/kubernetes/pkg/scheduler/apis/config/zz_generated.deepcopy.go
generated
vendored
Normal file
@ -0,0 +1,562 @@
//go:build !ignore_autogenerated
|
||||
// +build !ignore_autogenerated
|
||||
|
||||
/*
|
||||
Copyright The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Code generated by deepcopy-gen. DO NOT EDIT.
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
v1 "k8s.io/api/core/v1"
|
||||
runtime "k8s.io/apimachinery/pkg/runtime"
|
||||
)
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *DefaultPreemptionArgs) DeepCopyInto(out *DefaultPreemptionArgs) {
|
||||
*out = *in
|
||||
out.TypeMeta = in.TypeMeta
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DefaultPreemptionArgs.
|
||||
func (in *DefaultPreemptionArgs) DeepCopy() *DefaultPreemptionArgs {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(DefaultPreemptionArgs)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
|
||||
func (in *DefaultPreemptionArgs) DeepCopyObject() runtime.Object {
|
||||
if c := in.DeepCopy(); c != nil {
|
||||
return c
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *Extender) DeepCopyInto(out *Extender) {
|
||||
*out = *in
|
||||
if in.TLSConfig != nil {
|
||||
in, out := &in.TLSConfig, &out.TLSConfig
|
||||
*out = new(ExtenderTLSConfig)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
out.HTTPTimeout = in.HTTPTimeout
|
||||
if in.ManagedResources != nil {
|
||||
in, out := &in.ManagedResources, &out.ManagedResources
|
||||
*out = make([]ExtenderManagedResource, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Extender.
|
||||
func (in *Extender) DeepCopy() *Extender {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(Extender)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *ExtenderManagedResource) DeepCopyInto(out *ExtenderManagedResource) {
|
||||
*out = *in
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderManagedResource.
|
||||
func (in *ExtenderManagedResource) DeepCopy() *ExtenderManagedResource {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(ExtenderManagedResource)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *ExtenderTLSConfig) DeepCopyInto(out *ExtenderTLSConfig) {
|
||||
*out = *in
|
||||
if in.CertData != nil {
|
||||
in, out := &in.CertData, &out.CertData
|
||||
*out = make([]byte, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.KeyData != nil {
|
||||
in, out := &in.KeyData, &out.KeyData
|
||||
*out = make([]byte, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.CAData != nil {
|
||||
in, out := &in.CAData, &out.CAData
|
||||
*out = make([]byte, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderTLSConfig.
|
||||
func (in *ExtenderTLSConfig) DeepCopy() *ExtenderTLSConfig {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(ExtenderTLSConfig)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *InterPodAffinityArgs) DeepCopyInto(out *InterPodAffinityArgs) {
|
||||
*out = *in
|
||||
out.TypeMeta = in.TypeMeta
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InterPodAffinityArgs.
|
||||
func (in *InterPodAffinityArgs) DeepCopy() *InterPodAffinityArgs {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(InterPodAffinityArgs)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
|
||||
func (in *InterPodAffinityArgs) DeepCopyObject() runtime.Object {
|
||||
if c := in.DeepCopy(); c != nil {
|
||||
return c
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *KubeSchedulerConfiguration) DeepCopyInto(out *KubeSchedulerConfiguration) {
|
||||
*out = *in
|
||||
out.TypeMeta = in.TypeMeta
|
||||
out.LeaderElection = in.LeaderElection
|
||||
out.ClientConnection = in.ClientConnection
|
||||
out.DebuggingConfiguration = in.DebuggingConfiguration
|
||||
if in.PercentageOfNodesToScore != nil {
|
||||
in, out := &in.PercentageOfNodesToScore, &out.PercentageOfNodesToScore
|
||||
*out = new(int32)
|
||||
**out = **in
|
||||
}
|
||||
if in.Profiles != nil {
|
||||
in, out := &in.Profiles, &out.Profiles
|
||||
*out = make([]KubeSchedulerProfile, len(*in))
|
||||
for i := range *in {
|
||||
(*in)[i].DeepCopyInto(&(*out)[i])
|
||||
}
|
||||
}
|
||||
if in.Extenders != nil {
|
||||
in, out := &in.Extenders, &out.Extenders
|
||||
*out = make([]Extender, len(*in))
|
||||
for i := range *in {
|
||||
(*in)[i].DeepCopyInto(&(*out)[i])
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeSchedulerConfiguration.
|
||||
func (in *KubeSchedulerConfiguration) DeepCopy() *KubeSchedulerConfiguration {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(KubeSchedulerConfiguration)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
|
||||
func (in *KubeSchedulerConfiguration) DeepCopyObject() runtime.Object {
|
||||
if c := in.DeepCopy(); c != nil {
|
||||
return c
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *KubeSchedulerProfile) DeepCopyInto(out *KubeSchedulerProfile) {
|
||||
*out = *in
|
||||
if in.PercentageOfNodesToScore != nil {
|
||||
in, out := &in.PercentageOfNodesToScore, &out.PercentageOfNodesToScore
|
||||
*out = new(int32)
|
||||
**out = **in
|
||||
}
|
||||
if in.Plugins != nil {
|
||||
in, out := &in.Plugins, &out.Plugins
|
||||
*out = new(Plugins)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
if in.PluginConfig != nil {
|
||||
in, out := &in.PluginConfig, &out.PluginConfig
|
||||
*out = make([]PluginConfig, len(*in))
|
||||
for i := range *in {
|
||||
(*in)[i].DeepCopyInto(&(*out)[i])
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeSchedulerProfile.
|
||||
func (in *KubeSchedulerProfile) DeepCopy() *KubeSchedulerProfile {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(KubeSchedulerProfile)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *NodeAffinityArgs) DeepCopyInto(out *NodeAffinityArgs) {
|
||||
*out = *in
|
||||
out.TypeMeta = in.TypeMeta
|
||||
if in.AddedAffinity != nil {
|
||||
in, out := &in.AddedAffinity, &out.AddedAffinity
|
||||
*out = new(v1.NodeAffinity)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeAffinityArgs.
|
||||
func (in *NodeAffinityArgs) DeepCopy() *NodeAffinityArgs {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(NodeAffinityArgs)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
|
||||
func (in *NodeAffinityArgs) DeepCopyObject() runtime.Object {
|
||||
if c := in.DeepCopy(); c != nil {
|
||||
return c
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *NodeResourcesBalancedAllocationArgs) DeepCopyInto(out *NodeResourcesBalancedAllocationArgs) {
|
||||
*out = *in
|
||||
out.TypeMeta = in.TypeMeta
|
||||
if in.Resources != nil {
|
||||
in, out := &in.Resources, &out.Resources
|
||||
*out = make([]ResourceSpec, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeResourcesBalancedAllocationArgs.
|
||||
func (in *NodeResourcesBalancedAllocationArgs) DeepCopy() *NodeResourcesBalancedAllocationArgs {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(NodeResourcesBalancedAllocationArgs)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
|
||||
func (in *NodeResourcesBalancedAllocationArgs) DeepCopyObject() runtime.Object {
|
||||
if c := in.DeepCopy(); c != nil {
|
||||
return c
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *NodeResourcesFitArgs) DeepCopyInto(out *NodeResourcesFitArgs) {
|
||||
*out = *in
|
||||
out.TypeMeta = in.TypeMeta
|
||||
if in.IgnoredResources != nil {
|
||||
in, out := &in.IgnoredResources, &out.IgnoredResources
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.IgnoredResourceGroups != nil {
|
||||
in, out := &in.IgnoredResourceGroups, &out.IgnoredResourceGroups
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.ScoringStrategy != nil {
|
||||
in, out := &in.ScoringStrategy, &out.ScoringStrategy
|
||||
*out = new(ScoringStrategy)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeResourcesFitArgs.
|
||||
func (in *NodeResourcesFitArgs) DeepCopy() *NodeResourcesFitArgs {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(NodeResourcesFitArgs)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
|
||||
func (in *NodeResourcesFitArgs) DeepCopyObject() runtime.Object {
|
||||
if c := in.DeepCopy(); c != nil {
|
||||
return c
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *Plugin) DeepCopyInto(out *Plugin) {
|
||||
*out = *in
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Plugin.
|
||||
func (in *Plugin) DeepCopy() *Plugin {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(Plugin)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *PluginConfig) DeepCopyInto(out *PluginConfig) {
|
||||
*out = *in
|
||||
if in.Args != nil {
|
||||
out.Args = in.Args.DeepCopyObject()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PluginConfig.
|
||||
func (in *PluginConfig) DeepCopy() *PluginConfig {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(PluginConfig)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *PluginSet) DeepCopyInto(out *PluginSet) {
|
||||
*out = *in
|
||||
if in.Enabled != nil {
|
||||
in, out := &in.Enabled, &out.Enabled
|
||||
*out = make([]Plugin, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.Disabled != nil {
|
||||
in, out := &in.Disabled, &out.Disabled
|
||||
*out = make([]Plugin, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PluginSet.
|
||||
func (in *PluginSet) DeepCopy() *PluginSet {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(PluginSet)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *Plugins) DeepCopyInto(out *Plugins) {
|
||||
*out = *in
|
||||
in.PreEnqueue.DeepCopyInto(&out.PreEnqueue)
|
||||
in.QueueSort.DeepCopyInto(&out.QueueSort)
|
||||
in.PreFilter.DeepCopyInto(&out.PreFilter)
|
||||
in.Filter.DeepCopyInto(&out.Filter)
|
||||
in.PostFilter.DeepCopyInto(&out.PostFilter)
|
||||
in.PreScore.DeepCopyInto(&out.PreScore)
|
||||
in.Score.DeepCopyInto(&out.Score)
|
||||
in.Reserve.DeepCopyInto(&out.Reserve)
|
||||
in.Permit.DeepCopyInto(&out.Permit)
|
||||
in.PreBind.DeepCopyInto(&out.PreBind)
|
||||
in.Bind.DeepCopyInto(&out.Bind)
|
||||
in.PostBind.DeepCopyInto(&out.PostBind)
|
||||
in.MultiPoint.DeepCopyInto(&out.MultiPoint)
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Plugins.
|
||||
func (in *Plugins) DeepCopy() *Plugins {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(Plugins)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *PodTopologySpreadArgs) DeepCopyInto(out *PodTopologySpreadArgs) {
|
||||
*out = *in
|
||||
out.TypeMeta = in.TypeMeta
|
||||
if in.DefaultConstraints != nil {
|
||||
in, out := &in.DefaultConstraints, &out.DefaultConstraints
|
||||
*out = make([]v1.TopologySpreadConstraint, len(*in))
|
||||
for i := range *in {
|
||||
(*in)[i].DeepCopyInto(&(*out)[i])
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodTopologySpreadArgs.
|
||||
func (in *PodTopologySpreadArgs) DeepCopy() *PodTopologySpreadArgs {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(PodTopologySpreadArgs)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
|
||||
func (in *PodTopologySpreadArgs) DeepCopyObject() runtime.Object {
|
||||
if c := in.DeepCopy(); c != nil {
|
||||
return c
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *RequestedToCapacityRatioParam) DeepCopyInto(out *RequestedToCapacityRatioParam) {
|
||||
*out = *in
|
||||
if in.Shape != nil {
|
||||
in, out := &in.Shape, &out.Shape
|
||||
*out = make([]UtilizationShapePoint, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RequestedToCapacityRatioParam.
|
||||
func (in *RequestedToCapacityRatioParam) DeepCopy() *RequestedToCapacityRatioParam {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(RequestedToCapacityRatioParam)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *ResourceSpec) DeepCopyInto(out *ResourceSpec) {
|
||||
*out = *in
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceSpec.
|
||||
func (in *ResourceSpec) DeepCopy() *ResourceSpec {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(ResourceSpec)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *ScoringStrategy) DeepCopyInto(out *ScoringStrategy) {
|
||||
*out = *in
|
||||
if in.Resources != nil {
|
||||
in, out := &in.Resources, &out.Resources
|
||||
*out = make([]ResourceSpec, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.RequestedToCapacityRatio != nil {
|
||||
in, out := &in.RequestedToCapacityRatio, &out.RequestedToCapacityRatio
|
||||
*out = new(RequestedToCapacityRatioParam)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ScoringStrategy.
|
||||
func (in *ScoringStrategy) DeepCopy() *ScoringStrategy {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(ScoringStrategy)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *UtilizationShapePoint) DeepCopyInto(out *UtilizationShapePoint) {
|
||||
*out = *in
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UtilizationShapePoint.
|
||||
func (in *UtilizationShapePoint) DeepCopy() *UtilizationShapePoint {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(UtilizationShapePoint)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *VolumeBindingArgs) DeepCopyInto(out *VolumeBindingArgs) {
|
||||
*out = *in
|
||||
out.TypeMeta = in.TypeMeta
|
||||
if in.Shape != nil {
|
||||
in, out := &in.Shape, &out.Shape
|
||||
*out = make([]UtilizationShapePoint, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VolumeBindingArgs.
|
||||
func (in *VolumeBindingArgs) DeepCopy() *VolumeBindingArgs {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(VolumeBindingArgs)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
|
||||
func (in *VolumeBindingArgs) DeepCopyObject() runtime.Object {
|
||||
if c := in.DeepCopy(); c != nil {
|
||||
return c
|
||||
}
|
||||
return nil
|
||||
}
|
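The generated DeepCopyInto/DeepCopy pairs in the file above give value semantics to types that contain slices or pointers, so callers can mutate a copy without affecting the original configuration. A reduced, self-contained sketch of the same pattern follows; the stand-in types are assumptions for illustration only.

// Illustrative sketch (not vendored code): the DeepCopyInto/DeepCopy pattern.
package main

import "fmt"

type shapePoint struct{ Utilization, Score int32 }

type volumeBindingArgs struct {
    BindTimeoutSeconds int64
    Shape              []shapePoint
}

// deepCopyInto copies scalar fields, then re-allocates the slice so the copy
// does not share backing storage with the receiver.
func (in *volumeBindingArgs) deepCopyInto(out *volumeBindingArgs) {
    *out = *in
    if in.Shape != nil {
        out.Shape = make([]shapePoint, len(in.Shape))
        copy(out.Shape, in.Shape)
    }
}

func (in *volumeBindingArgs) deepCopy() *volumeBindingArgs {
    if in == nil {
        return nil
    }
    out := new(volumeBindingArgs)
    in.deepCopyInto(out)
    return out
}

func main() {
    orig := &volumeBindingArgs{BindTimeoutSeconds: 600, Shape: []shapePoint{{0, 0}, {100, 10}}}
    cp := orig.deepCopy()
    cp.Shape[0].Score = 5 // mutating the copy leaves the original untouched
    fmt.Println(orig.Shape[0].Score, cp.Shape[0].Score) // 0 5
}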
760
vendor/k8s.io/kubernetes/pkg/scheduler/backend/cache/cache.go
generated
vendored
Normal file
@ -0,0 +1,760 @@
/*
|
||||
Copyright 2015 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cache
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
cleanAssumedPeriod = 1 * time.Second
|
||||
)
|
||||
|
||||
// New returns a Cache implementation.
|
||||
// It automatically starts a go routine that manages expiration of assumed pods.
|
||||
// "ttl" is how long the assumed pod will get expired.
|
||||
// "ctx" is the context that would close the background goroutine.
|
||||
func New(ctx context.Context, ttl time.Duration) Cache {
|
||||
logger := klog.FromContext(ctx)
|
||||
cache := newCache(ctx, ttl, cleanAssumedPeriod)
|
||||
cache.run(logger)
|
||||
return cache
|
||||
}
|
||||
|
||||
// nodeInfoListItem holds a NodeInfo pointer and acts as an item in a doubly
|
||||
// linked list. When a NodeInfo is updated, it goes to the head of the list.
|
||||
// The items closer to the head are the most recently updated items.
|
||||
type nodeInfoListItem struct {
|
||||
info *framework.NodeInfo
|
||||
next *nodeInfoListItem
|
||||
prev *nodeInfoListItem
|
||||
}
|
||||
|
||||
type cacheImpl struct {
|
||||
stop <-chan struct{}
|
||||
ttl time.Duration
|
||||
period time.Duration
|
||||
|
||||
// This mutex guards all fields within this cache struct.
|
||||
mu sync.RWMutex
|
||||
// a set of assumed pod keys.
|
||||
// The key could further be used to get an entry in podStates.
|
||||
assumedPods sets.Set[string]
|
||||
// a map from pod key to podState.
|
||||
podStates map[string]*podState
|
||||
nodes map[string]*nodeInfoListItem
|
||||
// headNode points to the most recently updated NodeInfo in "nodes". It is the
|
||||
// head of the linked list.
|
||||
headNode *nodeInfoListItem
|
||||
nodeTree *nodeTree
|
||||
// A map from image name to its ImageStateSummary.
|
||||
imageStates map[string]*framework.ImageStateSummary
|
||||
}
|
||||
|
||||
type podState struct {
|
||||
pod *v1.Pod
|
||||
// Used by assumedPod to determine expiration.
// If deadline is nil, assumedPod will never expire.
|
||||
deadline *time.Time
|
||||
// Used to block cache from expiring assumedPod if binding still runs
|
||||
bindingFinished bool
|
||||
}
|
||||
|
||||
func newCache(ctx context.Context, ttl, period time.Duration) *cacheImpl {
|
||||
logger := klog.FromContext(ctx)
|
||||
return &cacheImpl{
|
||||
ttl: ttl,
|
||||
period: period,
|
||||
stop: ctx.Done(),
|
||||
|
||||
nodes: make(map[string]*nodeInfoListItem),
|
||||
nodeTree: newNodeTree(logger, nil),
|
||||
assumedPods: sets.New[string](),
|
||||
podStates: make(map[string]*podState),
|
||||
imageStates: make(map[string]*framework.ImageStateSummary),
|
||||
}
|
||||
}
|
||||
|
||||
// newNodeInfoListItem initializes a new nodeInfoListItem.
|
||||
func newNodeInfoListItem(ni *framework.NodeInfo) *nodeInfoListItem {
|
||||
return &nodeInfoListItem{
|
||||
info: ni,
|
||||
}
|
||||
}
|
||||
|
||||
// moveNodeInfoToHead moves a NodeInfo to the head of "cache.nodes" doubly
|
||||
// linked list. The head is the most recently updated NodeInfo.
|
||||
// We assume cache lock is already acquired.
|
||||
func (cache *cacheImpl) moveNodeInfoToHead(logger klog.Logger, name string) {
|
||||
ni, ok := cache.nodes[name]
|
||||
if !ok {
|
||||
logger.Error(nil, "No node info with given name found in the cache", "node", klog.KRef("", name))
|
||||
return
|
||||
}
|
||||
// if the node info list item is already at the head, we are done.
|
||||
if ni == cache.headNode {
|
||||
return
|
||||
}
|
||||
|
||||
if ni.prev != nil {
|
||||
ni.prev.next = ni.next
|
||||
}
|
||||
if ni.next != nil {
|
||||
ni.next.prev = ni.prev
|
||||
}
|
||||
if cache.headNode != nil {
|
||||
cache.headNode.prev = ni
|
||||
}
|
||||
ni.next = cache.headNode
|
||||
ni.prev = nil
|
||||
cache.headNode = ni
|
||||
}
|
||||
|
||||
// removeNodeInfoFromList removes a NodeInfo from the "cache.nodes" doubly
|
||||
// linked list.
|
||||
// We assume cache lock is already acquired.
|
||||
func (cache *cacheImpl) removeNodeInfoFromList(logger klog.Logger, name string) {
|
||||
ni, ok := cache.nodes[name]
|
||||
if !ok {
|
||||
logger.Error(nil, "No node info with given name found in the cache", "node", klog.KRef("", name))
|
||||
return
|
||||
}
|
||||
|
||||
if ni.prev != nil {
|
||||
ni.prev.next = ni.next
|
||||
}
|
||||
if ni.next != nil {
|
||||
ni.next.prev = ni.prev
|
||||
}
|
||||
// if the removed item was at the head, we must update the head.
|
||||
if ni == cache.headNode {
|
||||
cache.headNode = ni.next
|
||||
}
|
||||
delete(cache.nodes, name)
|
||||
}
|
||||
|
||||
// Dump produces a dump of the current scheduler cache. This is used for
|
||||
// debugging purposes only and shouldn't be confused with UpdateSnapshot
|
||||
// function.
|
||||
// This method is expensive, and should only be used in non-critical paths.
|
||||
func (cache *cacheImpl) Dump() *Dump {
|
||||
cache.mu.RLock()
|
||||
defer cache.mu.RUnlock()
|
||||
|
||||
nodes := make(map[string]*framework.NodeInfo, len(cache.nodes))
|
||||
for k, v := range cache.nodes {
|
||||
nodes[k] = v.info.Snapshot()
|
||||
}
|
||||
|
||||
return &Dump{
|
||||
Nodes: nodes,
|
||||
AssumedPods: cache.assumedPods.Union(nil),
|
||||
}
|
||||
}
|
||||
|
||||
// UpdateSnapshot takes a snapshot of cached NodeInfo map. This is called at
|
||||
// beginning of every scheduling cycle.
|
||||
// The snapshot only includes Nodes that are not deleted at the time this function is called.
|
||||
// nodeInfo.Node() is guaranteed to be not nil for all the nodes in the snapshot.
|
||||
// This function tracks generation number of NodeInfo and updates only the
|
||||
// entries of an existing snapshot that have changed after the snapshot was taken.
|
||||
func (cache *cacheImpl) UpdateSnapshot(logger klog.Logger, nodeSnapshot *Snapshot) error {
|
||||
cache.mu.Lock()
|
||||
defer cache.mu.Unlock()
|
||||
|
||||
// Get the last generation of the snapshot.
|
||||
snapshotGeneration := nodeSnapshot.generation
|
||||
|
||||
// NodeInfoList and HavePodsWithAffinityNodeInfoList must be re-created if a node was added
|
||||
// or removed from the cache.
|
||||
updateAllLists := false
|
||||
// HavePodsWithAffinityNodeInfoList must be re-created if a node changed its
|
||||
// status from having pods with affinity to NOT having pods with affinity or the other
|
||||
// way around.
|
||||
updateNodesHavePodsWithAffinity := false
|
||||
// HavePodsWithRequiredAntiAffinityNodeInfoList must be re-created if a node changed its
|
||||
// status from having pods with required anti-affinity to NOT having pods with required
|
||||
// anti-affinity or the other way around.
|
||||
updateNodesHavePodsWithRequiredAntiAffinity := false
|
||||
// usedPVCSet must be re-created whenever the head node generation is greater than
|
||||
// last snapshot generation.
|
||||
updateUsedPVCSet := false
|
||||
|
||||
// Start from the head of the NodeInfo doubly linked list and update snapshot
|
||||
// of NodeInfos updated after the last snapshot.
|
||||
for node := cache.headNode; node != nil; node = node.next {
|
||||
if node.info.Generation <= snapshotGeneration {
|
||||
// all the nodes are updated before the existing snapshot. We are done.
|
||||
break
|
||||
}
|
||||
if np := node.info.Node(); np != nil {
|
||||
existing, ok := nodeSnapshot.nodeInfoMap[np.Name]
|
||||
if !ok {
|
||||
updateAllLists = true
|
||||
existing = &framework.NodeInfo{}
|
||||
nodeSnapshot.nodeInfoMap[np.Name] = existing
|
||||
}
|
||||
clone := node.info.Snapshot()
|
||||
// We track nodes that have pods with affinity, here we check if this node changed its
|
||||
// status from having pods with affinity to NOT having pods with affinity or the other
|
||||
// way around.
|
||||
if (len(existing.PodsWithAffinity) > 0) != (len(clone.PodsWithAffinity) > 0) {
|
||||
updateNodesHavePodsWithAffinity = true
|
||||
}
|
||||
if (len(existing.PodsWithRequiredAntiAffinity) > 0) != (len(clone.PodsWithRequiredAntiAffinity) > 0) {
|
||||
updateNodesHavePodsWithRequiredAntiAffinity = true
|
||||
}
|
||||
if !updateUsedPVCSet {
|
||||
if len(existing.PVCRefCounts) != len(clone.PVCRefCounts) {
|
||||
updateUsedPVCSet = true
|
||||
} else {
|
||||
for pvcKey := range clone.PVCRefCounts {
|
||||
if _, found := existing.PVCRefCounts[pvcKey]; !found {
|
||||
updateUsedPVCSet = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// We need to preserve the original pointer of the NodeInfo struct since it
|
||||
// is used in the NodeInfoList, which we may not update.
|
||||
*existing = *clone
|
||||
}
|
||||
}
|
||||
// Update the snapshot generation with the latest NodeInfo generation.
|
||||
if cache.headNode != nil {
|
||||
nodeSnapshot.generation = cache.headNode.info.Generation
|
||||
}
|
||||
|
||||
// Comparing to pods in nodeTree.
|
||||
// Deleted nodes get removed from the tree, but they might remain in the nodes map
|
||||
// if they still have non-deleted Pods.
|
||||
if len(nodeSnapshot.nodeInfoMap) > cache.nodeTree.numNodes {
|
||||
cache.removeDeletedNodesFromSnapshot(nodeSnapshot)
|
||||
updateAllLists = true
|
||||
}
|
||||
|
||||
if updateAllLists || updateNodesHavePodsWithAffinity || updateNodesHavePodsWithRequiredAntiAffinity || updateUsedPVCSet {
|
||||
cache.updateNodeInfoSnapshotList(logger, nodeSnapshot, updateAllLists)
|
||||
}
|
||||
|
||||
if len(nodeSnapshot.nodeInfoList) != cache.nodeTree.numNodes {
|
||||
errMsg := fmt.Sprintf("snapshot state is not consistent, length of NodeInfoList=%v not equal to length of nodes in tree=%v "+
|
||||
", length of NodeInfoMap=%v, length of nodes in cache=%v"+
|
||||
", trying to recover",
|
||||
len(nodeSnapshot.nodeInfoList), cache.nodeTree.numNodes,
|
||||
len(nodeSnapshot.nodeInfoMap), len(cache.nodes))
|
||||
logger.Error(nil, errMsg)
|
||||
// We will try to recover by re-creating the lists for the next scheduling cycle, but still return an
|
||||
// error to surface the problem, the error will likely cause a failure to the current scheduling cycle.
|
||||
cache.updateNodeInfoSnapshotList(logger, nodeSnapshot, true)
|
||||
return errors.New(errMsg)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
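// Illustrative sketch (not part of the vendored source): the incremental update above relies on
// two invariants: NodeInfo.Generation increases monotonically, and the "nodes" list is ordered
// most-recently-updated first. Stripped of the affinity/PVC bookkeeping, the core pattern is
// roughly the following (applyToSnapshot is a hypothetical placeholder):
//
//	for n := cache.headNode; n != nil; n = n.next {
//		if n.info.Generation <= snapshotGeneration {
//			break // every remaining node is older than the snapshot; nothing left to copy
//		}
//		applyToSnapshot(n.info)
//	}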
|
||||
|
||||
func (cache *cacheImpl) updateNodeInfoSnapshotList(logger klog.Logger, snapshot *Snapshot, updateAll bool) {
|
||||
snapshot.havePodsWithAffinityNodeInfoList = make([]*framework.NodeInfo, 0, cache.nodeTree.numNodes)
|
||||
snapshot.havePodsWithRequiredAntiAffinityNodeInfoList = make([]*framework.NodeInfo, 0, cache.nodeTree.numNodes)
|
||||
snapshot.usedPVCSet = sets.New[string]()
|
||||
if updateAll {
|
||||
// Take a snapshot of the nodes order in the tree
|
||||
snapshot.nodeInfoList = make([]*framework.NodeInfo, 0, cache.nodeTree.numNodes)
|
||||
nodesList, err := cache.nodeTree.list()
|
||||
if err != nil {
|
||||
logger.Error(err, "Error occurred while retrieving the list of names of the nodes from node tree")
|
||||
}
|
||||
for _, nodeName := range nodesList {
|
||||
if nodeInfo := snapshot.nodeInfoMap[nodeName]; nodeInfo != nil {
|
||||
snapshot.nodeInfoList = append(snapshot.nodeInfoList, nodeInfo)
|
||||
if len(nodeInfo.PodsWithAffinity) > 0 {
|
||||
snapshot.havePodsWithAffinityNodeInfoList = append(snapshot.havePodsWithAffinityNodeInfoList, nodeInfo)
|
||||
}
|
||||
if len(nodeInfo.PodsWithRequiredAntiAffinity) > 0 {
|
||||
snapshot.havePodsWithRequiredAntiAffinityNodeInfoList = append(snapshot.havePodsWithRequiredAntiAffinityNodeInfoList, nodeInfo)
|
||||
}
|
||||
for key := range nodeInfo.PVCRefCounts {
|
||||
snapshot.usedPVCSet.Insert(key)
|
||||
}
|
||||
} else {
|
||||
logger.Error(nil, "Node exists in nodeTree but not in NodeInfoMap, this should not happen", "node", klog.KRef("", nodeName))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for _, nodeInfo := range snapshot.nodeInfoList {
|
||||
if len(nodeInfo.PodsWithAffinity) > 0 {
|
||||
snapshot.havePodsWithAffinityNodeInfoList = append(snapshot.havePodsWithAffinityNodeInfoList, nodeInfo)
|
||||
}
|
||||
if len(nodeInfo.PodsWithRequiredAntiAffinity) > 0 {
|
||||
snapshot.havePodsWithRequiredAntiAffinityNodeInfoList = append(snapshot.havePodsWithRequiredAntiAffinityNodeInfoList, nodeInfo)
|
||||
}
|
||||
for key := range nodeInfo.PVCRefCounts {
|
||||
snapshot.usedPVCSet.Insert(key)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If certain nodes were deleted after the last snapshot was taken, we should remove them from the snapshot.
|
||||
func (cache *cacheImpl) removeDeletedNodesFromSnapshot(snapshot *Snapshot) {
|
||||
toDelete := len(snapshot.nodeInfoMap) - cache.nodeTree.numNodes
|
||||
for name := range snapshot.nodeInfoMap {
|
||||
if toDelete <= 0 {
|
||||
break
|
||||
}
|
||||
if n, ok := cache.nodes[name]; !ok || n.info.Node() == nil {
|
||||
delete(snapshot.nodeInfoMap, name)
|
||||
toDelete--
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NodeCount returns the number of nodes in the cache.
|
||||
// DO NOT use outside of tests.
|
||||
func (cache *cacheImpl) NodeCount() int {
|
||||
cache.mu.RLock()
|
||||
defer cache.mu.RUnlock()
|
||||
return len(cache.nodes)
|
||||
}
|
||||
|
||||
// PodCount returns the number of pods in the cache (including those from deleted nodes).
|
||||
// DO NOT use outside of tests.
|
||||
func (cache *cacheImpl) PodCount() (int, error) {
|
||||
cache.mu.RLock()
|
||||
defer cache.mu.RUnlock()
|
||||
// Count the pods tracked on every cached node, including pods on nodes whose
// Node object has already been removed but which still hold pods.
|
||||
count := 0
|
||||
for _, n := range cache.nodes {
|
||||
count += len(n.info.Pods)
|
||||
}
|
||||
return count, nil
|
||||
}
|
||||
|
||||
func (cache *cacheImpl) AssumePod(logger klog.Logger, pod *v1.Pod) error {
|
||||
key, err := framework.GetPodKey(pod)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cache.mu.Lock()
|
||||
defer cache.mu.Unlock()
|
||||
if _, ok := cache.podStates[key]; ok {
|
||||
return fmt.Errorf("pod %v(%v) is in the cache, so can't be assumed", key, klog.KObj(pod))
|
||||
}
|
||||
|
||||
return cache.addPod(logger, pod, true)
|
||||
}
|
||||
|
||||
func (cache *cacheImpl) FinishBinding(logger klog.Logger, pod *v1.Pod) error {
|
||||
return cache.finishBinding(logger, pod, time.Now())
|
||||
}
|
||||
|
||||
// finishBinding exists to make tests deterministic by injecting now as an argument
|
||||
func (cache *cacheImpl) finishBinding(logger klog.Logger, pod *v1.Pod, now time.Time) error {
|
||||
key, err := framework.GetPodKey(pod)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cache.mu.RLock()
|
||||
defer cache.mu.RUnlock()
|
||||
|
||||
logger.V(5).Info("Finished binding for pod, can be expired", "podKey", key, "pod", klog.KObj(pod))
|
||||
currState, ok := cache.podStates[key]
|
||||
if ok && cache.assumedPods.Has(key) {
|
||||
if cache.ttl == time.Duration(0) {
|
||||
currState.deadline = nil
|
||||
} else {
|
||||
dl := now.Add(cache.ttl)
|
||||
currState.deadline = &dl
|
||||
}
|
||||
currState.bindingFinished = true
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cache *cacheImpl) ForgetPod(logger klog.Logger, pod *v1.Pod) error {
|
||||
key, err := framework.GetPodKey(pod)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cache.mu.Lock()
|
||||
defer cache.mu.Unlock()
|
||||
|
||||
currState, ok := cache.podStates[key]
|
||||
if ok && currState.pod.Spec.NodeName != pod.Spec.NodeName {
|
||||
return fmt.Errorf("pod %v(%v) was assumed on %v but assigned to %v", key, klog.KObj(pod), pod.Spec.NodeName, currState.pod.Spec.NodeName)
|
||||
}
|
||||
|
||||
// Only assumed pod can be forgotten.
|
||||
if ok && cache.assumedPods.Has(key) {
|
||||
return cache.removePod(logger, pod)
|
||||
}
|
||||
return fmt.Errorf("pod %v(%v) wasn't assumed so cannot be forgotten", key, klog.KObj(pod))
|
||||
}
|
||||
|
||||
// Assumes that lock is already acquired.
|
||||
func (cache *cacheImpl) addPod(logger klog.Logger, pod *v1.Pod, assumePod bool) error {
|
||||
key, err := framework.GetPodKey(pod)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n, ok := cache.nodes[pod.Spec.NodeName]
|
||||
if !ok {
|
||||
n = newNodeInfoListItem(framework.NewNodeInfo())
|
||||
cache.nodes[pod.Spec.NodeName] = n
|
||||
}
|
||||
n.info.AddPod(pod)
|
||||
cache.moveNodeInfoToHead(logger, pod.Spec.NodeName)
|
||||
ps := &podState{
|
||||
pod: pod,
|
||||
}
|
||||
cache.podStates[key] = ps
|
||||
if assumePod {
|
||||
cache.assumedPods.Insert(key)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Assumes that lock is already acquired.
|
||||
func (cache *cacheImpl) updatePod(logger klog.Logger, oldPod, newPod *v1.Pod) error {
|
||||
if err := cache.removePod(logger, oldPod); err != nil {
|
||||
return err
|
||||
}
|
||||
return cache.addPod(logger, newPod, false)
|
||||
}
|
||||
|
||||
// Assumes that lock is already acquired.
|
||||
// Removes a pod from the cached node info. If the node information was already
|
||||
// removed and there are no more pods left in the node, cleans up the node from
|
||||
// the cache.
|
||||
func (cache *cacheImpl) removePod(logger klog.Logger, pod *v1.Pod) error {
|
||||
key, err := framework.GetPodKey(pod)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
n, ok := cache.nodes[pod.Spec.NodeName]
|
||||
if !ok {
|
||||
logger.Error(nil, "Node not found when trying to remove pod", "node", klog.KRef("", pod.Spec.NodeName), "podKey", key, "pod", klog.KObj(pod))
|
||||
} else {
|
||||
if err := n.info.RemovePod(logger, pod); err != nil {
|
||||
return err
|
||||
}
|
||||
if len(n.info.Pods) == 0 && n.info.Node() == nil {
|
||||
cache.removeNodeInfoFromList(logger, pod.Spec.NodeName)
|
||||
} else {
|
||||
cache.moveNodeInfoToHead(logger, pod.Spec.NodeName)
|
||||
}
|
||||
}
|
||||
|
||||
delete(cache.podStates, key)
|
||||
delete(cache.assumedPods, key)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cache *cacheImpl) AddPod(logger klog.Logger, pod *v1.Pod) error {
|
||||
key, err := framework.GetPodKey(pod)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cache.mu.Lock()
|
||||
defer cache.mu.Unlock()
|
||||
|
||||
currState, ok := cache.podStates[key]
|
||||
switch {
|
||||
case ok && cache.assumedPods.Has(key):
|
||||
// When assuming, we've already added the Pod to the cache;
// just update here to make sure the Pod's status is up-to-date.
|
||||
if err = cache.updatePod(logger, currState.pod, pod); err != nil {
|
||||
logger.Error(err, "Error occurred while updating pod")
|
||||
}
|
||||
if currState.pod.Spec.NodeName != pod.Spec.NodeName {
|
||||
// The pod was added to a different node than it was assumed to.
|
||||
logger.Info("Pod was added to a different node than it was assumed", "podKey", key, "pod", klog.KObj(pod), "assumedNode", klog.KRef("", pod.Spec.NodeName), "currentNode", klog.KRef("", currState.pod.Spec.NodeName))
|
||||
return nil
|
||||
}
|
||||
case !ok:
|
||||
// Pod was expired. We should add it back.
|
||||
if err = cache.addPod(logger, pod, false); err != nil {
|
||||
logger.Error(err, "Error occurred while adding pod")
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("pod %v(%v) was already in added state", key, klog.KObj(pod))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cache *cacheImpl) UpdatePod(logger klog.Logger, oldPod, newPod *v1.Pod) error {
|
||||
key, err := framework.GetPodKey(oldPod)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cache.mu.Lock()
|
||||
defer cache.mu.Unlock()
|
||||
|
||||
currState, ok := cache.podStates[key]
|
||||
if !ok {
|
||||
return fmt.Errorf("pod %v(%v) is not added to scheduler cache, so cannot be updated", key, klog.KObj(oldPod))
|
||||
}
|
||||
|
||||
// An assumed pod won't have Update/Remove event. It needs to have Add event
|
||||
// before Update event, in which case the state would change from Assumed to Added.
|
||||
if cache.assumedPods.Has(key) {
|
||||
return fmt.Errorf("assumed pod %v(%v) should not be updated", key, klog.KObj(oldPod))
|
||||
}
|
||||
|
||||
if currState.pod.Spec.NodeName != newPod.Spec.NodeName {
|
||||
logger.Error(nil, "Pod updated on a different node than previously added to", "podKey", key, "pod", klog.KObj(oldPod))
|
||||
logger.Error(nil, "scheduler cache is corrupted and can badly affect scheduling decisions")
|
||||
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
|
||||
}
|
||||
return cache.updatePod(logger, oldPod, newPod)
|
||||
}
|
||||
|
||||
func (cache *cacheImpl) RemovePod(logger klog.Logger, pod *v1.Pod) error {
|
||||
key, err := framework.GetPodKey(pod)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cache.mu.Lock()
|
||||
defer cache.mu.Unlock()
|
||||
|
||||
currState, ok := cache.podStates[key]
|
||||
if !ok {
|
||||
return fmt.Errorf("pod %v(%v) is not found in scheduler cache, so cannot be removed from it", key, klog.KObj(pod))
|
||||
}
|
||||
if currState.pod.Spec.NodeName != pod.Spec.NodeName {
|
||||
logger.Error(nil, "Pod was added to a different node than it was assumed", "podKey", key, "pod", klog.KObj(pod), "assumedNode", klog.KRef("", pod.Spec.NodeName), "currentNode", klog.KRef("", currState.pod.Spec.NodeName))
|
||||
if pod.Spec.NodeName != "" {
|
||||
// An empty NodeName is possible when the scheduler misses a Delete
|
||||
// event and it gets the last known state from the informer cache.
|
||||
logger.Error(nil, "scheduler cache is corrupted and can badly affect scheduling decisions")
|
||||
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
|
||||
}
|
||||
}
|
||||
return cache.removePod(logger, currState.pod)
|
||||
}
|
||||
|
||||
func (cache *cacheImpl) IsAssumedPod(pod *v1.Pod) (bool, error) {
|
||||
key, err := framework.GetPodKey(pod)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
cache.mu.RLock()
|
||||
defer cache.mu.RUnlock()
|
||||
|
||||
return cache.assumedPods.Has(key), nil
|
||||
}
|
||||
|
||||
// GetPod might return a pod for which its node has already been deleted from
|
||||
// the main cache. This is useful to properly process pod update events.
|
||||
func (cache *cacheImpl) GetPod(pod *v1.Pod) (*v1.Pod, error) {
|
||||
key, err := framework.GetPodKey(pod)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cache.mu.RLock()
|
||||
defer cache.mu.RUnlock()
|
||||
|
||||
podState, ok := cache.podStates[key]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("pod %v(%v) does not exist in scheduler cache", key, klog.KObj(pod))
|
||||
}
|
||||
|
||||
return podState.pod, nil
|
||||
}
|
||||
|
||||
func (cache *cacheImpl) AddNode(logger klog.Logger, node *v1.Node) *framework.NodeInfo {
|
||||
cache.mu.Lock()
|
||||
defer cache.mu.Unlock()
|
||||
|
||||
n, ok := cache.nodes[node.Name]
|
||||
if !ok {
|
||||
n = newNodeInfoListItem(framework.NewNodeInfo())
|
||||
cache.nodes[node.Name] = n
|
||||
} else {
|
||||
cache.removeNodeImageStates(n.info.Node())
|
||||
}
|
||||
cache.moveNodeInfoToHead(logger, node.Name)
|
||||
|
||||
cache.nodeTree.addNode(logger, node)
|
||||
cache.addNodeImageStates(node, n.info)
|
||||
n.info.SetNode(node)
|
||||
return n.info.Snapshot()
|
||||
}
|
||||
|
||||
func (cache *cacheImpl) UpdateNode(logger klog.Logger, oldNode, newNode *v1.Node) *framework.NodeInfo {
|
||||
cache.mu.Lock()
|
||||
defer cache.mu.Unlock()
|
||||
n, ok := cache.nodes[newNode.Name]
|
||||
if !ok {
|
||||
n = newNodeInfoListItem(framework.NewNodeInfo())
|
||||
cache.nodes[newNode.Name] = n
|
||||
cache.nodeTree.addNode(logger, newNode)
|
||||
} else {
|
||||
cache.removeNodeImageStates(n.info.Node())
|
||||
}
|
||||
cache.moveNodeInfoToHead(logger, newNode.Name)
|
||||
|
||||
cache.nodeTree.updateNode(logger, oldNode, newNode)
|
||||
cache.addNodeImageStates(newNode, n.info)
|
||||
n.info.SetNode(newNode)
|
||||
return n.info.Snapshot()
|
||||
}
|
||||
|
||||
// RemoveNode removes a node from the cache's tree.
|
||||
// The node might still have pods because their deletion events didn't arrive
|
||||
// yet. Those pods are considered removed from the cache, as the node tree is
// the source of truth.
|
||||
// However, we keep a ghost node with the list of pods until all pod deletion
|
||||
// events have arrived. A ghost node is skipped from snapshots.
|
||||
func (cache *cacheImpl) RemoveNode(logger klog.Logger, node *v1.Node) error {
|
||||
cache.mu.Lock()
|
||||
defer cache.mu.Unlock()
|
||||
|
||||
n, ok := cache.nodes[node.Name]
|
||||
if !ok {
|
||||
return fmt.Errorf("node %v is not found", node.Name)
|
||||
}
|
||||
n.info.RemoveNode()
|
||||
// We remove NodeInfo for this node only if there aren't any pods on this node.
|
||||
// We can't do it unconditionally, because notifications about pods are delivered
|
||||
// in a different watch, and thus can potentially be observed later, even though
|
||||
// they happened before node removal.
|
||||
if len(n.info.Pods) == 0 {
|
||||
cache.removeNodeInfoFromList(logger, node.Name)
|
||||
} else {
|
||||
cache.moveNodeInfoToHead(logger, node.Name)
|
||||
}
|
||||
if err := cache.nodeTree.removeNode(logger, node); err != nil {
|
||||
return err
|
||||
}
|
||||
cache.removeNodeImageStates(node)
|
||||
return nil
|
||||
}
|
||||
|
||||
// addNodeImageStates adds states of the images on the given node to the given nodeInfo and updates the imageStates in
// the scheduler cache. This function assumes the lock to the scheduler cache has been acquired.
|
||||
func (cache *cacheImpl) addNodeImageStates(node *v1.Node, nodeInfo *framework.NodeInfo) {
|
||||
newSum := make(map[string]*framework.ImageStateSummary)
|
||||
|
||||
for _, image := range node.Status.Images {
|
||||
for _, name := range image.Names {
|
||||
// update the entry in imageStates
|
||||
state, ok := cache.imageStates[name]
|
||||
if !ok {
|
||||
state = &framework.ImageStateSummary{
|
||||
Size: image.SizeBytes,
|
||||
Nodes: sets.New(node.Name),
|
||||
}
|
||||
cache.imageStates[name] = state
|
||||
} else {
|
||||
state.Nodes.Insert(node.Name)
|
||||
}
|
||||
// create the ImageStateSummary for this image
|
||||
if _, ok := newSum[name]; !ok {
|
||||
newSum[name] = state
|
||||
}
|
||||
}
|
||||
}
|
||||
nodeInfo.ImageStates = newSum
|
||||
}
|
||||
|
||||
// removeNodeImageStates removes the given node record from image entries having the node
|
||||
// in imageStates cache. After the removal, if any image becomes free, i.e., the image
|
||||
// is no longer available on any node, the image entry will be removed from imageStates.
|
||||
func (cache *cacheImpl) removeNodeImageStates(node *v1.Node) {
|
||||
if node == nil {
|
||||
return
|
||||
}
|
||||
|
||||
for _, image := range node.Status.Images {
|
||||
for _, name := range image.Names {
|
||||
state, ok := cache.imageStates[name]
|
||||
if ok {
|
||||
state.Nodes.Delete(node.Name)
|
||||
if state.Nodes.Len() == 0 {
|
||||
// Remove the unused image to make sure the length of
|
||||
// imageStates represents the total number of different
|
||||
// images on all nodes
|
||||
delete(cache.imageStates, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (cache *cacheImpl) run(logger klog.Logger) {
|
||||
go wait.Until(func() {
|
||||
cache.cleanupAssumedPods(logger, time.Now())
|
||||
}, cache.period, cache.stop)
|
||||
}
|
||||
|
||||
// cleanupAssumedPods exists to make tests deterministic by taking the current time as an input argument.
|
||||
// It also reports metrics on the cache size for nodes, pods, and assumed pods.
|
||||
func (cache *cacheImpl) cleanupAssumedPods(logger klog.Logger, now time.Time) {
|
||||
cache.mu.Lock()
|
||||
defer cache.mu.Unlock()
|
||||
defer cache.updateMetrics()
|
||||
|
||||
// The size of assumedPods should be small
|
||||
for key := range cache.assumedPods {
|
||||
ps, ok := cache.podStates[key]
|
||||
if !ok {
|
||||
logger.Error(nil, "Key found in assumed set but not in podStates, potentially a logical error")
|
||||
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
|
||||
}
|
||||
if !ps.bindingFinished {
|
||||
logger.V(5).Info("Could not expire cache for pod as binding is still in progress", "podKey", key, "pod", klog.KObj(ps.pod))
|
||||
continue
|
||||
}
|
||||
if cache.ttl != 0 && now.After(*ps.deadline) {
|
||||
logger.Info("Pod expired", "podKey", key, "pod", klog.KObj(ps.pod))
|
||||
if err := cache.removePod(logger, ps.pod); err != nil {
|
||||
logger.Error(err, "ExpirePod failed", "podKey", key, "pod", klog.KObj(ps.pod))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
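// Illustrative note (not part of the vendored source): expiration only applies after
// FinishBinding has run. With a zero ttl, finishBinding leaves deadline nil and the
// "cache.ttl != 0" guard above means assumed pods are never expired by this loop; with a
// non-zero ttl, a pod whose binding finished more than ttl ago is removed from the cache.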
|
||||
|
||||
// updateMetrics updates cache size metric values for pods, assumed pods, and nodes
|
||||
func (cache *cacheImpl) updateMetrics() {
|
||||
metrics.CacheSize.WithLabelValues("assumed_pods").Set(float64(len(cache.assumedPods)))
|
||||
metrics.CacheSize.WithLabelValues("pods").Set(float64(len(cache.podStates)))
|
||||
metrics.CacheSize.WithLabelValues("nodes").Set(float64(len(cache.nodes)))
|
||||
}
|
vendor/k8s.io/kubernetes/pkg/scheduler/backend/cache/debugger/comparer.go (generated, vendored, new file, 135 lines)
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package debugger
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
corelisters "k8s.io/client-go/listers/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
|
||||
internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
// CacheComparer is an implementation of the Scheduler's cache comparer.
|
||||
type CacheComparer struct {
|
||||
NodeLister corelisters.NodeLister
|
||||
PodLister corelisters.PodLister
|
||||
Cache internalcache.Cache
|
||||
PodQueue internalqueue.SchedulingQueue
|
||||
}
|
||||
|
||||
// Compare compares the nodes and pods of NodeLister with Cache.Snapshot.
|
||||
func (c *CacheComparer) Compare(logger klog.Logger) error {
|
||||
logger.V(3).Info("Cache comparer started")
|
||||
defer logger.V(3).Info("Cache comparer finished")
|
||||
|
||||
nodes, err := c.NodeLister.List(labels.Everything())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
pods, err := c.PodLister.List(labels.Everything())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dump := c.Cache.Dump()
|
||||
|
||||
pendingPods, _ := c.PodQueue.PendingPods()
|
||||
|
||||
if missed, redundant := c.CompareNodes(nodes, dump.Nodes); len(missed)+len(redundant) != 0 {
|
||||
logger.Info("Cache mismatch", "missedNodes", missed, "redundantNodes", redundant)
|
||||
}
|
||||
|
||||
if missed, redundant := c.ComparePods(pods, pendingPods, dump.Nodes); len(missed)+len(redundant) != 0 {
|
||||
logger.Info("Cache mismatch", "missedPods", missed, "redundantPods", redundant)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// CompareNodes compares actual nodes with cached nodes.
|
||||
func (c *CacheComparer) CompareNodes(nodes []*v1.Node, nodeinfos map[string]*framework.NodeInfo) (missed, redundant []string) {
|
||||
actual := []string{}
|
||||
for _, node := range nodes {
|
||||
actual = append(actual, node.Name)
|
||||
}
|
||||
|
||||
cached := []string{}
|
||||
for nodeName := range nodeinfos {
|
||||
cached = append(cached, nodeName)
|
||||
}
|
||||
|
||||
return compareStrings(actual, cached)
|
||||
}
|
||||
|
||||
// ComparePods compares actual pods with cached pods.
|
||||
func (c *CacheComparer) ComparePods(pods, waitingPods []*v1.Pod, nodeinfos map[string]*framework.NodeInfo) (missed, redundant []string) {
|
||||
actual := []string{}
|
||||
for _, pod := range pods {
|
||||
actual = append(actual, string(pod.UID))
|
||||
}
|
||||
|
||||
cached := []string{}
|
||||
for _, nodeinfo := range nodeinfos {
|
||||
for _, p := range nodeinfo.Pods {
|
||||
cached = append(cached, string(p.Pod.UID))
|
||||
}
|
||||
}
|
||||
for _, pod := range waitingPods {
|
||||
cached = append(cached, string(pod.UID))
|
||||
}
|
||||
|
||||
return compareStrings(actual, cached)
|
||||
}
|
||||
|
||||
func compareStrings(actual, cached []string) (missed, redundant []string) {
|
||||
missed, redundant = []string{}, []string{}
|
||||
|
||||
sort.Strings(actual)
|
||||
sort.Strings(cached)
|
||||
|
||||
compare := func(i, j int) int {
|
||||
if i == len(actual) {
|
||||
return 1
|
||||
} else if j == len(cached) {
|
||||
return -1
|
||||
}
|
||||
return strings.Compare(actual[i], cached[j])
|
||||
}
|
||||
|
||||
for i, j := 0, 0; i < len(actual) || j < len(cached); {
|
||||
switch compare(i, j) {
|
||||
case 0:
|
||||
i++
|
||||
j++
|
||||
case -1:
|
||||
missed = append(missed, actual[i])
|
||||
i++
|
||||
case 1:
|
||||
redundant = append(redundant, cached[j])
|
||||
j++
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
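// Illustrative example (not part of the vendored source): compareStrings walks the two sorted
// slices in lockstep, so for instance
//
//	missed, redundant := compareStrings([]string{"a", "b", "c"}, []string{"b", "c", "d"})
//	// missed    == []string{"a"}  (known to the API server, absent from the cache)
//	// redundant == []string{"d"}  (present in the cache, unknown to the API server)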
|
vendor/k8s.io/kubernetes/pkg/scheduler/backend/cache/debugger/debugger.go (generated, vendored, new file, 76 lines)
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package debugger
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"os/signal"
|
||||
|
||||
corelisters "k8s.io/client-go/listers/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
|
||||
internalqueue "k8s.io/kubernetes/pkg/scheduler/backend/queue"
|
||||
)
|
||||
|
||||
// CacheDebugger provides ways to check and write cache information for debugging.
|
||||
type CacheDebugger struct {
|
||||
Comparer CacheComparer
|
||||
Dumper CacheDumper
|
||||
}
|
||||
|
||||
// New creates a CacheDebugger.
|
||||
func New(
|
||||
nodeLister corelisters.NodeLister,
|
||||
podLister corelisters.PodLister,
|
||||
cache internalcache.Cache,
|
||||
podQueue internalqueue.SchedulingQueue,
|
||||
) *CacheDebugger {
|
||||
return &CacheDebugger{
|
||||
Comparer: CacheComparer{
|
||||
NodeLister: nodeLister,
|
||||
PodLister: podLister,
|
||||
Cache: cache,
|
||||
PodQueue: podQueue,
|
||||
},
|
||||
Dumper: CacheDumper{
|
||||
cache: cache,
|
||||
podQueue: podQueue,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ListenForSignal starts a goroutine that will trigger the CacheDebugger's
|
||||
// behavior when the process receives SIGINT (Windows) or SIGUSR2 (non-Windows).
|
||||
func (d *CacheDebugger) ListenForSignal(ctx context.Context) {
|
||||
logger := klog.FromContext(ctx)
|
||||
stopCh := ctx.Done()
|
||||
ch := make(chan os.Signal, 1)
|
||||
signal.Notify(ch, compareSignal)
|
||||
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-stopCh:
|
||||
return
|
||||
case <-ch:
|
||||
d.Comparer.Compare(logger)
|
||||
d.Dumper.DumpAll(logger)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
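// Illustrative note (not part of the vendored source): on non-Windows hosts the comparer and
// dumper above are triggered by sending SIGUSR2 to the scheduler process (see signal.go below);
// on Windows the trigger is os.Interrupt (see signal_windows.go).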
|
vendor/k8s.io/kubernetes/pkg/scheduler/backend/cache/debugger/dumper.go (generated, vendored, new file, 88 lines)
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package debugger
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
internalcache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
|
||||
"k8s.io/kubernetes/pkg/scheduler/backend/queue"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
// CacheDumper writes some information from the scheduler cache and the scheduling queue to the
|
||||
// scheduler logs for debugging purposes.
|
||||
type CacheDumper struct {
|
||||
cache internalcache.Cache
|
||||
podQueue queue.SchedulingQueue
|
||||
}
|
||||
|
||||
// DumpAll writes cached nodes and scheduling queue information to the scheduler logs.
|
||||
func (d *CacheDumper) DumpAll(logger klog.Logger) {
|
||||
d.dumpNodes(logger)
|
||||
d.dumpSchedulingQueue(logger)
|
||||
}
|
||||
|
||||
// dumpNodes writes NodeInfo to the scheduler logs.
|
||||
func (d *CacheDumper) dumpNodes(logger klog.Logger) {
|
||||
dump := d.cache.Dump()
|
||||
nodeInfos := make([]string, 0, len(dump.Nodes))
|
||||
for name, nodeInfo := range dump.Nodes {
|
||||
nodeInfos = append(nodeInfos, d.printNodeInfo(name, nodeInfo))
|
||||
}
|
||||
// Extra blank line added between node entries for readability.
|
||||
logger.Info("Dump of cached NodeInfo", "nodes", strings.Join(nodeInfos, "\n\n"))
|
||||
}
|
||||
|
||||
// dumpSchedulingQueue writes pods in the scheduling queue to the scheduler logs.
|
||||
func (d *CacheDumper) dumpSchedulingQueue(logger klog.Logger) {
|
||||
pendingPods, s := d.podQueue.PendingPods()
|
||||
var podData strings.Builder
|
||||
for _, p := range pendingPods {
|
||||
podData.WriteString(printPod(p))
|
||||
}
|
||||
logger.Info("Dump of scheduling queue", "summary", s, "pods", podData.String())
|
||||
}
|
||||
|
||||
// printNodeInfo writes parts of NodeInfo to a string.
|
||||
func (d *CacheDumper) printNodeInfo(name string, n *framework.NodeInfo) string {
|
||||
var nodeData strings.Builder
|
||||
nodeData.WriteString(fmt.Sprintf("Node name: %s\nDeleted: %t\nRequested Resources: %+v\nAllocatable Resources:%+v\nScheduled Pods(number: %v):\n",
|
||||
name, n.Node() == nil, n.Requested, n.Allocatable, len(n.Pods)))
|
||||
// Dumping Pod Info
|
||||
for _, p := range n.Pods {
|
||||
nodeData.WriteString(printPod(p.Pod))
|
||||
}
|
||||
// Dumping nominated pods info on the node
|
||||
nominatedPodInfos := d.podQueue.NominatedPodsForNode(name)
|
||||
if len(nominatedPodInfos) != 0 {
|
||||
nodeData.WriteString(fmt.Sprintf("Nominated Pods(number: %v):\n", len(nominatedPodInfos)))
|
||||
for _, pi := range nominatedPodInfos {
|
||||
nodeData.WriteString(printPod(pi.Pod))
|
||||
}
|
||||
}
|
||||
return nodeData.String()
|
||||
}
|
||||
|
||||
// printPod writes parts of a Pod object to a string.
|
||||
func printPod(p *v1.Pod) string {
|
||||
return fmt.Sprintf("name: %v, namespace: %v, uid: %v, phase: %v, nominated node: %v\n", p.Name, p.Namespace, p.UID, p.Status.Phase, p.Status.NominatedNodeName)
|
||||
}
|
vendor/k8s.io/kubernetes/pkg/scheduler/backend/cache/debugger/signal.go (generated, vendored, new file, 26 lines)
@@ -0,0 +1,26 @@
|
||||
//go:build !windows
|
||||
// +build !windows
|
||||
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package debugger
|
||||
|
||||
import "syscall"
|
||||
|
||||
// compareSignal is the signal to trigger cache compare. For non-windows
|
||||
// environment it's SIGUSR2.
|
||||
var compareSignal = syscall.SIGUSR2
|
vendor/k8s.io/kubernetes/pkg/scheduler/backend/cache/debugger/signal_windows.go (generated, vendored, new file, 23 lines)
@@ -0,0 +1,23 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package debugger
|
||||
|
||||
import "os"
|
||||
|
||||
// compareSignal is the signal to trigger cache compare. For windows,
|
||||
// it's SIGINT.
|
||||
var compareSignal = os.Interrupt
|
vendor/k8s.io/kubernetes/pkg/scheduler/backend/cache/interface.go (generated, vendored, new file, 123 lines)
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cache
|
||||
|
||||
import (
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
// Cache collects pods' information and provides node-level aggregated information.
|
||||
// It's intended for generic scheduler to do efficient lookup.
|
||||
// Cache's operations are pod centric. It does incremental updates based on pod events.
|
||||
// Pod events are sent via network. We don't have guaranteed delivery of all events:
|
||||
// We use Reflector to list and watch from remote.
|
||||
// Reflector might be slow and do a relist, which would lead to missing events.
|
||||
//
|
||||
// State Machine of a pod's events in scheduler's cache:
|
||||
//
|
||||
// +-------------------------------------------+ +----+
|
||||
// | Add | | |
|
||||
// | | | | Update
|
||||
// + Assume Add v v |
|
||||
//
|
||||
// Initial +--------> Assumed +------------+---> Added <--+
|
||||
//
|
||||
// ^ + + | +
|
||||
// | | | | |
|
||||
// | | | Add | | Remove
|
||||
// | | | | |
|
||||
// | | | + |
|
||||
// +----------------+ +-----------> Expired +----> Deleted
|
||||
// Forget Expire
|
||||
//
|
||||
// Note that an assumed pod can expire: if we don't receive the Add event within the TTL,
// something has likely gone wrong and we shouldn't keep the pod in the cache any longer.
|
||||
//
|
||||
// Note that "Initial", "Expired", and "Deleted" pods do not actually exist in cache.
|
||||
// Based on existing use cases, we are making the following assumptions:
|
||||
// - No pod would be assumed twice
|
||||
// - A pod could be added without going through scheduler. In this case, we will see Add but not Assume event.
|
||||
// - If a pod wasn't added, it wouldn't be removed or updated.
|
||||
// - Both "Expired" and "Deleted" are valid end states. In case of some problems, e.g. network issue,
|
||||
// a pod might have changed its state (e.g. added and deleted) without delivering notification to the cache.
|
||||
type Cache interface {
|
||||
// NodeCount returns the number of nodes in the cache.
|
||||
// DO NOT use outside of tests.
|
||||
NodeCount() int
|
||||
|
||||
// PodCount returns the number of pods in the cache (including those from deleted nodes).
|
||||
// DO NOT use outside of tests.
|
||||
PodCount() (int, error)
|
||||
|
||||
// AssumePod assumes a pod scheduled and aggregates the pod's information into its node.
|
||||
// The implementation also decides the policy for expiring a pod before it is confirmed (i.e., before the Add event is received).
// After expiration, its information is subtracted.
|
||||
AssumePod(logger klog.Logger, pod *v1.Pod) error
|
||||
|
||||
// FinishBinding signals that cache for assumed pod can be expired
|
||||
FinishBinding(logger klog.Logger, pod *v1.Pod) error
|
||||
|
||||
// ForgetPod removes an assumed pod from cache.
|
||||
ForgetPod(logger klog.Logger, pod *v1.Pod) error
|
||||
|
||||
// AddPod either confirms a pod if it's assumed, or adds it back if it's expired.
|
||||
// If added back, the pod's information would be added again.
|
||||
AddPod(logger klog.Logger, pod *v1.Pod) error
|
||||
|
||||
// UpdatePod removes oldPod's information and adds newPod's information.
|
||||
UpdatePod(logger klog.Logger, oldPod, newPod *v1.Pod) error
|
||||
|
||||
// RemovePod removes a pod. The pod's information would be subtracted from assigned node.
|
||||
RemovePod(logger klog.Logger, pod *v1.Pod) error
|
||||
|
||||
// GetPod returns the pod from the cache with the same namespace and the
|
||||
// same name of the specified pod.
|
||||
GetPod(pod *v1.Pod) (*v1.Pod, error)
|
||||
|
||||
// IsAssumedPod returns true if the pod is assumed and not expired.
|
||||
IsAssumedPod(pod *v1.Pod) (bool, error)
|
||||
|
||||
// AddNode adds overall information about node.
|
||||
// It returns a clone of added NodeInfo object.
|
||||
AddNode(logger klog.Logger, node *v1.Node) *framework.NodeInfo
|
||||
|
||||
// UpdateNode updates overall information about node.
|
||||
// It returns a clone of updated NodeInfo object.
|
||||
UpdateNode(logger klog.Logger, oldNode, newNode *v1.Node) *framework.NodeInfo
|
||||
|
||||
// RemoveNode removes overall information about node.
|
||||
RemoveNode(logger klog.Logger, node *v1.Node) error
|
||||
|
||||
// UpdateSnapshot updates the passed infoSnapshot to the current contents of Cache.
|
||||
// The node info contains aggregated information of pods scheduled (including assumed to be)
|
||||
// on this node.
|
||||
// The snapshot only includes Nodes that are not deleted at the time this function is called.
|
||||
// nodeinfo.Node() is guaranteed to be not nil for all the nodes in the snapshot.
|
||||
UpdateSnapshot(logger klog.Logger, nodeSnapshot *Snapshot) error
|
||||
|
||||
// Dump produces a dump of the current cache.
|
||||
Dump() *Dump
|
||||
}
|
||||
|
||||
// Dump is a dump of the cache state.
|
||||
type Dump struct {
|
||||
AssumedPods sets.Set[string]
|
||||
Nodes map[string]*framework.NodeInfo
|
||||
}
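// Illustrative sketch (not part of the vendored source): the Assume/Forget/FinishBinding calls
// above are typically paired around the actual bind request. The helper below is a hypothetical
// example; it assumes "context" is imported and that bind performs the API call.
//
//	func bindOrRollback(ctx context.Context, c Cache, pod *v1.Pod, bind func() error) error {
//		logger := klog.FromContext(ctx)
//		if err := c.AssumePod(logger, pod); err != nil {
//			return err
//		}
//		if err := bind(); err != nil {
//			// Binding failed: roll back the optimistic assumption so the pod no longer
//			// counts against the node's resources.
//			if forgetErr := c.ForgetPod(logger, pod); forgetErr != nil {
//				logger.Error(forgetErr, "failed to forget pod after bind failure")
//			}
//			return err
//		}
//		return c.FinishBinding(logger, pod) // let the assumed pod expire if no Add event arrives
//	}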
|
vendor/k8s.io/kubernetes/pkg/scheduler/backend/cache/node_tree.go (generated, vendored, new file, 143 lines)
@@ -0,0 +1,143 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cache
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
utilnode "k8s.io/component-helpers/node/topology"
|
||||
"k8s.io/klog/v2"
|
||||
)
|
||||
|
||||
// nodeTree is a tree-like data structure that holds node names in each zone. Zone names are
|
||||
// keys to "NodeTree.tree" and values of "NodeTree.tree" are arrays of node names.
|
||||
// NodeTree is NOT thread-safe, any concurrent updates/reads from it must be synchronized by the caller.
|
||||
// It is used only by schedulerCache, and should stay as such.
|
||||
type nodeTree struct {
|
||||
tree map[string][]string // a map from zone (region-zone) to an array of nodes in the zone.
|
||||
zones []string // a list of all the zones in the tree (keys)
|
||||
numNodes int
|
||||
}
|
||||
|
||||
// newNodeTree creates a NodeTree from nodes.
|
||||
func newNodeTree(logger klog.Logger, nodes []*v1.Node) *nodeTree {
|
||||
nt := &nodeTree{
|
||||
tree: make(map[string][]string, len(nodes)),
|
||||
}
|
||||
for _, n := range nodes {
|
||||
nt.addNode(logger, n)
|
||||
}
|
||||
return nt
|
||||
}
|
||||
|
||||
// addNode adds a node and its corresponding zone to the tree. If the zone already exists, the node
|
||||
// is added to the array of nodes in that zone.
|
||||
func (nt *nodeTree) addNode(logger klog.Logger, n *v1.Node) {
|
||||
zone := utilnode.GetZoneKey(n)
|
||||
if na, ok := nt.tree[zone]; ok {
|
||||
for _, nodeName := range na {
|
||||
if nodeName == n.Name {
|
||||
logger.Info("Did not add to the NodeTree because it already exists", "node", klog.KObj(n))
|
||||
return
|
||||
}
|
||||
}
|
||||
nt.tree[zone] = append(na, n.Name)
|
||||
} else {
|
||||
nt.zones = append(nt.zones, zone)
|
||||
nt.tree[zone] = []string{n.Name}
|
||||
}
|
||||
logger.V(2).Info("Added node to NodeTree", "node", klog.KObj(n), "zone", zone)
|
||||
nt.numNodes++
|
||||
}
|
||||
|
||||
// removeNode removes a node from the NodeTree.
|
||||
func (nt *nodeTree) removeNode(logger klog.Logger, n *v1.Node) error {
|
||||
zone := utilnode.GetZoneKey(n)
|
||||
if na, ok := nt.tree[zone]; ok {
|
||||
for i, nodeName := range na {
|
||||
if nodeName == n.Name {
|
||||
nt.tree[zone] = append(na[:i], na[i+1:]...)
|
||||
if len(nt.tree[zone]) == 0 {
|
||||
nt.removeZone(zone)
|
||||
}
|
||||
logger.V(2).Info("Removed node from NodeTree", "node", klog.KObj(n), "zone", zone)
|
||||
nt.numNodes--
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
logger.Error(nil, "Did not remove Node in NodeTree because it was not found", "node", klog.KObj(n), "zone", zone)
|
||||
return fmt.Errorf("node %q in group %q was not found", n.Name, zone)
|
||||
}
|
||||
|
||||
// removeZone removes a zone from tree.
|
||||
// This function must be called while writer locks are held.
|
||||
func (nt *nodeTree) removeZone(zone string) {
|
||||
delete(nt.tree, zone)
|
||||
for i, z := range nt.zones {
|
||||
if z == zone {
|
||||
nt.zones = append(nt.zones[:i], nt.zones[i+1:]...)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// updateNode updates a node in the NodeTree.
|
||||
func (nt *nodeTree) updateNode(logger klog.Logger, old, new *v1.Node) {
|
||||
var oldZone string
|
||||
if old != nil {
|
||||
oldZone = utilnode.GetZoneKey(old)
|
||||
}
|
||||
newZone := utilnode.GetZoneKey(new)
|
||||
// If the zone ID of the node has not changed, we don't need to do anything. The name of the node
// cannot change in an update.
|
||||
if oldZone == newZone {
|
||||
return
|
||||
}
|
||||
nt.removeNode(logger, old) // No error checking. We ignore whether the old node exists or not.
|
||||
nt.addNode(logger, new)
|
||||
}
|
||||
|
||||
// list returns the list of node names. NodeTree iterates over zones and in each zone iterates
// over nodes in a round-robin fashion.
|
||||
func (nt *nodeTree) list() ([]string, error) {
|
||||
if len(nt.zones) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
nodesList := make([]string, 0, nt.numNodes)
|
||||
numExhaustedZones := 0
|
||||
nodeIndex := 0
|
||||
for len(nodesList) < nt.numNodes {
|
||||
if numExhaustedZones >= len(nt.zones) { // all zones are exhausted.
|
||||
return nodesList, errors.New("all zones exhausted before reaching count of nodes expected")
|
||||
}
|
||||
for zoneIndex := 0; zoneIndex < len(nt.zones); zoneIndex++ {
|
||||
na := nt.tree[nt.zones[zoneIndex]]
|
||||
if nodeIndex >= len(na) { // If the zone is exhausted, continue
|
||||
if nodeIndex == len(na) { // If it is the first time the zone is exhausted
|
||||
numExhaustedZones++
|
||||
}
|
||||
continue
|
||||
}
|
||||
nodesList = append(nodesList, na[nodeIndex])
|
||||
}
|
||||
nodeIndex++
|
||||
}
|
||||
return nodesList, nil
|
||||
}
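// Illustrative example (not part of the vendored source): list() interleaves zones round-robin,
// so with
//
//	zone-a: [n1, n2, n3]
//	zone-b: [n4]
//
// the returned order is [n1, n4, n2, n3]: one node per zone per pass until all zones are exhausted.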
|
vendor/k8s.io/kubernetes/pkg/scheduler/backend/cache/snapshot.go (generated, vendored, new file, 198 lines)
@@ -0,0 +1,198 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cache
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
// Snapshot is a snapshot of cache NodeInfo and NodeTree order. The scheduler takes a
|
||||
// snapshot at the beginning of each scheduling cycle and uses it for its operations in that cycle.
|
||||
type Snapshot struct {
|
||||
// nodeInfoMap a map of node name to a snapshot of its NodeInfo.
|
||||
nodeInfoMap map[string]*framework.NodeInfo
|
||||
// nodeInfoList is the list of nodes as ordered in the cache's nodeTree.
|
||||
nodeInfoList []*framework.NodeInfo
|
||||
// havePodsWithAffinityNodeInfoList is the list of nodes with at least one pod declaring affinity terms.
|
||||
havePodsWithAffinityNodeInfoList []*framework.NodeInfo
|
||||
// havePodsWithRequiredAntiAffinityNodeInfoList is the list of nodes with at least one pod declaring
|
||||
// required anti-affinity terms.
|
||||
havePodsWithRequiredAntiAffinityNodeInfoList []*framework.NodeInfo
|
||||
// usedPVCSet contains a set of PVC names that have one or more scheduled pods using them,
|
||||
// keyed in the format "namespace/name".
|
||||
usedPVCSet sets.Set[string]
|
||||
generation int64
|
||||
}
|
||||
|
||||
var _ framework.SharedLister = &Snapshot{}
|
||||
|
||||
// NewEmptySnapshot initializes a Snapshot struct and returns it.
|
||||
func NewEmptySnapshot() *Snapshot {
|
||||
return &Snapshot{
|
||||
nodeInfoMap: make(map[string]*framework.NodeInfo),
|
||||
usedPVCSet: sets.New[string](),
|
||||
}
|
||||
}
|
||||
|
||||
// NewSnapshot initializes a Snapshot struct and returns it.
|
||||
func NewSnapshot(pods []*v1.Pod, nodes []*v1.Node) *Snapshot {
|
||||
nodeInfoMap := createNodeInfoMap(pods, nodes)
|
||||
nodeInfoList := make([]*framework.NodeInfo, 0, len(nodeInfoMap))
|
||||
havePodsWithAffinityNodeInfoList := make([]*framework.NodeInfo, 0, len(nodeInfoMap))
|
||||
havePodsWithRequiredAntiAffinityNodeInfoList := make([]*framework.NodeInfo, 0, len(nodeInfoMap))
|
||||
for _, v := range nodeInfoMap {
|
||||
nodeInfoList = append(nodeInfoList, v)
|
||||
if len(v.PodsWithAffinity) > 0 {
|
||||
havePodsWithAffinityNodeInfoList = append(havePodsWithAffinityNodeInfoList, v)
|
||||
}
|
||||
if len(v.PodsWithRequiredAntiAffinity) > 0 {
|
||||
havePodsWithRequiredAntiAffinityNodeInfoList = append(havePodsWithRequiredAntiAffinityNodeInfoList, v)
|
||||
}
|
||||
}
|
||||
|
||||
s := NewEmptySnapshot()
|
||||
s.nodeInfoMap = nodeInfoMap
|
||||
s.nodeInfoList = nodeInfoList
|
||||
s.havePodsWithAffinityNodeInfoList = havePodsWithAffinityNodeInfoList
|
||||
s.havePodsWithRequiredAntiAffinityNodeInfoList = havePodsWithRequiredAntiAffinityNodeInfoList
|
||||
s.usedPVCSet = createUsedPVCSet(pods)
|
||||
|
||||
return s
|
||||
}
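// Illustrative sketch (not part of the vendored source): NewSnapshot is convenient for building
// a framework.SharedLister directly from objects, e.g. in tests. podOnNode1, node1 and node2 are
// hypothetical placeholders.
//
//	snapshot := NewSnapshot([]*v1.Pod{podOnNode1}, []*v1.Node{node1, node2})
//	ni, err := snapshot.Get(node1.Name) // NodeInfo for node1, including podOnNode1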
|
||||
|
||||
// createNodeInfoMap obtains a list of pods and pivots that list into a map
|
||||
// where the keys are node names and the values are the aggregated information
|
||||
// for that node.
|
||||
func createNodeInfoMap(pods []*v1.Pod, nodes []*v1.Node) map[string]*framework.NodeInfo {
|
||||
nodeNameToInfo := make(map[string]*framework.NodeInfo)
|
||||
for _, pod := range pods {
|
||||
nodeName := pod.Spec.NodeName
|
||||
if _, ok := nodeNameToInfo[nodeName]; !ok {
|
||||
nodeNameToInfo[nodeName] = framework.NewNodeInfo()
|
||||
}
|
||||
nodeNameToInfo[nodeName].AddPod(pod)
|
||||
}
|
||||
imageExistenceMap := createImageExistenceMap(nodes)
|
||||
|
||||
for _, node := range nodes {
|
||||
if _, ok := nodeNameToInfo[node.Name]; !ok {
|
||||
nodeNameToInfo[node.Name] = framework.NewNodeInfo()
|
||||
}
|
||||
nodeInfo := nodeNameToInfo[node.Name]
|
||||
nodeInfo.SetNode(node)
|
||||
nodeInfo.ImageStates = getNodeImageStates(node, imageExistenceMap)
|
||||
}
|
||||
return nodeNameToInfo
|
||||
}
|
||||
|
||||
func createUsedPVCSet(pods []*v1.Pod) sets.Set[string] {
|
||||
usedPVCSet := sets.New[string]()
|
||||
for _, pod := range pods {
|
||||
if pod.Spec.NodeName == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, v := range pod.Spec.Volumes {
|
||||
if v.PersistentVolumeClaim == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
key := framework.GetNamespacedName(pod.Namespace, v.PersistentVolumeClaim.ClaimName)
|
||||
usedPVCSet.Insert(key)
|
||||
}
|
||||
}
|
||||
return usedPVCSet
|
||||
}
|
||||
|
||||
// getNodeImageStates returns the given node's image states based on the given imageExistence map.
|
||||
func getNodeImageStates(node *v1.Node, imageExistenceMap map[string]sets.Set[string]) map[string]*framework.ImageStateSummary {
|
||||
imageStates := make(map[string]*framework.ImageStateSummary)
|
||||
|
||||
for _, image := range node.Status.Images {
|
||||
for _, name := range image.Names {
|
||||
imageStates[name] = &framework.ImageStateSummary{
|
||||
Size: image.SizeBytes,
|
||||
NumNodes: imageExistenceMap[name].Len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
return imageStates
|
||||
}
|
||||
|
||||
// createImageExistenceMap returns a map recording on which nodes the images exist, keyed by the images' names.
|
||||
func createImageExistenceMap(nodes []*v1.Node) map[string]sets.Set[string] {
|
||||
imageExistenceMap := make(map[string]sets.Set[string])
|
||||
for _, node := range nodes {
|
||||
for _, image := range node.Status.Images {
|
||||
for _, name := range image.Names {
|
||||
if _, ok := imageExistenceMap[name]; !ok {
|
||||
imageExistenceMap[name] = sets.New(node.Name)
|
||||
} else {
|
||||
imageExistenceMap[name].Insert(node.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return imageExistenceMap
|
||||
}
|
||||
|
||||
// NodeInfos returns a NodeInfoLister.
|
||||
func (s *Snapshot) NodeInfos() framework.NodeInfoLister {
|
||||
return s
|
||||
}
|
||||
|
||||
// StorageInfos returns a StorageInfoLister.
|
||||
func (s *Snapshot) StorageInfos() framework.StorageInfoLister {
|
||||
return s
|
||||
}
|
||||
|
||||
// NumNodes returns the number of nodes in the snapshot.
|
||||
func (s *Snapshot) NumNodes() int {
|
||||
return len(s.nodeInfoList)
|
||||
}
|
||||
|
||||
// List returns the list of nodes in the snapshot.
|
||||
func (s *Snapshot) List() ([]*framework.NodeInfo, error) {
|
||||
return s.nodeInfoList, nil
|
||||
}
|
||||
|
||||
// HavePodsWithAffinityList returns the list of nodes with at least one pod with inter-pod affinity
|
||||
func (s *Snapshot) HavePodsWithAffinityList() ([]*framework.NodeInfo, error) {
|
||||
return s.havePodsWithAffinityNodeInfoList, nil
|
||||
}
|
||||
|
||||
// HavePodsWithRequiredAntiAffinityList returns the list of nodes with at least one pod with
|
||||
// required inter-pod anti-affinity
|
||||
func (s *Snapshot) HavePodsWithRequiredAntiAffinityList() ([]*framework.NodeInfo, error) {
|
||||
return s.havePodsWithRequiredAntiAffinityNodeInfoList, nil
|
||||
}
|
||||
|
||||
// Get returns the NodeInfo of the given node name.
|
||||
func (s *Snapshot) Get(nodeName string) (*framework.NodeInfo, error) {
|
||||
if v, ok := s.nodeInfoMap[nodeName]; ok && v.Node() != nil {
|
||||
return v, nil
|
||||
}
|
||||
return nil, fmt.Errorf("nodeinfo not found for node name %q", nodeName)
|
||||
}
|
||||
|
||||
func (s *Snapshot) IsPVCUsedByPods(key string) bool {
|
||||
return s.usedPVCSet.Has(key)
|
||||
}
|
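For orientation, here is a minimal usage sketch of the snapshot above. It is not part of the vendored code; the import path is an assumption about where this file lives in the tree and may need adjusting.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	// Assumed import path for the snapshot shown above; adjust to the actual
	// location of this vendored package.
	cache "k8s.io/kubernetes/pkg/scheduler/backend/cache"
)

func main() {
	node := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "app", Namespace: "default"},
		Spec: v1.PodSpec{
			NodeName: "node-1", // only scheduled pods contribute to usedPVCSet
			Volumes: []v1.Volume{{
				Name: "data",
				VolumeSource: v1.VolumeSource{
					PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ClaimName: "data-pvc"},
				},
			}},
		},
	}

	snap := cache.NewSnapshot([]*v1.Pod{pod}, []*v1.Node{node})
	fmt.Println(snap.NumNodes())                          // 1
	fmt.Println(snap.IsPVCUsedByPods("default/data-pvc")) // true; keys use "namespace/name"
	if ni, err := snap.Get("node-1"); err == nil {
		fmt.Println(ni.Node().Name)
	}
}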
244 vendor/k8s.io/kubernetes/pkg/scheduler/backend/heap/heap.go generated vendored Normal file
@ -0,0 +1,244 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Below is the implementation of a heap. The logic is pretty much the same
// as cache.heap, however, this heap does not perform synchronization. It leaves
// synchronization to the SchedulingQueue.

package heap

import (
	"container/heap"
	"fmt"

	"k8s.io/kubernetes/pkg/scheduler/metrics"
)

// KeyFunc is a function type to get the key from an object.
type KeyFunc[T any] func(obj T) string

type heapItem[T any] struct {
	obj   T   // The object which is stored in the heap.
	index int // The index of the object's key in the Heap.queue.
}

type itemKeyValue[T any] struct {
	key string
	obj T
}

// data is an internal struct that implements the standard heap interface
// and keeps the data stored in the heap.
type data[T any] struct {
	// items is a map from key of the objects to the objects and their index.
	// We depend on the property that items in the map are in the queue and vice versa.
	items map[string]*heapItem[T]
	// queue implements a heap data structure and keeps the order of elements
	// according to the heap invariant. The queue keeps the keys of objects stored
	// in "items".
	queue []string

	// keyFunc is used to make the key used for queued item insertion and retrieval, and
	// should be deterministic.
	keyFunc KeyFunc[T]
	// lessFunc is used to compare two objects in the heap.
	lessFunc LessFunc[T]
}

var (
	_ = heap.Interface(&data[any]{}) // heapData is a standard heap
)

// Less compares two objects and returns true if the first one should go
// in front of the second one in the heap.
func (h *data[T]) Less(i, j int) bool {
	if i > len(h.queue) || j > len(h.queue) {
		return false
	}
	itemi, ok := h.items[h.queue[i]]
	if !ok {
		return false
	}
	itemj, ok := h.items[h.queue[j]]
	if !ok {
		return false
	}
	return h.lessFunc(itemi.obj, itemj.obj)
}

// Len returns the number of items in the Heap.
func (h *data[T]) Len() int { return len(h.queue) }

// Swap implements swapping of two elements in the heap. This is a part of standard
// heap interface and should never be called directly.
func (h *data[T]) Swap(i, j int) {
	if i < 0 || j < 0 {
		return
	}
	h.queue[i], h.queue[j] = h.queue[j], h.queue[i]
	item := h.items[h.queue[i]]
	item.index = i
	item = h.items[h.queue[j]]
	item.index = j
}

// Push is supposed to be called by container/heap.Push only.
func (h *data[T]) Push(kv interface{}) {
	keyValue := kv.(*itemKeyValue[T])
	n := len(h.queue)
	h.items[keyValue.key] = &heapItem[T]{keyValue.obj, n}
	h.queue = append(h.queue, keyValue.key)
}

// Pop is supposed to be called by container/heap.Pop only.
func (h *data[T]) Pop() interface{} {
	if len(h.queue) == 0 {
		return nil
	}
	key := h.queue[len(h.queue)-1]
	h.queue = h.queue[0 : len(h.queue)-1]
	item, ok := h.items[key]
	if !ok {
		// This is an error
		return nil
	}
	delete(h.items, key)
	return item.obj
}

// Peek returns the head of the heap without removing it.
func (h *data[T]) Peek() (T, bool) {
	if len(h.queue) > 0 {
		return h.items[h.queue[0]].obj, true
	}
	var zero T
	return zero, false
}

// Heap is a producer/consumer queue that implements a heap data structure.
// It can be used to implement priority queues and similar data structures.
type Heap[T any] struct {
	// data stores objects and has a queue that keeps their ordering according
	// to the heap invariant.
	data *data[T]
	// metricRecorder updates the counter when elements of a heap get added or
	// removed, and it does nothing if it's nil.
	metricRecorder metrics.MetricRecorder
}

// AddOrUpdate inserts an item, and puts it in the queue. The item is updated if it
// already exists.
func (h *Heap[T]) AddOrUpdate(obj T) {
	key := h.data.keyFunc(obj)
	if _, exists := h.data.items[key]; exists {
		h.data.items[key].obj = obj
		heap.Fix(h.data, h.data.items[key].index)
	} else {
		heap.Push(h.data, &itemKeyValue[T]{key, obj})
		if h.metricRecorder != nil {
			h.metricRecorder.Inc()
		}
	}
}

// Delete removes an item.
func (h *Heap[T]) Delete(obj T) error {
	key := h.data.keyFunc(obj)
	if item, ok := h.data.items[key]; ok {
		heap.Remove(h.data, item.index)
		if h.metricRecorder != nil {
			h.metricRecorder.Dec()
		}
		return nil
	}
	return fmt.Errorf("object not found")
}

// Peek returns the head of the heap without removing it.
func (h *Heap[T]) Peek() (T, bool) {
	return h.data.Peek()
}

// Pop returns the head of the heap and removes it.
func (h *Heap[T]) Pop() (T, error) {
	obj := heap.Pop(h.data)
	if obj != nil {
		if h.metricRecorder != nil {
			h.metricRecorder.Dec()
		}
		return obj.(T), nil
	}
	var zero T
	return zero, fmt.Errorf("heap is empty")
}

// Get returns the requested item, or sets exists=false.
func (h *Heap[T]) Get(obj T) (T, bool) {
	key := h.data.keyFunc(obj)
	return h.GetByKey(key)
}

// GetByKey returns the requested item, or sets exists=false.
func (h *Heap[T]) GetByKey(key string) (T, bool) {
	item, exists := h.data.items[key]
	if !exists {
		var zero T
		return zero, false
	}
	return item.obj, true
}

// Has returns true if an item with the given object's key exists in the heap.
func (h *Heap[T]) Has(obj T) bool {
	key := h.data.keyFunc(obj)
	_, ok := h.GetByKey(key)
	return ok
}

// List returns a list of all the items.
func (h *Heap[T]) List() []T {
	list := make([]T, 0, len(h.data.items))
	for _, item := range h.data.items {
		list = append(list, item.obj)
	}
	return list
}

// Len returns the number of items in the heap.
func (h *Heap[T]) Len() int {
	return len(h.data.queue)
}

// New returns a Heap which can be used to queue up items to process.
func New[T any](keyFn KeyFunc[T], lessFn LessFunc[T]) *Heap[T] {
	return NewWithRecorder(keyFn, lessFn, nil)
}

// NewWithRecorder wraps an optional metricRecorder to compose a Heap object.
func NewWithRecorder[T any](keyFn KeyFunc[T], lessFn LessFunc[T], metricRecorder metrics.MetricRecorder) *Heap[T] {
	return &Heap[T]{
		data: &data[T]{
			items:    map[string]*heapItem[T]{},
			queue:    []string{},
			keyFunc:  keyFn,
			lessFunc: lessFn,
		},
		metricRecorder: metricRecorder,
	}
}

// LessFunc is a function that receives two items and returns true if the first
// item should be placed before the second one when the list is sorted.
type LessFunc[T any] func(item1, item2 T) bool
415 vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/active_queue.go generated vendored Normal file
@ -0,0 +1,415 @@
|
||||
/*
|
||||
Copyright 2024 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package queue
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/backend/heap"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/metrics"
|
||||
)
|
||||
|
||||
// activeQueuer is a wrapper for activeQ related operations.
|
||||
// Its methods, except "unlocked" ones, take the lock inside.
|
||||
// Note: be careful when using unlocked() methods.
|
||||
// getLock() methods should be used only for unlocked() methods
|
||||
// and it is forbidden to call any other activeQueuer's method under this lock.
|
||||
type activeQueuer interface {
|
||||
underLock(func(unlockedActiveQ unlockedActiveQueuer))
|
||||
underRLock(func(unlockedActiveQ unlockedActiveQueueReader))
|
||||
|
||||
update(newPod *v1.Pod, oldPodInfo *framework.QueuedPodInfo) *framework.QueuedPodInfo
|
||||
delete(pInfo *framework.QueuedPodInfo) error
|
||||
pop(logger klog.Logger) (*framework.QueuedPodInfo, error)
|
||||
list() []*v1.Pod
|
||||
len() int
|
||||
has(pInfo *framework.QueuedPodInfo) bool
|
||||
|
||||
listInFlightEvents() []interface{}
|
||||
listInFlightPods() []*v1.Pod
|
||||
clusterEventsForPod(logger klog.Logger, pInfo *framework.QueuedPodInfo) ([]*clusterEvent, error)
|
||||
addEventsIfPodInFlight(oldPod, newPod *v1.Pod, events []framework.ClusterEvent) bool
|
||||
addEventIfAnyInFlight(oldObj, newObj interface{}, event framework.ClusterEvent) bool
|
||||
|
||||
schedulingCycle() int64
|
||||
done(pod types.UID)
|
||||
close()
|
||||
broadcast()
|
||||
}
|
||||
|
||||
// unlockedActiveQueuer defines activeQ methods that are not protected by the lock itself.
|
||||
// underLock() method should be used to protect these methods.
|
||||
type unlockedActiveQueuer interface {
|
||||
unlockedActiveQueueReader
|
||||
AddOrUpdate(pInfo *framework.QueuedPodInfo)
|
||||
}
|
||||
|
||||
// unlockedActiveQueueReader defines activeQ read-only methods that are not protected by the lock itself.
|
||||
// underLock() or underRLock() method should be used to protect these methods.
|
||||
type unlockedActiveQueueReader interface {
|
||||
Get(pInfo *framework.QueuedPodInfo) (*framework.QueuedPodInfo, bool)
|
||||
Has(pInfo *framework.QueuedPodInfo) bool
|
||||
}
|
||||
|
||||
// activeQueue implements activeQueuer. All of the fields have to be protected using the lock.
|
||||
type activeQueue struct {
|
||||
// lock synchronizes all operations related to activeQ.
|
||||
// It protects activeQ, inFlightPods, inFlightEvents, schedulingCycle and closed fields.
|
||||
// Caution: DO NOT take "SchedulingQueue.lock" after taking "lock".
|
||||
// You should always take "SchedulingQueue.lock" first, otherwise the queue could end up in deadlock.
|
||||
// "lock" should not be taken after taking "nLock".
|
||||
// Correct locking order is: SchedulingQueue.lock > lock > nominator.nLock.
|
||||
lock sync.RWMutex
|
||||
|
||||
// activeQ is a heap structure that the scheduler actively looks at to find pods to
// schedule. The head of the heap is the highest-priority pod.
|
||||
queue *heap.Heap[*framework.QueuedPodInfo]
|
||||
|
||||
// cond is a condition that is notified when the pod is added to activeQ.
|
||||
// It is used with lock.
|
||||
cond sync.Cond
|
||||
|
||||
// inFlightPods holds the UID of all pods which have been popped out for which Done
|
||||
// hasn't been called yet - in other words, all pods that are currently being
|
||||
// processed (being scheduled, in permit, or in the binding cycle).
|
||||
//
|
||||
// The values in the map are the entry of each pod in the inFlightEvents list.
|
||||
// The value of that entry is the *v1.Pod at the time that scheduling of that
|
||||
// pod started, which can be useful for logging or debugging.
|
||||
inFlightPods map[types.UID]*list.Element
|
||||
|
||||
// inFlightEvents holds the events received by the scheduling queue
|
||||
// (entry value is clusterEvent) together with in-flight pods (entry
|
||||
// value is *v1.Pod). Entries get added at the end while the mutex is
|
||||
// locked, so they get serialized.
|
||||
//
|
||||
// The pod entries are added in Pop and used to track which events
|
||||
// occurred after the pod scheduling attempt for that pod started.
|
||||
// They get removed when the scheduling attempt is done, at which
|
||||
// point all events that occurred in the meantime are processed.
|
||||
//
|
||||
// After removal of a pod, events at the start of the list are no
|
||||
// longer needed because all of the other in-flight pods started
|
||||
// later. Those events can be removed.
|
||||
inFlightEvents *list.List
|
||||
|
||||
// schedCycle represents sequence number of scheduling cycle and is incremented
|
||||
// when a pod is popped.
|
||||
schedCycle int64
|
||||
|
||||
// closed indicates that the queue is closed.
|
||||
// It is mainly used to let Pop() exit its control loop while waiting for an item.
|
||||
closed bool
|
||||
|
||||
// isSchedulingQueueHintEnabled indicates whether the SchedulerQueueingHints feature gate is enabled.
|
||||
isSchedulingQueueHintEnabled bool
|
||||
|
||||
metricsRecorder metrics.MetricAsyncRecorder
|
||||
}
|
||||
|
||||
func newActiveQueue(queue *heap.Heap[*framework.QueuedPodInfo], isSchedulingQueueHintEnabled bool, metricRecorder metrics.MetricAsyncRecorder) *activeQueue {
|
||||
aq := &activeQueue{
|
||||
queue: queue,
|
||||
inFlightPods: make(map[types.UID]*list.Element),
|
||||
inFlightEvents: list.New(),
|
||||
isSchedulingQueueHintEnabled: isSchedulingQueueHintEnabled,
|
||||
metricsRecorder: metricRecorder,
|
||||
}
|
||||
aq.cond.L = &aq.lock
|
||||
|
||||
return aq
|
||||
}
|
||||
|
||||
// underLock runs the fn function under the lock.Lock.
|
||||
// fn can run unlockedActiveQueuer methods but should NOT run any other activeQueue method,
|
||||
// as it would end up in deadlock.
|
||||
func (aq *activeQueue) underLock(fn func(unlockedActiveQ unlockedActiveQueuer)) {
|
||||
aq.lock.Lock()
|
||||
defer aq.lock.Unlock()
|
||||
fn(aq.queue)
|
||||
}
|
||||
|
||||
// underLock runs the fn function under the lock.RLock.
|
||||
// fn can run unlockedActiveQueueReader methods but should NOT run any other activeQueue method,
|
||||
// as it would end up in deadlock.
|
||||
func (aq *activeQueue) underRLock(fn func(unlockedActiveQ unlockedActiveQueueReader)) {
|
||||
aq.lock.RLock()
|
||||
defer aq.lock.RUnlock()
|
||||
fn(aq.queue)
|
||||
}
|
||||
|
||||
// update updates the pod in activeQ if oldPodInfo is already in the queue.
|
||||
// It returns new pod info if updated, nil otherwise.
|
||||
func (aq *activeQueue) update(newPod *v1.Pod, oldPodInfo *framework.QueuedPodInfo) *framework.QueuedPodInfo {
|
||||
aq.lock.Lock()
|
||||
defer aq.lock.Unlock()
|
||||
|
||||
if pInfo, exists := aq.queue.Get(oldPodInfo); exists {
|
||||
_ = pInfo.Update(newPod)
|
||||
aq.queue.AddOrUpdate(pInfo)
|
||||
return pInfo
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// delete deletes the pod info from activeQ.
|
||||
func (aq *activeQueue) delete(pInfo *framework.QueuedPodInfo) error {
|
||||
aq.lock.Lock()
|
||||
defer aq.lock.Unlock()
|
||||
|
||||
return aq.queue.Delete(pInfo)
|
||||
}
|
||||
|
||||
// pop removes the head of the queue and returns it.
|
||||
// It blocks if the queue is empty and waits until a new item is added to the queue.
|
||||
// It increments scheduling cycle when a pod is popped.
|
||||
func (aq *activeQueue) pop(logger klog.Logger) (*framework.QueuedPodInfo, error) {
|
||||
aq.lock.Lock()
|
||||
defer aq.lock.Unlock()
|
||||
|
||||
return aq.unlockedPop(logger)
|
||||
}
|
||||
|
||||
func (aq *activeQueue) unlockedPop(logger klog.Logger) (*framework.QueuedPodInfo, error) {
|
||||
for aq.queue.Len() == 0 {
|
||||
// When the queue is empty, invocation of Pop() is blocked until new item is enqueued.
|
||||
// When Close() is called, the p.closed is set and the condition is broadcast,
|
||||
// which causes this loop to continue and return from the Pop().
|
||||
if aq.closed {
|
||||
logger.V(2).Info("Scheduling queue is closed")
|
||||
return nil, nil
|
||||
}
|
||||
aq.cond.Wait()
|
||||
}
|
||||
pInfo, err := aq.queue.Pop()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pInfo.Attempts++
|
||||
// In flight, no concurrent events yet.
|
||||
if aq.isSchedulingQueueHintEnabled {
|
||||
// If the pod is already in the map, we shouldn't overwrite the inFlightPods otherwise it'd lead to a memory leak.
|
||||
// https://github.com/kubernetes/kubernetes/pull/127016
|
||||
if _, ok := aq.inFlightPods[pInfo.Pod.UID]; ok {
|
||||
// Just report it as an error, but no need to stop the scheduler
|
||||
// because it likely doesn't cause any visible issues from the scheduling perspective.
|
||||
logger.Error(nil, "the same pod is tracked in multiple places in the scheduler, and just discard it", "pod", klog.KObj(pInfo.Pod))
|
||||
// Just ignore/discard this duplicated pod and try to pop the next one.
|
||||
return aq.unlockedPop(logger)
|
||||
}
|
||||
|
||||
aq.metricsRecorder.ObserveInFlightEventsAsync(metrics.PodPoppedInFlightEvent, 1, false)
|
||||
aq.inFlightPods[pInfo.Pod.UID] = aq.inFlightEvents.PushBack(pInfo.Pod)
|
||||
}
|
||||
aq.schedCycle++
|
||||
|
||||
// Update metrics and reset the set of unschedulable plugins for the next attempt.
|
||||
for plugin := range pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins) {
|
||||
metrics.UnschedulableReason(plugin, pInfo.Pod.Spec.SchedulerName).Dec()
|
||||
}
|
||||
pInfo.UnschedulablePlugins.Clear()
|
||||
pInfo.PendingPlugins.Clear()
|
||||
|
||||
return pInfo, nil
|
||||
}
|
||||
|
||||
// list returns all pods that are in the queue.
|
||||
func (aq *activeQueue) list() []*v1.Pod {
|
||||
aq.lock.RLock()
|
||||
defer aq.lock.RUnlock()
|
||||
var result []*v1.Pod
|
||||
for _, pInfo := range aq.queue.List() {
|
||||
result = append(result, pInfo.Pod)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// len returns length of the queue.
|
||||
func (aq *activeQueue) len() int {
|
||||
return aq.queue.Len()
|
||||
}
|
||||
|
||||
// has informs whether pInfo exists in the queue.
|
||||
func (aq *activeQueue) has(pInfo *framework.QueuedPodInfo) bool {
|
||||
aq.lock.RLock()
|
||||
defer aq.lock.RUnlock()
|
||||
return aq.queue.Has(pInfo)
|
||||
}
|
||||
|
||||
// listInFlightEvents returns all inFlightEvents.
|
||||
func (aq *activeQueue) listInFlightEvents() []interface{} {
|
||||
aq.lock.RLock()
|
||||
defer aq.lock.RUnlock()
|
||||
var values []interface{}
|
||||
for event := aq.inFlightEvents.Front(); event != nil; event = event.Next() {
|
||||
values = append(values, event.Value)
|
||||
}
|
||||
return values
|
||||
}
|
||||
|
||||
// listInFlightPods returns all inFlightPods.
|
||||
func (aq *activeQueue) listInFlightPods() []*v1.Pod {
|
||||
aq.lock.RLock()
|
||||
defer aq.lock.RUnlock()
|
||||
var pods []*v1.Pod
|
||||
for _, obj := range aq.inFlightPods {
|
||||
pods = append(pods, obj.Value.(*v1.Pod))
|
||||
}
|
||||
return pods
|
||||
}
|
||||
|
||||
// clusterEventsForPod gets all cluster events that have happened while the pod for pInfo is being scheduled.
|
||||
func (aq *activeQueue) clusterEventsForPod(logger klog.Logger, pInfo *framework.QueuedPodInfo) ([]*clusterEvent, error) {
|
||||
aq.lock.RLock()
|
||||
defer aq.lock.RUnlock()
|
||||
logger.V(5).Info("Checking events for in-flight pod", "pod", klog.KObj(pInfo.Pod), "unschedulablePlugins", pInfo.UnschedulablePlugins, "inFlightEventsSize", aq.inFlightEvents.Len(), "inFlightPodsSize", len(aq.inFlightPods))
|
||||
|
||||
// AddUnschedulableIfNotPresent is called with the Pod at the end of scheduling or binding.
|
||||
// So, given pInfo should have been Pop()ed before,
|
||||
// we can assume pInfo must be recorded in inFlightPods and thus inFlightEvents.
|
||||
inFlightPod, ok := aq.inFlightPods[pInfo.Pod.UID]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("in flight Pod isn't found in the scheduling queue. If you see this error log, it's likely a bug in the scheduler")
|
||||
}
|
||||
|
||||
var events []*clusterEvent
|
||||
for event := inFlightPod.Next(); event != nil; event = event.Next() {
|
||||
e, ok := event.Value.(*clusterEvent)
|
||||
if !ok {
|
||||
// Must be another in-flight Pod (*v1.Pod). Can be ignored.
|
||||
continue
|
||||
}
|
||||
events = append(events, e)
|
||||
}
|
||||
return events, nil
|
||||
}
|
||||
|
||||
// addEventsIfPodInFlight adds clusterEvent to inFlightEvents if the newPod is in inFlightPods.
|
||||
// It returns true if it pushed the event to inFlightEvents.
|
||||
func (aq *activeQueue) addEventsIfPodInFlight(oldPod, newPod *v1.Pod, events []framework.ClusterEvent) bool {
|
||||
aq.lock.Lock()
|
||||
defer aq.lock.Unlock()
|
||||
|
||||
_, ok := aq.inFlightPods[newPod.UID]
|
||||
if ok {
|
||||
for _, event := range events {
|
||||
aq.metricsRecorder.ObserveInFlightEventsAsync(event.Label(), 1, false)
|
||||
aq.inFlightEvents.PushBack(&clusterEvent{
|
||||
event: event,
|
||||
oldObj: oldPod,
|
||||
newObj: newPod,
|
||||
})
|
||||
}
|
||||
}
|
||||
return ok
|
||||
}
|
||||
|
||||
// addEventIfAnyInFlight adds clusterEvent to inFlightEvents if any pod is in inFlightPods.
|
||||
// It returns true if it pushed the event to inFlightEvents.
|
||||
func (aq *activeQueue) addEventIfAnyInFlight(oldObj, newObj interface{}, event framework.ClusterEvent) bool {
|
||||
aq.lock.Lock()
|
||||
defer aq.lock.Unlock()
|
||||
|
||||
if len(aq.inFlightPods) != 0 {
|
||||
aq.metricsRecorder.ObserveInFlightEventsAsync(event.Label(), 1, false)
|
||||
aq.inFlightEvents.PushBack(&clusterEvent{
|
||||
event: event,
|
||||
oldObj: oldObj,
|
||||
newObj: newObj,
|
||||
})
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (aq *activeQueue) schedulingCycle() int64 {
|
||||
aq.lock.RLock()
|
||||
defer aq.lock.RUnlock()
|
||||
return aq.schedCycle
|
||||
}
|
||||
|
||||
// done must be called for pod returned by Pop. This allows the queue to
|
||||
// keep track of which pods are currently being processed.
|
||||
func (aq *activeQueue) done(pod types.UID) {
|
||||
aq.lock.Lock()
|
||||
defer aq.lock.Unlock()
|
||||
|
||||
inFlightPod, ok := aq.inFlightPods[pod]
|
||||
if !ok {
|
||||
// This Pod is already done()ed.
|
||||
return
|
||||
}
|
||||
delete(aq.inFlightPods, pod)
|
||||
|
||||
// Remove the pod from the list.
|
||||
aq.inFlightEvents.Remove(inFlightPod)
|
||||
|
||||
aggrMetricsCounter := map[string]int{}
|
||||
// Remove events which are only referred to by this Pod
|
||||
// so that the inFlightEvents list doesn't grow infinitely.
|
||||
// If the pod was at the head of the list, then all
|
||||
// events between it and the next pod are no longer needed
|
||||
// and can be removed.
|
||||
for {
|
||||
e := aq.inFlightEvents.Front()
|
||||
if e == nil {
|
||||
// Empty list.
|
||||
break
|
||||
}
|
||||
ev, ok := e.Value.(*clusterEvent)
|
||||
if !ok {
|
||||
// A pod, must stop pruning.
|
||||
break
|
||||
}
|
||||
aq.inFlightEvents.Remove(e)
|
||||
aggrMetricsCounter[ev.event.Label()]--
|
||||
}
|
||||
|
||||
for evLabel, count := range aggrMetricsCounter {
|
||||
aq.metricsRecorder.ObserveInFlightEventsAsync(evLabel, float64(count), false)
|
||||
}
|
||||
|
||||
aq.metricsRecorder.ObserveInFlightEventsAsync(metrics.PodPoppedInFlightEvent, -1,
|
||||
// If it's the last Pod in inFlightPods, we should force-flush the metrics.
|
||||
// Otherwise, especially in small clusters, which don't get a new Pod frequently,
|
||||
// the metrics might not be flushed for a long time.
|
||||
len(aq.inFlightPods) == 0)
|
||||
}
|
||||
|
||||
// close closes the activeQueue.
|
||||
func (aq *activeQueue) close() {
|
||||
// We should call done() for all in-flight pods to clean up the inFlightEvents metrics.
|
||||
// It's safe even if the binding cycle running asynchronously calls done() afterwards;
// done() will just be a no-op.
|
||||
for pod := range aq.inFlightPods {
|
||||
aq.done(pod)
|
||||
}
|
||||
aq.lock.Lock()
|
||||
aq.closed = true
|
||||
aq.lock.Unlock()
|
||||
}
|
||||
|
||||
// broadcast notifies the pop() operation that new pod(s) was added to the activeQueue.
|
||||
func (aq *activeQueue) broadcast() {
|
||||
aq.cond.Broadcast()
|
||||
}
|
195 vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/nominator.go generated vendored Normal file
@ -0,0 +1,195 @@
|
||||
/*
|
||||
Copyright 2024 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package queue
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
listersv1 "k8s.io/client-go/listers/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
// nominator is a structure that stores pods nominated to run on nodes.
|
||||
// It exists because nominatedNodeName of pod objects stored in the structure
|
||||
// may be different from what the scheduler has here. We should be able to find pods
|
||||
// by their UID and update/delete them.
|
||||
type nominator struct {
|
||||
// nLock synchronizes all operations related to nominator.
|
||||
// It should not be used anywhere else.
|
||||
// Caution: DO NOT take ("SchedulingQueue.lock" or "activeQueue.lock") after taking "nLock".
|
||||
// You should always take "SchedulingQueue.lock" and "activeQueue.lock" first,
|
||||
// otherwise the nominator could end up in deadlock.
|
||||
// Correct locking order is: SchedulingQueue.lock > activeQueue.lock > nLock.
|
||||
nLock sync.RWMutex
|
||||
|
||||
// podLister is used to verify if the given pod is alive.
|
||||
podLister listersv1.PodLister
|
||||
// nominatedPods is a map keyed by a node name and the value is a list of
|
||||
// pods which are nominated to run on the node. These are pods which can be in
|
||||
// the activeQ or unschedulablePods.
|
||||
nominatedPods map[string][]podRef
|
||||
// nominatedPodToNode is a map keyed by Pod UID to the node name where it is
|
||||
// nominated.
|
||||
nominatedPodToNode map[types.UID]string
|
||||
}
|
||||
|
||||
func newPodNominator(podLister listersv1.PodLister) *nominator {
|
||||
return &nominator{
|
||||
podLister: podLister,
|
||||
nominatedPods: make(map[string][]podRef),
|
||||
nominatedPodToNode: make(map[types.UID]string),
|
||||
}
|
||||
}
|
||||
|
||||
// AddNominatedPod adds a pod to the nominated pods of the given node.
|
||||
// This is called during the preemption process after a node is nominated to run
|
||||
// the pod. We update the structure before sending a request to update the pod
|
||||
// object to avoid races with the following scheduling cycles.
|
||||
func (npm *nominator) AddNominatedPod(logger klog.Logger, pi *framework.PodInfo, nominatingInfo *framework.NominatingInfo) {
|
||||
npm.nLock.Lock()
|
||||
npm.addNominatedPodUnlocked(logger, pi, nominatingInfo)
|
||||
npm.nLock.Unlock()
|
||||
}
|
||||
|
||||
func (npm *nominator) addNominatedPodUnlocked(logger klog.Logger, pi *framework.PodInfo, nominatingInfo *framework.NominatingInfo) {
|
||||
// Always delete the pod if it already exists, to ensure we never store more than
|
||||
// one instance of the pod.
|
||||
npm.deleteUnlocked(pi.Pod)
|
||||
|
||||
var nodeName string
|
||||
if nominatingInfo.Mode() == framework.ModeOverride {
|
||||
nodeName = nominatingInfo.NominatedNodeName
|
||||
} else if nominatingInfo.Mode() == framework.ModeNoop {
|
||||
if pi.Pod.Status.NominatedNodeName == "" {
|
||||
return
|
||||
}
|
||||
nodeName = pi.Pod.Status.NominatedNodeName
|
||||
}
|
||||
|
||||
if npm.podLister != nil {
|
||||
// If the pod was removed or if it was already scheduled, don't nominate it.
|
||||
updatedPod, err := npm.podLister.Pods(pi.Pod.Namespace).Get(pi.Pod.Name)
|
||||
if err != nil {
|
||||
logger.V(4).Info("Pod doesn't exist in podLister, aborted adding it to the nominator", "pod", klog.KObj(pi.Pod))
|
||||
return
|
||||
}
|
||||
if updatedPod.Spec.NodeName != "" {
|
||||
logger.V(4).Info("Pod is already scheduled to a node, aborted adding it to the nominator", "pod", klog.KObj(pi.Pod), "node", updatedPod.Spec.NodeName)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
npm.nominatedPodToNode[pi.Pod.UID] = nodeName
|
||||
for _, np := range npm.nominatedPods[nodeName] {
|
||||
if np.uid == pi.Pod.UID {
|
||||
logger.V(4).Info("Pod already exists in the nominator", "pod", np.uid)
|
||||
return
|
||||
}
|
||||
}
|
||||
npm.nominatedPods[nodeName] = append(npm.nominatedPods[nodeName], podToRef(pi.Pod))
|
||||
}
|
||||
|
||||
// UpdateNominatedPod updates the <oldPod> with <newPod>.
|
||||
func (npm *nominator) UpdateNominatedPod(logger klog.Logger, oldPod *v1.Pod, newPodInfo *framework.PodInfo) {
|
||||
npm.nLock.Lock()
|
||||
defer npm.nLock.Unlock()
|
||||
// In some cases, an Update event with no "NominatedNode" present is received right
|
||||
// after a node("NominatedNode") is reserved for this pod in memory.
|
||||
// In this case, we need to keep reserving the NominatedNode when updating the pod pointer.
|
||||
var nominatingInfo *framework.NominatingInfo
|
||||
// We won't fall into below `if` block if the Update event represents:
|
||||
// (1) NominatedNode info is added
|
||||
// (2) NominatedNode info is updated
|
||||
// (3) NominatedNode info is removed
|
||||
if nominatedNodeName(oldPod) == "" && nominatedNodeName(newPodInfo.Pod) == "" {
|
||||
if nnn, ok := npm.nominatedPodToNode[oldPod.UID]; ok {
|
||||
// This is the only case we should continue reserving the NominatedNode
|
||||
nominatingInfo = &framework.NominatingInfo{
|
||||
NominatingMode: framework.ModeOverride,
|
||||
NominatedNodeName: nnn,
|
||||
}
|
||||
}
|
||||
}
|
||||
// We update irrespective of the nominatedNodeName changed or not, to ensure
|
||||
// that pod pointer is updated.
|
||||
npm.deleteUnlocked(oldPod)
|
||||
npm.addNominatedPodUnlocked(logger, newPodInfo, nominatingInfo)
|
||||
}
|
||||
|
||||
// DeleteNominatedPodIfExists deletes <pod> from nominatedPods.
|
||||
func (npm *nominator) DeleteNominatedPodIfExists(pod *v1.Pod) {
|
||||
npm.nLock.Lock()
|
||||
npm.deleteUnlocked(pod)
|
||||
npm.nLock.Unlock()
|
||||
}
|
||||
|
||||
func (npm *nominator) deleteUnlocked(p *v1.Pod) {
|
||||
nnn, ok := npm.nominatedPodToNode[p.UID]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
for i, np := range npm.nominatedPods[nnn] {
|
||||
if np.uid == p.UID {
|
||||
npm.nominatedPods[nnn] = append(npm.nominatedPods[nnn][:i], npm.nominatedPods[nnn][i+1:]...)
|
||||
if len(npm.nominatedPods[nnn]) == 0 {
|
||||
delete(npm.nominatedPods, nnn)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
delete(npm.nominatedPodToNode, p.UID)
|
||||
}
|
||||
|
||||
func (npm *nominator) nominatedPodsForNode(nodeName string) []podRef {
|
||||
npm.nLock.RLock()
|
||||
defer npm.nLock.RUnlock()
|
||||
return slices.Clone(npm.nominatedPods[nodeName])
|
||||
}
|
||||
|
||||
// nominatedNodeName returns nominated node name of a Pod.
|
||||
func nominatedNodeName(pod *v1.Pod) string {
|
||||
return pod.Status.NominatedNodeName
|
||||
}
|
||||
|
||||
type podRef struct {
|
||||
name string
|
||||
namespace string
|
||||
uid types.UID
|
||||
}
|
||||
|
||||
func podToRef(pod *v1.Pod) podRef {
|
||||
return podRef{
|
||||
name: pod.Name,
|
||||
namespace: pod.Namespace,
|
||||
uid: pod.UID,
|
||||
}
|
||||
}
|
||||
|
||||
func (np podRef) toPod() *v1.Pod {
|
||||
return &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: np.name,
|
||||
Namespace: np.namespace,
|
||||
UID: np.uid,
|
||||
},
|
||||
}
|
||||
}
|
1397 vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/scheduling_queue.go generated vendored Normal file
(File diff suppressed because it is too large)
63 vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/testing.go generated vendored Normal file
@ -0,0 +1,63 @@
/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package queue

import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/metrics"
)

// NewTestQueue creates a priority queue with an empty informer factory.
func NewTestQueue(ctx context.Context, lessFn framework.LessFunc, opts ...Option) *PriorityQueue {
	return NewTestQueueWithObjects(ctx, lessFn, nil, opts...)
}

// NewTestQueueWithObjects creates a priority queue with an informer factory
// populated with the provided objects.
func NewTestQueueWithObjects(
	ctx context.Context,
	lessFn framework.LessFunc,
	objs []runtime.Object,
	opts ...Option,
) *PriorityQueue {
	informerFactory := informers.NewSharedInformerFactory(fake.NewClientset(objs...), 0)

	// Because some major functions (e.g., Pop) require the metric recorder to be set,
	// we always set a metric recorder here.
	recorder := metrics.NewMetricsAsyncRecorder(10, 20*time.Microsecond, ctx.Done())
	// We set it before the options that users provide, so that users can override it.
	opts = append([]Option{WithMetricsRecorder(*recorder)}, opts...)
	return NewTestQueueWithInformerFactory(ctx, lessFn, informerFactory, opts...)
}

// NewTestQueueWithInformerFactory creates a priority queue from the given
// informer factory, starts the factory, and waits for its caches to sync.
func NewTestQueueWithInformerFactory(
	ctx context.Context,
	lessFn framework.LessFunc,
	informerFactory informers.SharedInformerFactory,
	opts ...Option,
) *PriorityQueue {
	pq := NewPriorityQueue(lessFn, informerFactory, opts...)
	informerFactory.Start(ctx.Done())
	informerFactory.WaitForCacheSync(ctx.Done())
	return pq
}
665 vendor/k8s.io/kubernetes/pkg/scheduler/eventhandlers.go generated vendored Normal file
@ -0,0 +1,665 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
storagev1 "k8s.io/api/storage/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime/schema"
|
||||
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/client-go/dynamic/dynamicinformer"
|
||||
"k8s.io/client-go/informers"
|
||||
"k8s.io/client-go/tools/cache"
|
||||
corev1helpers "k8s.io/component-helpers/scheduling/corev1"
|
||||
corev1nodeaffinity "k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/scheduler/backend/queue"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodename"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeports"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources"
|
||||
"k8s.io/kubernetes/pkg/scheduler/metrics"
|
||||
"k8s.io/kubernetes/pkg/scheduler/profile"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util/assumecache"
|
||||
)
|
||||
|
||||
func (sched *Scheduler) addNodeToCache(obj interface{}) {
|
||||
evt := framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add}
|
||||
start := time.Now()
|
||||
defer metrics.EventHandlingLatency.WithLabelValues(evt.Label()).Observe(metrics.SinceInSeconds(start))
|
||||
logger := sched.logger
|
||||
node, ok := obj.(*v1.Node)
|
||||
if !ok {
|
||||
logger.Error(nil, "Cannot convert to *v1.Node", "obj", obj)
|
||||
return
|
||||
}
|
||||
|
||||
logger.V(3).Info("Add event for node", "node", klog.KObj(node))
|
||||
nodeInfo := sched.Cache.AddNode(logger, node)
|
||||
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(logger, evt, nil, node, preCheckForNode(nodeInfo))
|
||||
}
|
||||
|
||||
func (sched *Scheduler) updateNodeInCache(oldObj, newObj interface{}) {
|
||||
start := time.Now()
|
||||
logger := sched.logger
|
||||
oldNode, ok := oldObj.(*v1.Node)
|
||||
if !ok {
|
||||
logger.Error(nil, "Cannot convert oldObj to *v1.Node", "oldObj", oldObj)
|
||||
return
|
||||
}
|
||||
newNode, ok := newObj.(*v1.Node)
|
||||
if !ok {
|
||||
logger.Error(nil, "Cannot convert newObj to *v1.Node", "newObj", newObj)
|
||||
return
|
||||
}
|
||||
|
||||
logger.V(4).Info("Update event for node", "node", klog.KObj(newNode))
|
||||
nodeInfo := sched.Cache.UpdateNode(logger, oldNode, newNode)
|
||||
events := framework.NodeSchedulingPropertiesChange(newNode, oldNode)
|
||||
|
||||
// Save the time it takes to update the node in the cache.
|
||||
updatingDuration := metrics.SinceInSeconds(start)
|
||||
|
||||
// Only requeue unschedulable pods if the node became more schedulable.
|
||||
for _, evt := range events {
|
||||
startMoving := time.Now()
|
||||
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(logger, evt, oldNode, newNode, preCheckForNode(nodeInfo))
|
||||
movingDuration := metrics.SinceInSeconds(startMoving)
|
||||
|
||||
metrics.EventHandlingLatency.WithLabelValues(evt.Label()).Observe(updatingDuration + movingDuration)
|
||||
}
|
||||
}
|
||||
|
||||
func (sched *Scheduler) deleteNodeFromCache(obj interface{}) {
|
||||
evt := framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Delete}
|
||||
start := time.Now()
|
||||
defer metrics.EventHandlingLatency.WithLabelValues(evt.Label()).Observe(metrics.SinceInSeconds(start))
|
||||
|
||||
logger := sched.logger
|
||||
var node *v1.Node
|
||||
switch t := obj.(type) {
|
||||
case *v1.Node:
|
||||
node = t
|
||||
case cache.DeletedFinalStateUnknown:
|
||||
var ok bool
|
||||
node, ok = t.Obj.(*v1.Node)
|
||||
if !ok {
|
||||
logger.Error(nil, "Cannot convert to *v1.Node", "obj", t.Obj)
|
||||
return
|
||||
}
|
||||
default:
|
||||
logger.Error(nil, "Cannot convert to *v1.Node", "obj", t)
|
||||
return
|
||||
}
|
||||
|
||||
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(logger, evt, node, nil, nil)
|
||||
|
||||
logger.V(3).Info("Delete event for node", "node", klog.KObj(node))
|
||||
if err := sched.Cache.RemoveNode(logger, node); err != nil {
|
||||
logger.Error(err, "Scheduler cache RemoveNode failed")
|
||||
}
|
||||
}
|
||||
|
||||
func (sched *Scheduler) addPodToSchedulingQueue(obj interface{}) {
|
||||
start := time.Now()
|
||||
defer metrics.EventHandlingLatency.WithLabelValues(framework.EventUnscheduledPodAdd.Label()).Observe(metrics.SinceInSeconds(start))
|
||||
|
||||
logger := sched.logger
|
||||
pod := obj.(*v1.Pod)
|
||||
logger.V(3).Info("Add event for unscheduled pod", "pod", klog.KObj(pod))
|
||||
sched.SchedulingQueue.Add(logger, pod)
|
||||
}
|
||||
|
||||
func (sched *Scheduler) updatePodInSchedulingQueue(oldObj, newObj interface{}) {
|
||||
start := time.Now()
|
||||
logger := sched.logger
|
||||
oldPod, newPod := oldObj.(*v1.Pod), newObj.(*v1.Pod)
|
||||
// Bypass update event that carries identical objects; otherwise, a duplicated
|
||||
// Pod may go through scheduling and cause unexpected behavior (see #96071).
|
||||
if oldPod.ResourceVersion == newPod.ResourceVersion {
|
||||
return
|
||||
}
|
||||
|
||||
defer metrics.EventHandlingLatency.WithLabelValues(framework.EventUnscheduledPodUpdate.Label()).Observe(metrics.SinceInSeconds(start))
|
||||
for _, evt := range framework.PodSchedulingPropertiesChange(newPod, oldPod) {
|
||||
if evt.Label() != framework.EventUnscheduledPodUpdate.Label() {
|
||||
defer metrics.EventHandlingLatency.WithLabelValues(evt.Label()).Observe(metrics.SinceInSeconds(start))
|
||||
}
|
||||
}
|
||||
|
||||
isAssumed, err := sched.Cache.IsAssumedPod(newPod)
|
||||
if err != nil {
|
||||
utilruntime.HandleError(fmt.Errorf("failed to check whether pod %s/%s is assumed: %v", newPod.Namespace, newPod.Name, err))
|
||||
}
|
||||
if isAssumed {
|
||||
return
|
||||
}
|
||||
|
||||
logger.V(4).Info("Update event for unscheduled pod", "pod", klog.KObj(newPod))
|
||||
sched.SchedulingQueue.Update(logger, oldPod, newPod)
|
||||
}
|
||||
|
||||
func (sched *Scheduler) deletePodFromSchedulingQueue(obj interface{}) {
|
||||
start := time.Now()
|
||||
defer metrics.EventHandlingLatency.WithLabelValues(framework.EventUnscheduledPodDelete.Label()).Observe(metrics.SinceInSeconds(start))
|
||||
|
||||
logger := sched.logger
|
||||
var pod *v1.Pod
|
||||
switch t := obj.(type) {
|
||||
case *v1.Pod:
|
||||
pod = obj.(*v1.Pod)
|
||||
case cache.DeletedFinalStateUnknown:
|
||||
var ok bool
|
||||
pod, ok = t.Obj.(*v1.Pod)
|
||||
if !ok {
|
||||
utilruntime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, sched))
|
||||
return
|
||||
}
|
||||
default:
|
||||
utilruntime.HandleError(fmt.Errorf("unable to handle object in %T: %T", sched, obj))
|
||||
return
|
||||
}
|
||||
|
||||
logger.V(3).Info("Delete event for unscheduled pod", "pod", klog.KObj(pod))
|
||||
sched.SchedulingQueue.Delete(pod)
|
||||
fwk, err := sched.frameworkForPod(pod)
|
||||
if err != nil {
|
||||
// This shouldn't happen, because we only accept for scheduling the pods
|
||||
// which specify a scheduler name that matches one of the profiles.
|
||||
logger.Error(err, "Unable to get profile", "pod", klog.KObj(pod))
|
||||
return
|
||||
}
|
||||
// If a waiting pod is rejected, it indicates it's previously assumed and we're
|
||||
// removing it from the scheduler cache. In this case, signal an AssignedPodDelete
|
||||
// event to immediately retry some unscheduled Pods.
|
||||
if fwk.RejectWaitingPod(pod.UID) {
|
||||
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(logger, framework.EventAssignedPodDelete, pod, nil, nil)
|
||||
}
|
||||
}
|
||||
|
||||
func (sched *Scheduler) addPodToCache(obj interface{}) {
|
||||
start := time.Now()
|
||||
defer metrics.EventHandlingLatency.WithLabelValues(framework.EventAssignedPodAdd.Label()).Observe(metrics.SinceInSeconds(start))
|
||||
|
||||
logger := sched.logger
|
||||
pod, ok := obj.(*v1.Pod)
|
||||
if !ok {
|
||||
logger.Error(nil, "Cannot convert to *v1.Pod", "obj", obj)
|
||||
return
|
||||
}
|
||||
|
||||
logger.V(3).Info("Add event for scheduled pod", "pod", klog.KObj(pod))
|
||||
if err := sched.Cache.AddPod(logger, pod); err != nil {
|
||||
logger.Error(err, "Scheduler cache AddPod failed", "pod", klog.KObj(pod))
|
||||
}
|
||||
|
||||
// SchedulingQueue.AssignedPodAdded has a problem:
|
||||
// It internally pre-filters Pods to move to activeQ,
|
||||
// while taking only in-tree plugins into consideration.
|
||||
// Consequently, if custom plugins that subscribe to Pod/Add events reject Pods,
// those Pods will never be requeued to activeQ by assigned-Pod-related events,
// and they may be stuck in unschedulableQ.
|
||||
//
|
||||
// Here we use MoveAllToActiveOrBackoffQueue only when QueueingHint is enabled.
|
||||
// (We cannot switch to MoveAllToActiveOrBackoffQueue right away because of throughput concern.)
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints) {
|
||||
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(logger, framework.EventAssignedPodAdd, nil, pod, nil)
|
||||
} else {
|
||||
sched.SchedulingQueue.AssignedPodAdded(logger, pod)
|
||||
}
|
||||
}
|
||||
|
||||
func (sched *Scheduler) updatePodInCache(oldObj, newObj interface{}) {
|
||||
start := time.Now()
|
||||
defer metrics.EventHandlingLatency.WithLabelValues(framework.EventAssignedPodUpdate.Label()).Observe(metrics.SinceInSeconds(start))
|
||||
|
||||
logger := sched.logger
|
||||
oldPod, ok := oldObj.(*v1.Pod)
|
||||
if !ok {
|
||||
logger.Error(nil, "Cannot convert oldObj to *v1.Pod", "oldObj", oldObj)
|
||||
return
|
||||
}
|
||||
newPod, ok := newObj.(*v1.Pod)
|
||||
if !ok {
|
||||
logger.Error(nil, "Cannot convert newObj to *v1.Pod", "newObj", newObj)
|
||||
return
|
||||
}
|
||||
|
||||
logger.V(4).Info("Update event for scheduled pod", "pod", klog.KObj(oldPod))
|
||||
if err := sched.Cache.UpdatePod(logger, oldPod, newPod); err != nil {
|
||||
logger.Error(err, "Scheduler cache UpdatePod failed", "pod", klog.KObj(oldPod))
|
||||
}
|
||||
|
||||
events := framework.PodSchedulingPropertiesChange(newPod, oldPod)
|
||||
|
||||
// Save the time it takes to update the pod in the cache.
|
||||
updatingDuration := metrics.SinceInSeconds(start)
|
||||
|
||||
for _, evt := range events {
|
||||
startMoving := time.Now()
|
||||
// SchedulingQueue.AssignedPodUpdated has a problem:
|
||||
// It internally pre-filters Pods to move to activeQ,
|
||||
// while taking only in-tree plugins into consideration.
|
||||
// Consequently, if custom plugins that subscribe to Pod/Update events reject Pods,
// those Pods will never be requeued to activeQ by assigned-Pod-related events,
// and they may be stuck in unschedulableQ.
|
||||
//
|
||||
// Here we use MoveAllToActiveOrBackoffQueue only when QueueingHint is enabled.
|
||||
// (We cannot switch to MoveAllToActiveOrBackoffQueue right away because of throughput concern.)
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints) {
|
||||
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(logger, evt, oldPod, newPod, nil)
|
||||
} else {
|
||||
sched.SchedulingQueue.AssignedPodUpdated(logger, oldPod, newPod, evt)
|
||||
}
|
||||
movingDuration := metrics.SinceInSeconds(startMoving)
|
||||
metrics.EventHandlingLatency.WithLabelValues(evt.Label()).Observe(updatingDuration + movingDuration)
|
||||
}
|
||||
}
|
||||
|
||||
func (sched *Scheduler) deletePodFromCache(obj interface{}) {
|
||||
start := time.Now()
|
||||
defer metrics.EventHandlingLatency.WithLabelValues(framework.EventAssignedPodDelete.Label()).Observe(metrics.SinceInSeconds(start))
|
||||
|
||||
logger := sched.logger
|
||||
var pod *v1.Pod
|
||||
switch t := obj.(type) {
|
||||
case *v1.Pod:
|
||||
pod = t
|
||||
case cache.DeletedFinalStateUnknown:
|
||||
var ok bool
|
||||
pod, ok = t.Obj.(*v1.Pod)
|
||||
if !ok {
|
||||
logger.Error(nil, "Cannot convert to *v1.Pod", "obj", t.Obj)
|
||||
return
|
||||
}
|
||||
default:
|
||||
logger.Error(nil, "Cannot convert to *v1.Pod", "obj", t)
|
||||
return
|
||||
}
|
||||
|
||||
logger.V(3).Info("Delete event for scheduled pod", "pod", klog.KObj(pod))
|
||||
if err := sched.Cache.RemovePod(logger, pod); err != nil {
|
||||
logger.Error(err, "Scheduler cache RemovePod failed", "pod", klog.KObj(pod))
|
||||
}
|
||||
|
||||
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(logger, framework.EventAssignedPodDelete, pod, nil, nil)
|
||||
}
|
||||
|
||||
// assignedPod selects pods that are assigned (scheduled and running).
|
||||
func assignedPod(pod *v1.Pod) bool {
|
||||
return len(pod.Spec.NodeName) != 0
|
||||
}
|
||||
|
||||
// responsibleForPod returns true if the pod has asked to be scheduled by the given scheduler.
|
||||
func responsibleForPod(pod *v1.Pod, profiles profile.Map) bool {
|
||||
return profiles.HandlesSchedulerName(pod.Spec.SchedulerName)
|
||||
}
|
||||
|
||||
const (
|
||||
// syncedPollPeriod controls how often you look at the status of your sync funcs
|
||||
syncedPollPeriod = 100 * time.Millisecond
|
||||
)
|
||||
|
||||
// WaitForHandlersSync waits for EventHandlers to sync.
|
||||
// It returns an error if the handlers are not synced before the context is cancelled.
|
||||
func (sched *Scheduler) WaitForHandlersSync(ctx context.Context) error {
|
||||
return wait.PollUntilContextCancel(ctx, syncedPollPeriod, true, func(ctx context.Context) (done bool, err error) {
|
||||
for _, handler := range sched.registeredHandlers {
|
||||
if !handler.HasSynced() {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
return true, nil
|
||||
})
|
||||
}
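WaitForHandlersSync above is a thin wrapper over wait.PollUntilContextCancel. The following self-contained sketch shows the same polling pattern; the condition here is a stand-in, not the scheduler's actual handler check.

package main

import (
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	start := time.Now()
	// Poll every 100ms (immediately on the first call) until the condition
	// returns true, the condition returns an error, or the context is cancelled.
	err := wait.PollUntilContextCancel(ctx, 100*time.Millisecond, true, func(ctx context.Context) (bool, error) {
		return time.Since(start) > 300*time.Millisecond, nil // stand-in for "all handlers synced"
	})
	fmt.Println("synced:", err == nil)
}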
|
||||
|
||||
// addAllEventHandlers is a helper function used in tests and in Scheduler
|
||||
// to add event handlers for various informers.
|
||||
func addAllEventHandlers(
|
||||
sched *Scheduler,
|
||||
informerFactory informers.SharedInformerFactory,
|
||||
dynInformerFactory dynamicinformer.DynamicSharedInformerFactory,
|
||||
resourceClaimCache *assumecache.AssumeCache,
|
||||
gvkMap map[framework.EventResource]framework.ActionType,
|
||||
) error {
|
||||
var (
|
||||
handlerRegistration cache.ResourceEventHandlerRegistration
|
||||
err error
|
||||
handlers []cache.ResourceEventHandlerRegistration
|
||||
)
|
||||
// scheduled pod cache
|
||||
if handlerRegistration, err = informerFactory.Core().V1().Pods().Informer().AddEventHandler(
|
||||
cache.FilteringResourceEventHandler{
|
||||
FilterFunc: func(obj interface{}) bool {
|
||||
switch t := obj.(type) {
|
||||
case *v1.Pod:
|
||||
return assignedPod(t)
|
||||
case cache.DeletedFinalStateUnknown:
|
||||
if _, ok := t.Obj.(*v1.Pod); ok {
|
||||
// The carried object may be stale, so we don't use it to check if
|
||||
// it's assigned or not. Attempting to clean up anyway.
|
||||
return true
|
||||
}
|
||||
utilruntime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, sched))
|
||||
return false
|
||||
default:
|
||||
utilruntime.HandleError(fmt.Errorf("unable to handle object in %T: %T", sched, obj))
|
||||
return false
|
||||
}
|
||||
},
|
||||
Handler: cache.ResourceEventHandlerFuncs{
|
||||
AddFunc: sched.addPodToCache,
|
||||
UpdateFunc: sched.updatePodInCache,
|
||||
DeleteFunc: sched.deletePodFromCache,
|
||||
},
|
||||
},
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
handlers = append(handlers, handlerRegistration)
|
||||
|
||||
// unscheduled pod queue
|
||||
if handlerRegistration, err = informerFactory.Core().V1().Pods().Informer().AddEventHandler(
|
||||
cache.FilteringResourceEventHandler{
|
||||
FilterFunc: func(obj interface{}) bool {
|
||||
switch t := obj.(type) {
|
||||
case *v1.Pod:
|
||||
return !assignedPod(t) && responsibleForPod(t, sched.Profiles)
|
||||
case cache.DeletedFinalStateUnknown:
|
||||
if pod, ok := t.Obj.(*v1.Pod); ok {
|
||||
// The carried object may be stale, so we don't use it to check if
|
||||
// it's assigned or not.
|
||||
return responsibleForPod(pod, sched.Profiles)
|
||||
}
|
||||
utilruntime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, sched))
|
||||
return false
|
||||
default:
|
||||
utilruntime.HandleError(fmt.Errorf("unable to handle object in %T: %T", sched, obj))
|
||||
return false
|
||||
}
|
||||
},
|
||||
Handler: cache.ResourceEventHandlerFuncs{
|
||||
AddFunc: sched.addPodToSchedulingQueue,
|
||||
UpdateFunc: sched.updatePodInSchedulingQueue,
|
||||
DeleteFunc: sched.deletePodFromSchedulingQueue,
|
||||
},
|
||||
},
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
handlers = append(handlers, handlerRegistration)
|
||||
|
||||
if handlerRegistration, err = informerFactory.Core().V1().Nodes().Informer().AddEventHandler(
|
||||
cache.ResourceEventHandlerFuncs{
|
||||
AddFunc: sched.addNodeToCache,
|
||||
UpdateFunc: sched.updateNodeInCache,
|
||||
DeleteFunc: sched.deleteNodeFromCache,
|
||||
},
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
handlers = append(handlers, handlerRegistration)
|
||||
|
||||
logger := sched.logger
|
||||
buildEvtResHandler := func(at framework.ActionType, resource framework.EventResource) cache.ResourceEventHandlerFuncs {
|
||||
funcs := cache.ResourceEventHandlerFuncs{}
|
||||
if at&framework.Add != 0 {
|
||||
evt := framework.ClusterEvent{Resource: resource, ActionType: framework.Add}
|
||||
funcs.AddFunc = func(obj interface{}) {
|
||||
start := time.Now()
|
||||
defer metrics.EventHandlingLatency.WithLabelValues(evt.Label()).Observe(metrics.SinceInSeconds(start))
|
||||
if resource == framework.StorageClass && !utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints) {
|
||||
sc, ok := obj.(*storagev1.StorageClass)
|
||||
if !ok {
|
||||
logger.Error(nil, "Cannot convert to *storagev1.StorageClass", "obj", obj)
|
||||
return
|
||||
}
|
||||
|
||||
// CheckVolumeBindingPred fails if pod has unbound immediate PVCs. If these
|
||||
// PVCs have specified StorageClass name, creating StorageClass objects
|
||||
// with late binding will cause predicates to pass, so we need to move pods
|
||||
// to active queue.
|
||||
// We don't need to invalidate cached results because results will not be
|
||||
// cached for pod that has unbound immediate PVCs.
|
||||
if sc.VolumeBindingMode == nil || *sc.VolumeBindingMode != storagev1.VolumeBindingWaitForFirstConsumer {
|
||||
return
|
||||
}
|
||||
}
|
||||
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(logger, evt, nil, obj, nil)
|
||||
}
|
||||
}
|
||||
if at&framework.Update != 0 {
|
||||
evt := framework.ClusterEvent{Resource: resource, ActionType: framework.Update}
|
||||
funcs.UpdateFunc = func(old, obj interface{}) {
|
||||
start := time.Now()
|
||||
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(logger, evt, old, obj, nil)
|
||||
metrics.EventHandlingLatency.WithLabelValues(evt.Label()).Observe(metrics.SinceInSeconds(start))
|
||||
}
|
||||
}
|
||||
if at&framework.Delete != 0 {
|
||||
evt := framework.ClusterEvent{Resource: resource, ActionType: framework.Delete}
|
||||
funcs.DeleteFunc = func(obj interface{}) {
|
||||
start := time.Now()
|
||||
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(logger, evt, obj, nil, nil)
|
||||
metrics.EventHandlingLatency.WithLabelValues(evt.Label()).Observe(metrics.SinceInSeconds(start))
|
||||
}
|
||||
}
|
||||
return funcs
|
||||
}
|
||||
|
||||
for gvk, at := range gvkMap {
|
||||
switch gvk {
|
||||
case framework.Node, framework.Pod:
|
||||
// Do nothing.
|
||||
case framework.CSINode:
|
||||
if handlerRegistration, err = informerFactory.Storage().V1().CSINodes().Informer().AddEventHandler(
|
||||
buildEvtResHandler(at, framework.CSINode),
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
handlers = append(handlers, handlerRegistration)
|
||||
case framework.CSIDriver:
|
||||
if handlerRegistration, err = informerFactory.Storage().V1().CSIDrivers().Informer().AddEventHandler(
|
||||
buildEvtResHandler(at, framework.CSIDriver),
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
handlers = append(handlers, handlerRegistration)
|
||||
case framework.CSIStorageCapacity:
|
||||
if handlerRegistration, err = informerFactory.Storage().V1().CSIStorageCapacities().Informer().AddEventHandler(
|
||||
buildEvtResHandler(at, framework.CSIStorageCapacity),
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
handlers = append(handlers, handlerRegistration)
|
||||
case framework.PersistentVolume:
|
||||
// MaxPDVolumeCountPredicate: since it relies on the counts of PV.
|
||||
//
|
||||
// PvAdd: Pods created when there are no PVs available will be stuck in the
// unschedulable queue. But unbound PVs created for static provisioning and
// delay-binding storage classes are skipped by the PV controller's dynamic
// provisioning and binding process, and will not trigger events to schedule the
// pod again. So we need to move pods to the active queue on PV add for this
// scenario.
|
||||
//
|
||||
// PvUpdate: Scheduler.bindVolumesWorker may fail to update assumed pod volume
|
||||
// bindings due to conflicts if PVs are updated by PV controller or other
|
||||
// parties, then scheduler will add pod back to unschedulable queue. We
|
||||
// need to move pods to active queue on PV update for this scenario.
|
||||
if handlerRegistration, err = informerFactory.Core().V1().PersistentVolumes().Informer().AddEventHandler(
|
||||
buildEvtResHandler(at, framework.PersistentVolume),
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
handlers = append(handlers, handlerRegistration)
|
||||
case framework.PersistentVolumeClaim:
|
||||
// MaxPDVolumeCountPredicate: add/update PVC will affect counts of PV when it is bound.
|
||||
if handlerRegistration, err = informerFactory.Core().V1().PersistentVolumeClaims().Informer().AddEventHandler(
|
||||
buildEvtResHandler(at, framework.PersistentVolumeClaim),
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
handlers = append(handlers, handlerRegistration)
|
||||
case framework.ResourceClaim:
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
|
||||
handlerRegistration = resourceClaimCache.AddEventHandler(
|
||||
buildEvtResHandler(at, framework.ResourceClaim),
|
||||
)
|
||||
handlers = append(handlers, handlerRegistration)
|
||||
}
|
||||
case framework.ResourceSlice:
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
|
||||
if handlerRegistration, err = informerFactory.Resource().V1beta1().ResourceSlices().Informer().AddEventHandler(
|
||||
buildEvtResHandler(at, framework.ResourceSlice),
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
handlers = append(handlers, handlerRegistration)
|
||||
}
|
||||
case framework.DeviceClass:
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
|
||||
if handlerRegistration, err = informerFactory.Resource().V1beta1().DeviceClasses().Informer().AddEventHandler(
|
||||
buildEvtResHandler(at, framework.DeviceClass),
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
handlers = append(handlers, handlerRegistration)
|
||||
}
|
||||
case framework.StorageClass:
|
||||
if handlerRegistration, err = informerFactory.Storage().V1().StorageClasses().Informer().AddEventHandler(
|
||||
buildEvtResHandler(at, framework.StorageClass),
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
handlers = append(handlers, handlerRegistration)
|
||||
case framework.VolumeAttachment:
|
||||
if handlerRegistration, err = informerFactory.Storage().V1().VolumeAttachments().Informer().AddEventHandler(
|
||||
buildEvtResHandler(at, framework.VolumeAttachment),
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
handlers = append(handlers, handlerRegistration)
|
||||
default:
|
||||
// Tests may not instantiate dynInformerFactory.
|
||||
if dynInformerFactory == nil {
|
||||
continue
|
||||
}
|
||||
// GVK is expected to be at least 3-folded, separated by dots.
|
||||
// <kind in plural>.<version>.<group>
|
||||
// Valid examples:
|
||||
// - foos.v1.example.com
|
||||
// - bars.v1beta1.a.b.c
|
||||
// Invalid examples:
|
||||
// - foos.v1 (2 sections)
|
||||
// - foo.v1.example.com (the first section should be plural)
|
||||
if strings.Count(string(gvk), ".") < 2 {
|
||||
logger.Error(nil, "incorrect event registration", "gvk", gvk)
|
||||
continue
|
||||
}
|
||||
// Fall back to try dynamic informers.
|
||||
gvr, _ := schema.ParseResourceArg(string(gvk))
|
||||
dynInformer := dynInformerFactory.ForResource(*gvr).Informer()
|
||||
if handlerRegistration, err = dynInformer.AddEventHandler(
|
||||
buildEvtResHandler(at, gvk),
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
handlers = append(handlers, handlerRegistration)
|
||||
}
|
||||
}
|
||||
sched.registeredHandlers = handlers
|
||||
return nil
|
||||
}
|
||||
|
||||
func preCheckForNode(nodeInfo *framework.NodeInfo) queue.PreEnqueueCheck {
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints) {
|
||||
// QHint was originally introduced to replace this preCheck.
// The preCheck assumes that the scheduler only has in-tree plugins, which is problematic for extensibility.
// So we skip the preCheck when QHint is enabled, and will eventually remove the preCheck once QHint graduates.
|
||||
return nil
|
||||
}
|
||||
|
||||
// Note: the following checks don't take preemption into consideration; in very rare
// cases (e.g., node resizing), "pod" may still fail a check but preemption helps. We deliberately
// chose to ignore those cases as unschedulable pods will be re-queued eventually.
|
||||
return func(pod *v1.Pod) bool {
|
||||
admissionResults := AdmissionCheck(pod, nodeInfo, false)
|
||||
if len(admissionResults) != 0 {
|
||||
return false
|
||||
}
|
||||
_, isUntolerated := corev1helpers.FindMatchingUntoleratedTaint(nodeInfo.Node().Spec.Taints, pod.Spec.Tolerations, func(t *v1.Taint) bool {
|
||||
return t.Effect == v1.TaintEffectNoSchedule
|
||||
})
|
||||
return !isUntolerated
|
||||
}
|
||||
}
|
||||
|
||||
// AdmissionCheck calls the filtering logic of noderesources/nodeport/nodeAffinity/nodename
|
||||
// and returns the failure reasons. It's used in kubelet(pkg/kubelet/lifecycle/predicate.go) and scheduler.
|
||||
// It returns the first failure if `includeAllFailures` is set to false; otherwise
|
||||
// returns all failures.
|
||||
func AdmissionCheck(pod *v1.Pod, nodeInfo *framework.NodeInfo, includeAllFailures bool) []AdmissionResult {
|
||||
var admissionResults []AdmissionResult
|
||||
insufficientResources := noderesources.Fits(pod, nodeInfo, noderesources.ResourceRequestsOptions{
|
||||
EnablePodLevelResources: utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources),
|
||||
})
|
||||
if len(insufficientResources) != 0 {
|
||||
for i := range insufficientResources {
|
||||
admissionResults = append(admissionResults, AdmissionResult{InsufficientResource: &insufficientResources[i]})
|
||||
}
|
||||
if !includeAllFailures {
|
||||
return admissionResults
|
||||
}
|
||||
}
|
||||
|
||||
if matches, _ := corev1nodeaffinity.GetRequiredNodeAffinity(pod).Match(nodeInfo.Node()); !matches {
|
||||
admissionResults = append(admissionResults, AdmissionResult{Name: nodeaffinity.Name, Reason: nodeaffinity.ErrReasonPod})
|
||||
if !includeAllFailures {
|
||||
return admissionResults
|
||||
}
|
||||
}
|
||||
if !nodename.Fits(pod, nodeInfo) {
|
||||
admissionResults = append(admissionResults, AdmissionResult{Name: nodename.Name, Reason: nodename.ErrReason})
|
||||
if !includeAllFailures {
|
||||
return admissionResults
|
||||
}
|
||||
}
|
||||
if !nodeports.Fits(pod, nodeInfo) {
|
||||
admissionResults = append(admissionResults, AdmissionResult{Name: nodeports.Name, Reason: nodeports.ErrReason})
|
||||
if !includeAllFailures {
|
||||
return admissionResults
|
||||
}
|
||||
}
|
||||
return admissionResults
|
||||
}
|
||||
|
||||
// AdmissionResult describes the reason why Scheduler can't admit the pod.
|
||||
// If the reason is a resource fit one, then AdmissionResult.InsufficientResource includes the details.
|
||||
type AdmissionResult struct {
|
||||
Name string
|
||||
Reason string
|
||||
InsufficientResource *noderesources.InsufficientResource
|
||||
}
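// NOTE: the helper below is an illustrative sketch added alongside this vendored copy; it is
// not part of the upstream file. It shows one way a caller holding a *framework.NodeInfo could
// summarize AdmissionCheck failures. The helper name summarizeAdmissionFailures is hypothetical;
// the InsufficientResource field names are the ones used by this vendored noderesources package.
func summarizeAdmissionFailures(pod *v1.Pod, nodeInfo *framework.NodeInfo) []string {
	var reasons []string
	for _, r := range AdmissionCheck(pod, nodeInfo, true) {
		if r.InsufficientResource != nil {
			// Resource-fit failures carry the resource name and the requested amount.
			reasons = append(reasons, fmt.Sprintf("insufficient %s (requested %d)",
				r.InsufficientResource.ResourceName, r.InsufficientResource.Requested))
			continue
		}
		// Other failures carry the plugin name and its failure reason.
		reasons = append(reasons, fmt.Sprintf("%s: %s", r.Name, r.Reason))
	}
	return reasons
}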
|
456
vendor/k8s.io/kubernetes/pkg/scheduler/extender.go
generated
vendored
Normal file
@@ -0,0 +1,456 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
utilnet "k8s.io/apimachinery/pkg/util/net"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
restclient "k8s.io/client-go/rest"
|
||||
extenderv1 "k8s.io/kube-scheduler/extender/v1"
|
||||
schedulerapi "k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
const (
|
||||
// DefaultExtenderTimeout defines the default extender timeout in seconds.
|
||||
DefaultExtenderTimeout = 5 * time.Second
|
||||
)
|
||||
|
||||
// HTTPExtender implements the Extender interface.
|
||||
type HTTPExtender struct {
|
||||
extenderURL string
|
||||
preemptVerb string
|
||||
filterVerb string
|
||||
prioritizeVerb string
|
||||
bindVerb string
|
||||
weight int64
|
||||
client *http.Client
|
||||
nodeCacheCapable bool
|
||||
managedResources sets.Set[string]
|
||||
ignorable bool
|
||||
}
|
||||
|
||||
func makeTransport(config *schedulerapi.Extender) (http.RoundTripper, error) {
|
||||
var cfg restclient.Config
|
||||
if config.TLSConfig != nil {
|
||||
cfg.TLSClientConfig.Insecure = config.TLSConfig.Insecure
|
||||
cfg.TLSClientConfig.ServerName = config.TLSConfig.ServerName
|
||||
cfg.TLSClientConfig.CertFile = config.TLSConfig.CertFile
|
||||
cfg.TLSClientConfig.KeyFile = config.TLSConfig.KeyFile
|
||||
cfg.TLSClientConfig.CAFile = config.TLSConfig.CAFile
|
||||
cfg.TLSClientConfig.CertData = config.TLSConfig.CertData
|
||||
cfg.TLSClientConfig.KeyData = config.TLSConfig.KeyData
|
||||
cfg.TLSClientConfig.CAData = config.TLSConfig.CAData
|
||||
}
|
||||
if config.EnableHTTPS {
|
||||
hasCA := len(cfg.CAFile) > 0 || len(cfg.CAData) > 0
|
||||
if !hasCA {
|
||||
cfg.Insecure = true
|
||||
}
|
||||
}
|
||||
tlsConfig, err := restclient.TLSConfigFor(&cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if tlsConfig != nil {
|
||||
return utilnet.SetTransportDefaults(&http.Transport{
|
||||
TLSClientConfig: tlsConfig,
|
||||
}), nil
|
||||
}
|
||||
return utilnet.SetTransportDefaults(&http.Transport{}), nil
|
||||
}
|
||||
|
||||
// NewHTTPExtender creates an HTTPExtender object.
|
||||
func NewHTTPExtender(config *schedulerapi.Extender) (framework.Extender, error) {
|
||||
if config.HTTPTimeout.Duration.Nanoseconds() == 0 {
|
||||
config.HTTPTimeout.Duration = time.Duration(DefaultExtenderTimeout)
|
||||
}
|
||||
|
||||
transport, err := makeTransport(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
client := &http.Client{
|
||||
Transport: transport,
|
||||
Timeout: config.HTTPTimeout.Duration,
|
||||
}
|
||||
managedResources := sets.New[string]()
|
||||
for _, r := range config.ManagedResources {
|
||||
managedResources.Insert(string(r.Name))
|
||||
}
|
||||
return &HTTPExtender{
|
||||
extenderURL: config.URLPrefix,
|
||||
preemptVerb: config.PreemptVerb,
|
||||
filterVerb: config.FilterVerb,
|
||||
prioritizeVerb: config.PrioritizeVerb,
|
||||
bindVerb: config.BindVerb,
|
||||
weight: config.Weight,
|
||||
client: client,
|
||||
nodeCacheCapable: config.NodeCacheCapable,
|
||||
managedResources: managedResources,
|
||||
ignorable: config.Ignorable,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Name returns extenderURL to identify the extender.
|
||||
func (h *HTTPExtender) Name() string {
|
||||
return h.extenderURL
|
||||
}
|
||||
|
||||
// IsIgnorable returns true to indicate that scheduling should not fail when this extender
// is unavailable.
|
||||
func (h *HTTPExtender) IsIgnorable() bool {
|
||||
return h.ignorable
|
||||
}
|
||||
|
||||
// SupportsPreemption returns true if an extender supports preemption.
// An extender should have the preempt verb defined and its own node cache enabled.
|
||||
func (h *HTTPExtender) SupportsPreemption() bool {
|
||||
return len(h.preemptVerb) > 0
|
||||
}
|
||||
|
||||
// ProcessPreemption returns filtered candidate nodes and victims after running preemption logic in extender.
|
||||
func (h *HTTPExtender) ProcessPreemption(
|
||||
pod *v1.Pod,
|
||||
nodeNameToVictims map[string]*extenderv1.Victims,
|
||||
nodeInfos framework.NodeInfoLister,
|
||||
) (map[string]*extenderv1.Victims, error) {
|
||||
var (
|
||||
result extenderv1.ExtenderPreemptionResult
|
||||
args *extenderv1.ExtenderPreemptionArgs
|
||||
)
|
||||
|
||||
if !h.SupportsPreemption() {
|
||||
return nil, fmt.Errorf("preempt verb is not defined for extender %v but run into ProcessPreemption", h.extenderURL)
|
||||
}
|
||||
|
||||
if h.nodeCacheCapable {
|
||||
// If extender has cached node info, pass NodeNameToMetaVictims in args.
|
||||
nodeNameToMetaVictims := convertToMetaVictims(nodeNameToVictims)
|
||||
args = &extenderv1.ExtenderPreemptionArgs{
|
||||
Pod: pod,
|
||||
NodeNameToMetaVictims: nodeNameToMetaVictims,
|
||||
}
|
||||
} else {
|
||||
args = &extenderv1.ExtenderPreemptionArgs{
|
||||
Pod: pod,
|
||||
NodeNameToVictims: nodeNameToVictims,
|
||||
}
|
||||
}
|
||||
|
||||
if err := h.send(h.preemptVerb, args, &result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Extender will always return NodeNameToMetaVictims.
|
||||
// So let's convert it to NodeNameToVictims by using <nodeInfos>.
|
||||
newNodeNameToVictims, err := h.convertToVictims(result.NodeNameToMetaVictims, nodeInfos)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Do not override <nodeNameToVictims>.
|
||||
return newNodeNameToVictims, nil
|
||||
}
|
||||
|
||||
// convertToVictims converts "nodeNameToMetaVictims" from object identifiers,
|
||||
// such as UIDs and names, to object pointers.
|
||||
func (h *HTTPExtender) convertToVictims(
|
||||
nodeNameToMetaVictims map[string]*extenderv1.MetaVictims,
|
||||
nodeInfos framework.NodeInfoLister,
|
||||
) (map[string]*extenderv1.Victims, error) {
|
||||
nodeNameToVictims := map[string]*extenderv1.Victims{}
|
||||
for nodeName, metaVictims := range nodeNameToMetaVictims {
|
||||
nodeInfo, err := nodeInfos.Get(nodeName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
victims := &extenderv1.Victims{
|
||||
Pods: []*v1.Pod{},
|
||||
NumPDBViolations: metaVictims.NumPDBViolations,
|
||||
}
|
||||
for _, metaPod := range metaVictims.Pods {
|
||||
pod, err := h.convertPodUIDToPod(metaPod, nodeInfo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
victims.Pods = append(victims.Pods, pod)
|
||||
}
|
||||
nodeNameToVictims[nodeName] = victims
|
||||
}
|
||||
return nodeNameToVictims, nil
|
||||
}
|
||||
|
||||
// convertPodUIDToPod returns v1.Pod object for given MetaPod and node info.
|
||||
// The v1.Pod object is restored by nodeInfo.Pods().
|
||||
// It returns an error if there's cache inconsistency between default scheduler
|
||||
// and extender, i.e. when the pod is not found in nodeInfo.Pods.
|
||||
func (h *HTTPExtender) convertPodUIDToPod(
|
||||
metaPod *extenderv1.MetaPod,
|
||||
nodeInfo *framework.NodeInfo) (*v1.Pod, error) {
|
||||
for _, p := range nodeInfo.Pods {
|
||||
if string(p.Pod.UID) == metaPod.UID {
|
||||
return p.Pod, nil
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("extender: %v claims to preempt pod (UID: %v) on node: %v, but the pod is not found on that node",
|
||||
h.extenderURL, metaPod, nodeInfo.Node().Name)
|
||||
}
|
||||
|
||||
// convertToMetaVictims converts from struct type to meta types.
|
||||
func convertToMetaVictims(
|
||||
nodeNameToVictims map[string]*extenderv1.Victims,
|
||||
) map[string]*extenderv1.MetaVictims {
|
||||
nodeNameToMetaVictims := map[string]*extenderv1.MetaVictims{}
|
||||
for node, victims := range nodeNameToVictims {
|
||||
metaVictims := &extenderv1.MetaVictims{
|
||||
Pods: []*extenderv1.MetaPod{},
|
||||
NumPDBViolations: victims.NumPDBViolations,
|
||||
}
|
||||
for _, pod := range victims.Pods {
|
||||
metaPod := &extenderv1.MetaPod{
|
||||
UID: string(pod.UID),
|
||||
}
|
||||
metaVictims.Pods = append(metaVictims.Pods, metaPod)
|
||||
}
|
||||
nodeNameToMetaVictims[node] = metaVictims
|
||||
}
|
||||
return nodeNameToMetaVictims
|
||||
}
|
||||
|
||||
// Filter based on extender-implemented predicate functions. The filtered list is
// expected to be a subset of the supplied list; otherwise the function returns an error.
// The failedNodes and failedAndUnresolvableNodes optionally contain the list
// of failed nodes and failure reasons, except nodes in the latter are
// unresolvable.
|
||||
func (h *HTTPExtender) Filter(
|
||||
pod *v1.Pod,
|
||||
nodes []*framework.NodeInfo,
|
||||
) (filteredList []*framework.NodeInfo, failedNodes, failedAndUnresolvableNodes extenderv1.FailedNodesMap, err error) {
|
||||
var (
|
||||
result extenderv1.ExtenderFilterResult
|
||||
nodeList *v1.NodeList
|
||||
nodeNames *[]string
|
||||
nodeResult []*framework.NodeInfo
|
||||
args *extenderv1.ExtenderArgs
|
||||
)
|
||||
fromNodeName := make(map[string]*framework.NodeInfo)
|
||||
for _, n := range nodes {
|
||||
fromNodeName[n.Node().Name] = n
|
||||
}
|
||||
|
||||
if h.filterVerb == "" {
|
||||
return nodes, extenderv1.FailedNodesMap{}, extenderv1.FailedNodesMap{}, nil
|
||||
}
|
||||
|
||||
if h.nodeCacheCapable {
|
||||
nodeNameSlice := make([]string, 0, len(nodes))
|
||||
for _, node := range nodes {
|
||||
nodeNameSlice = append(nodeNameSlice, node.Node().Name)
|
||||
}
|
||||
nodeNames = &nodeNameSlice
|
||||
} else {
|
||||
nodeList = &v1.NodeList{}
|
||||
for _, node := range nodes {
|
||||
nodeList.Items = append(nodeList.Items, *node.Node())
|
||||
}
|
||||
}
|
||||
|
||||
args = &extenderv1.ExtenderArgs{
|
||||
Pod: pod,
|
||||
Nodes: nodeList,
|
||||
NodeNames: nodeNames,
|
||||
}
|
||||
|
||||
if err := h.send(h.filterVerb, args, &result); err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
if result.Error != "" {
|
||||
return nil, nil, nil, errors.New(result.Error)
|
||||
}
|
||||
|
||||
if h.nodeCacheCapable && result.NodeNames != nil {
|
||||
nodeResult = make([]*framework.NodeInfo, len(*result.NodeNames))
|
||||
for i, nodeName := range *result.NodeNames {
|
||||
if n, ok := fromNodeName[nodeName]; ok {
|
||||
nodeResult[i] = n
|
||||
} else {
|
||||
return nil, nil, nil, fmt.Errorf(
|
||||
"extender %q claims a filtered node %q which is not found in the input node list",
|
||||
h.extenderURL, nodeName)
|
||||
}
|
||||
}
|
||||
} else if result.Nodes != nil {
|
||||
nodeResult = make([]*framework.NodeInfo, len(result.Nodes.Items))
|
||||
for i := range result.Nodes.Items {
|
||||
nodeResult[i] = framework.NewNodeInfo()
|
||||
nodeResult[i].SetNode(&result.Nodes.Items[i])
|
||||
}
|
||||
}
|
||||
|
||||
return nodeResult, result.FailedNodes, result.FailedAndUnresolvableNodes, nil
|
||||
}
|
||||
|
||||
// Prioritize based on extender-implemented priority functions. Weight*priority is added
// up for each such priority function. The returned score is added to the score computed
// by the Kubernetes scheduler. The total score is used to do the host selection.
|
||||
func (h *HTTPExtender) Prioritize(pod *v1.Pod, nodes []*framework.NodeInfo) (*extenderv1.HostPriorityList, int64, error) {
|
||||
var (
|
||||
result extenderv1.HostPriorityList
|
||||
nodeList *v1.NodeList
|
||||
nodeNames *[]string
|
||||
args *extenderv1.ExtenderArgs
|
||||
)
|
||||
|
||||
if h.prioritizeVerb == "" {
|
||||
result := extenderv1.HostPriorityList{}
|
||||
for _, node := range nodes {
|
||||
result = append(result, extenderv1.HostPriority{Host: node.Node().Name, Score: 0})
|
||||
}
|
||||
return &result, 0, nil
|
||||
}
|
||||
|
||||
if h.nodeCacheCapable {
|
||||
nodeNameSlice := make([]string, 0, len(nodes))
|
||||
for _, node := range nodes {
|
||||
nodeNameSlice = append(nodeNameSlice, node.Node().Name)
|
||||
}
|
||||
nodeNames = &nodeNameSlice
|
||||
} else {
|
||||
nodeList = &v1.NodeList{}
|
||||
for _, node := range nodes {
|
||||
nodeList.Items = append(nodeList.Items, *node.Node())
|
||||
}
|
||||
}
|
||||
|
||||
args = &extenderv1.ExtenderArgs{
|
||||
Pod: pod,
|
||||
Nodes: nodeList,
|
||||
NodeNames: nodeNames,
|
||||
}
|
||||
|
||||
if err := h.send(h.prioritizeVerb, args, &result); err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
return &result, h.weight, nil
|
||||
}
|
||||
|
||||
// Bind delegates the action of binding a pod to a node to the extender.
|
||||
func (h *HTTPExtender) Bind(binding *v1.Binding) error {
|
||||
var result extenderv1.ExtenderBindingResult
|
||||
if !h.IsBinder() {
|
||||
// This shouldn't happen as this extender wouldn't have become a Binder.
|
||||
return fmt.Errorf("unexpected empty bindVerb in extender")
|
||||
}
|
||||
req := &extenderv1.ExtenderBindingArgs{
|
||||
PodName: binding.Name,
|
||||
PodNamespace: binding.Namespace,
|
||||
PodUID: binding.UID,
|
||||
Node: binding.Target.Name,
|
||||
}
|
||||
if err := h.send(h.bindVerb, req, &result); err != nil {
|
||||
return err
|
||||
}
|
||||
if result.Error != "" {
|
||||
return errors.New(result.Error)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsBinder returns whether this extender is configured for the Bind method.
|
||||
func (h *HTTPExtender) IsBinder() bool {
|
||||
return h.bindVerb != ""
|
||||
}
|
||||
|
||||
// IsPrioritizer returns whether this extender is configured for the Prioritize method.
|
||||
func (h *HTTPExtender) IsPrioritizer() bool {
|
||||
return h.prioritizeVerb != ""
|
||||
}
|
||||
|
||||
// IsFilter returns whether this extender is configured for the Filter method.
|
||||
func (h *HTTPExtender) IsFilter() bool {
|
||||
return h.filterVerb != ""
|
||||
}
|
||||
|
||||
// Helper function to send messages to the extender
|
||||
func (h *HTTPExtender) send(action string, args interface{}, result interface{}) error {
|
||||
out, err := json.Marshal(args)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
url := strings.TrimRight(h.extenderURL, "/") + "/" + action
|
||||
|
||||
req, err := http.NewRequest("POST", url, bytes.NewReader(out))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := h.client.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("failed %v with extender at URL %v, code %v", action, url, resp.StatusCode)
|
||||
}
|
||||
|
||||
return json.NewDecoder(resp.Body).Decode(result)
|
||||
}
|
||||
|
||||
// IsInterested returns true if at least one extended resource requested by
|
||||
// this pod is managed by this extender.
|
||||
func (h *HTTPExtender) IsInterested(pod *v1.Pod) bool {
|
||||
if h.managedResources.Len() == 0 {
|
||||
return true
|
||||
}
|
||||
if h.hasManagedResources(pod.Spec.Containers) {
|
||||
return true
|
||||
}
|
||||
if h.hasManagedResources(pod.Spec.InitContainers) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (h *HTTPExtender) hasManagedResources(containers []v1.Container) bool {
|
||||
for i := range containers {
|
||||
container := &containers[i]
|
||||
for resourceName := range container.Resources.Requests {
|
||||
if h.managedResources.Has(string(resourceName)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
for resourceName := range container.Resources.Limits {
|
||||
if h.managedResources.Has(string(resourceName)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
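// NOTE: the helper below is an illustrative sketch, not part of the upstream file. It shows a
// minimal extender configuration wired through NewHTTPExtender; the URL and verbs are
// hypothetical, and HTTPTimeout is left zero so NewHTTPExtender falls back to DefaultExtenderTimeout.
func newExampleExtender() (framework.Extender, error) {
	cfg := &schedulerapi.Extender{
		URLPrefix:      "http://127.0.0.1:8888/scheduler", // hypothetical endpoint
		FilterVerb:     "filter",                          // POSTed to <URLPrefix>/filter
		PrioritizeVerb: "prioritize",                      // POSTed to <URLPrefix>/prioritize
		Weight:         1,
		Ignorable:      true, // scheduling does not fail if this extender is unreachable
	}
	return NewHTTPExtender(cfg)
}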
|
123
vendor/k8s.io/kubernetes/pkg/scheduler/framework/cycle_state.go
generated
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package framework
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sync"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrNotFound is the not found error message.
|
||||
ErrNotFound = errors.New("not found")
|
||||
)
|
||||
|
||||
// StateData is a generic type for arbitrary data stored in CycleState.
|
||||
type StateData interface {
|
||||
// Clone is an interface to make a copy of StateData. For performance reasons,
|
||||
// clone should make shallow copies for members (e.g., slices or maps) that are not
|
||||
// impacted by PreFilter's optional AddPod/RemovePod methods.
|
||||
Clone() StateData
|
||||
}
|
||||
|
||||
// StateKey is the type of keys stored in CycleState.
|
||||
type StateKey string
|
||||
|
||||
// CycleState provides a mechanism for plugins to store and retrieve arbitrary data.
|
||||
// StateData stored by one plugin can be read, altered, or deleted by another plugin.
|
||||
// CycleState does not provide any data protection, as all plugins are assumed to be
|
||||
// trusted.
|
||||
// Note: CycleState uses a sync.Map to back the storage, because it is thread safe. It is aimed at optimizing the "write once and read many times" scenario.
// That is the recommended pattern used in all in-tree plugins - plugin-specific state is written once in PreFilter/PreScore and afterward read many times in Filter/Score.
|
||||
type CycleState struct {
|
||||
// storage is keyed with StateKey, and valued with StateData.
|
||||
storage sync.Map
|
||||
// if recordPluginMetrics is true, metrics.PluginExecutionDuration will be recorded for this cycle.
|
||||
recordPluginMetrics bool
|
||||
// SkipFilterPlugins are plugins that will be skipped in the Filter extension point.
|
||||
SkipFilterPlugins sets.Set[string]
|
||||
// SkipScorePlugins are plugins that will be skipped in the Score extension point.
|
||||
SkipScorePlugins sets.Set[string]
|
||||
}
|
||||
|
||||
// NewCycleState initializes a new CycleState and returns its pointer.
|
||||
func NewCycleState() *CycleState {
|
||||
return &CycleState{}
|
||||
}
|
||||
|
||||
// ShouldRecordPluginMetrics returns whether metrics.PluginExecutionDuration metrics should be recorded.
|
||||
func (c *CycleState) ShouldRecordPluginMetrics() bool {
|
||||
if c == nil {
|
||||
return false
|
||||
}
|
||||
return c.recordPluginMetrics
|
||||
}
|
||||
|
||||
// SetRecordPluginMetrics sets recordPluginMetrics to the given value.
|
||||
func (c *CycleState) SetRecordPluginMetrics(flag bool) {
|
||||
if c == nil {
|
||||
return
|
||||
}
|
||||
c.recordPluginMetrics = flag
|
||||
}
|
||||
|
||||
// Clone creates a copy of CycleState and returns its pointer. Clone returns
// nil if the state being cloned is nil.
|
||||
func (c *CycleState) Clone() *CycleState {
|
||||
if c == nil {
|
||||
return nil
|
||||
}
|
||||
copy := NewCycleState()
|
||||
// Safe copy storage in case of overwriting.
|
||||
c.storage.Range(func(k, v interface{}) bool {
|
||||
copy.storage.Store(k, v.(StateData).Clone())
|
||||
return true
|
||||
})
|
||||
// The below are not mutated, so we don't have to safe copy.
|
||||
copy.recordPluginMetrics = c.recordPluginMetrics
|
||||
copy.SkipFilterPlugins = c.SkipFilterPlugins
|
||||
copy.SkipScorePlugins = c.SkipScorePlugins
|
||||
|
||||
return copy
|
||||
}
|
||||
|
||||
// Read retrieves data with the given "key" from CycleState. If the key is not
|
||||
// present, ErrNotFound is returned.
|
||||
//
|
||||
// See CycleState for notes on concurrency.
|
||||
func (c *CycleState) Read(key StateKey) (StateData, error) {
|
||||
if v, ok := c.storage.Load(key); ok {
|
||||
return v.(StateData), nil
|
||||
}
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
|
||||
// Write stores the given "val" in CycleState with the given "key".
|
||||
//
|
||||
// See CycleState for notes on concurrency.
|
||||
func (c *CycleState) Write(key StateKey, val StateData) {
|
||||
c.storage.Store(key, val)
|
||||
}
|
||||
|
||||
// Delete deletes data with the given key from CycleState.
|
||||
//
|
||||
// See CycleState for notes on concurrency.
|
||||
func (c *CycleState) Delete(key StateKey) {
|
||||
c.storage.Delete(key)
|
||||
}
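// NOTE: the snippet below is an illustrative sketch, not part of the upstream file. It shows the
// typical plugin pattern: define a StateData type, write it once (e.g. in PreFilter), and read it
// back later (e.g. in Filter). The exampleStateData type and exampleStateKey are hypothetical.
type exampleStateData struct {
	matchingNodes sets.Set[string]
}

// Clone makes a shallow copy, which is sufficient as long as matchingNodes is not
// mutated by AddPod/RemovePod.
func (d *exampleStateData) Clone() StateData {
	return &exampleStateData{matchingNodes: d.matchingNodes}
}

const exampleStateKey StateKey = "example.hypothetical.io/prefilter"

func writeAndReadExample(cs *CycleState) (bool, error) {
	cs.Write(exampleStateKey, &exampleStateData{matchingNodes: sets.New("node-a")})
	v, err := cs.Read(exampleStateKey)
	if err != nil {
		// Read returns ErrNotFound if the key was never written.
		return false, err
	}
	d, ok := v.(*exampleStateData)
	return ok && d.matchingNodes.Has("node-a"), nil
}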
|
229
vendor/k8s.io/kubernetes/pkg/scheduler/framework/events.go
generated
vendored
Normal file
@@ -0,0 +1,229 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package framework
|
||||
|
||||
import (
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/equality"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/component-helpers/resource"
|
||||
"k8s.io/dynamic-resource-allocation/resourceclaim"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
)
|
||||
|
||||
// Special event labels.
|
||||
const (
|
||||
// ScheduleAttemptFailure is the event when a schedule attempt fails.
|
||||
ScheduleAttemptFailure = "ScheduleAttemptFailure"
|
||||
// BackoffComplete is the event when a pod finishes backoff.
|
||||
BackoffComplete = "BackoffComplete"
|
||||
// ForceActivate is the event when a pod is moved from unschedulablePods/backoffQ
|
||||
// to activeQ. Usually it's triggered by plugin implementations.
|
||||
ForceActivate = "ForceActivate"
|
||||
// UnschedulableTimeout is the event when a pod is moved from unschedulablePods
|
||||
// due to the timeout specified at pod-max-in-unschedulable-pods-duration.
|
||||
UnschedulableTimeout = "UnschedulableTimeout"
|
||||
)
|
||||
|
||||
var (
|
||||
// EventAssignedPodAdd is the event when an assigned pod is added.
|
||||
EventAssignedPodAdd = ClusterEvent{Resource: assignedPod, ActionType: Add}
|
||||
// EventAssignedPodUpdate is the event when an assigned pod is updated.
|
||||
EventAssignedPodUpdate = ClusterEvent{Resource: assignedPod, ActionType: Update}
|
||||
// EventAssignedPodDelete is the event when an assigned pod is deleted.
|
||||
EventAssignedPodDelete = ClusterEvent{Resource: assignedPod, ActionType: Delete}
|
||||
// EventUnscheduledPodAdd is the event when an unscheduled pod is added.
|
||||
EventUnscheduledPodAdd = ClusterEvent{Resource: unschedulablePod, ActionType: Add}
|
||||
// EventUnscheduledPodUpdate is the event when an unscheduled pod is updated.
|
||||
EventUnscheduledPodUpdate = ClusterEvent{Resource: unschedulablePod, ActionType: Update}
|
||||
// EventUnscheduledPodDelete is the event when an unscheduled pod is deleted.
|
||||
EventUnscheduledPodDelete = ClusterEvent{Resource: unschedulablePod, ActionType: Delete}
|
||||
// EventUnschedulableTimeout is the event when a pod stays in unschedulable for longer than timeout.
|
||||
EventUnschedulableTimeout = ClusterEvent{Resource: WildCard, ActionType: All, label: UnschedulableTimeout}
|
||||
// EventForceActivate is the event when a pod is moved from unschedulablePods/backoffQ to activeQ.
|
||||
EventForceActivate = ClusterEvent{Resource: WildCard, ActionType: All, label: ForceActivate}
|
||||
)
|
||||
|
||||
// PodSchedulingPropertiesChange interprets the update of a pod and returns corresponding UpdatePodXYZ event(s).
|
||||
// Once we have other pod update events, we should update here as well.
|
||||
func PodSchedulingPropertiesChange(newPod *v1.Pod, oldPod *v1.Pod) (events []ClusterEvent) {
|
||||
r := assignedPod
|
||||
if newPod.Spec.NodeName == "" {
|
||||
r = unschedulablePod
|
||||
}
|
||||
|
||||
podChangeExtracters := []podChangeExtractor{
|
||||
extractPodLabelsChange,
|
||||
extractPodScaleDown,
|
||||
extractPodSchedulingGateEliminatedChange,
|
||||
extractPodTolerationChange,
|
||||
}
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
|
||||
podChangeExtracters = append(podChangeExtracters, extractPodGeneratedResourceClaimChange)
|
||||
}
|
||||
|
||||
for _, fn := range podChangeExtracters {
|
||||
if event := fn(newPod, oldPod); event != none {
|
||||
events = append(events, ClusterEvent{Resource: r, ActionType: event})
|
||||
}
|
||||
}
|
||||
|
||||
if len(events) == 0 {
|
||||
// When no specific event is found, we use AssignedPodOtherUpdate,
|
||||
// which should only trigger plugins registering a general Pod/Update event.
|
||||
events = append(events, ClusterEvent{Resource: r, ActionType: updatePodOther})
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
type podChangeExtractor func(newPod *v1.Pod, oldPod *v1.Pod) ActionType
|
||||
|
||||
// extractPodScaleDown interprets the update of a pod and returns PodRequestScaledDown event if any pod's resource request(s) is scaled down.
|
||||
func extractPodScaleDown(newPod, oldPod *v1.Pod) ActionType {
|
||||
opt := resource.PodResourcesOptions{
|
||||
UseStatusResources: utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling),
|
||||
}
|
||||
newPodRequests := resource.PodRequests(newPod, opt)
|
||||
oldPodRequests := resource.PodRequests(oldPod, opt)
|
||||
|
||||
for rName, oldReq := range oldPodRequests {
|
||||
newReq, ok := newPodRequests[rName]
|
||||
if !ok {
|
||||
// The resource request of rName is removed.
|
||||
return UpdatePodScaleDown
|
||||
}
|
||||
|
||||
if oldReq.MilliValue() > newReq.MilliValue() {
|
||||
// The resource request of rName is scaled down.
|
||||
return UpdatePodScaleDown
|
||||
}
|
||||
}
|
||||
|
||||
return none
|
||||
}
|
||||
|
||||
func extractPodLabelsChange(newPod *v1.Pod, oldPod *v1.Pod) ActionType {
|
||||
if isLabelChanged(newPod.GetLabels(), oldPod.GetLabels()) {
|
||||
return UpdatePodLabel
|
||||
}
|
||||
return none
|
||||
}
|
||||
|
||||
func extractPodTolerationChange(newPod *v1.Pod, oldPod *v1.Pod) ActionType {
|
||||
if len(newPod.Spec.Tolerations) != len(oldPod.Spec.Tolerations) {
|
||||
// A Pod got a new toleration.
|
||||
// Due to API validation, the user can add, but cannot modify or remove tolerations.
|
||||
// So, it's enough to just check the length of tolerations to notice the update.
|
||||
// And, any updates in tolerations could make Pod schedulable.
|
||||
return UpdatePodTolerations
|
||||
}
|
||||
|
||||
return none
|
||||
}
|
||||
|
||||
func extractPodSchedulingGateEliminatedChange(newPod *v1.Pod, oldPod *v1.Pod) ActionType {
|
||||
if len(newPod.Spec.SchedulingGates) == 0 && len(oldPod.Spec.SchedulingGates) != 0 {
|
||||
// A scheduling gate on the pod is completely removed.
|
||||
return UpdatePodSchedulingGatesEliminated
|
||||
}
|
||||
|
||||
return none
|
||||
}
|
||||
|
||||
func extractPodGeneratedResourceClaimChange(newPod *v1.Pod, oldPod *v1.Pod) ActionType {
|
||||
if !resourceclaim.PodStatusEqual(newPod.Status.ResourceClaimStatuses, oldPod.Status.ResourceClaimStatuses) {
|
||||
return UpdatePodGeneratedResourceClaim
|
||||
}
|
||||
|
||||
return none
|
||||
}
|
||||
|
||||
// NodeSchedulingPropertiesChange interprets the update of a node and returns corresponding UpdateNodeXYZ event(s).
|
||||
func NodeSchedulingPropertiesChange(newNode *v1.Node, oldNode *v1.Node) (events []ClusterEvent) {
|
||||
nodeChangeExtracters := []nodeChangeExtractor{
|
||||
extractNodeSpecUnschedulableChange,
|
||||
extractNodeAllocatableChange,
|
||||
extractNodeLabelsChange,
|
||||
extractNodeTaintsChange,
|
||||
extractNodeConditionsChange,
|
||||
extractNodeAnnotationsChange,
|
||||
}
|
||||
|
||||
for _, fn := range nodeChangeExtracters {
|
||||
if event := fn(newNode, oldNode); event != none {
|
||||
events = append(events, ClusterEvent{Resource: Node, ActionType: event})
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
type nodeChangeExtractor func(newNode *v1.Node, oldNode *v1.Node) ActionType
|
||||
|
||||
func extractNodeAllocatableChange(newNode *v1.Node, oldNode *v1.Node) ActionType {
|
||||
if !equality.Semantic.DeepEqual(oldNode.Status.Allocatable, newNode.Status.Allocatable) {
|
||||
return UpdateNodeAllocatable
|
||||
}
|
||||
return none
|
||||
}
|
||||
|
||||
func extractNodeLabelsChange(newNode *v1.Node, oldNode *v1.Node) ActionType {
|
||||
if isLabelChanged(newNode.GetLabels(), oldNode.GetLabels()) {
|
||||
return UpdateNodeLabel
|
||||
}
|
||||
return none
|
||||
}
|
||||
|
||||
func isLabelChanged(newLabels map[string]string, oldLabels map[string]string) bool {
|
||||
return !equality.Semantic.DeepEqual(newLabels, oldLabels)
|
||||
}
|
||||
|
||||
func extractNodeTaintsChange(newNode *v1.Node, oldNode *v1.Node) ActionType {
|
||||
if !equality.Semantic.DeepEqual(newNode.Spec.Taints, oldNode.Spec.Taints) {
|
||||
return UpdateNodeTaint
|
||||
}
|
||||
return none
|
||||
}
|
||||
|
||||
func extractNodeConditionsChange(newNode *v1.Node, oldNode *v1.Node) ActionType {
|
||||
strip := func(conditions []v1.NodeCondition) map[v1.NodeConditionType]v1.ConditionStatus {
|
||||
conditionStatuses := make(map[v1.NodeConditionType]v1.ConditionStatus, len(conditions))
|
||||
for i := range conditions {
|
||||
conditionStatuses[conditions[i].Type] = conditions[i].Status
|
||||
}
|
||||
return conditionStatuses
|
||||
}
|
||||
if !equality.Semantic.DeepEqual(strip(oldNode.Status.Conditions), strip(newNode.Status.Conditions)) {
|
||||
return UpdateNodeCondition
|
||||
}
|
||||
return none
|
||||
}
|
||||
|
||||
func extractNodeSpecUnschedulableChange(newNode *v1.Node, oldNode *v1.Node) ActionType {
|
||||
if newNode.Spec.Unschedulable != oldNode.Spec.Unschedulable && !newNode.Spec.Unschedulable {
|
||||
// TODO: create UpdateNodeSpecUnschedulable ActionType
|
||||
return UpdateNodeTaint
|
||||
}
|
||||
return none
|
||||
}
|
||||
|
||||
func extractNodeAnnotationsChange(newNode *v1.Node, oldNode *v1.Node) ActionType {
|
||||
if !equality.Semantic.DeepEqual(oldNode.GetAnnotations(), newNode.GetAnnotations()) {
|
||||
return UpdateNodeAnnotation
|
||||
}
|
||||
return none
|
||||
}
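// NOTE: the helper below is an illustrative sketch, not part of the upstream file. It shows the
// kind of events NodeSchedulingPropertiesChange derives when only a node's labels change; the
// label key is arbitrary.
func nodeLabelChangeExample() []ClusterEvent {
	oldNode := &v1.Node{}
	newNode := oldNode.DeepCopy()
	newNode.Labels = map[string]string{"topology.kubernetes.io/zone": "zone-a"}
	// With only labels differing, the result is expected to contain a single
	// {Resource: Node, ActionType: UpdateNodeLabel} event.
	return NodeSchedulingPropertiesChange(newNode, oldNode)
}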
|
79
vendor/k8s.io/kubernetes/pkg/scheduler/framework/extender.go
generated
vendored
Normal file
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package framework
|
||||
|
||||
import (
|
||||
v1 "k8s.io/api/core/v1"
|
||||
extenderv1 "k8s.io/kube-scheduler/extender/v1"
|
||||
)
|
||||
|
||||
// Extender is an interface for external processes to influence scheduling
|
||||
// decisions made by Kubernetes. This is typically needed for resources not directly
|
||||
// managed by Kubernetes.
|
||||
type Extender interface {
|
||||
// Name returns a unique name that identifies the extender.
|
||||
Name() string
|
||||
|
||||
// Filter based on extender-implemented predicate functions. The filtered list is
|
||||
// expected to be a subset of the supplied list.
|
||||
// The failedNodes and failedAndUnresolvableNodes optionally contain the list
|
||||
// of failed nodes and failure reasons, except nodes in the latter are
|
||||
// unresolvable.
|
||||
Filter(pod *v1.Pod, nodes []*NodeInfo) (filteredNodes []*NodeInfo, failedNodesMap extenderv1.FailedNodesMap, failedAndUnresolvable extenderv1.FailedNodesMap, err error)
|
||||
|
||||
// Prioritize based on extender-implemented priority functions. The returned scores & weight
|
||||
// are used to compute the weighted score for an extender. The weighted scores are added to
|
||||
// the scores computed by Kubernetes scheduler. The total scores are used to do the host selection.
|
||||
Prioritize(pod *v1.Pod, nodes []*NodeInfo) (hostPriorities *extenderv1.HostPriorityList, weight int64, err error)
|
||||
|
||||
// Bind delegates the action of binding a pod to a node to the extender.
|
||||
Bind(binding *v1.Binding) error
|
||||
|
||||
// IsBinder returns whether this extender is configured for the Bind method.
|
||||
IsBinder() bool
|
||||
|
||||
// IsInterested returns true if at least one extended resource requested by
|
||||
// this pod is managed by this extender.
|
||||
IsInterested(pod *v1.Pod) bool
|
||||
|
||||
// IsPrioritizer returns whether this extender is configured for the Prioritize method.
|
||||
IsPrioritizer() bool
|
||||
|
||||
// IsFilter returns whether this extender is configured for the Filter method.
|
||||
IsFilter() bool
|
||||
|
||||
// ProcessPreemption returns nodes with their victim pods processed by extender based on
|
||||
// given:
|
||||
// 1. Pod to schedule
|
||||
// 2. Candidate nodes and victim pods (nodeNameToVictims) generated by previous scheduling process.
|
||||
// The possible changes made by extender may include:
|
||||
// 1. Subset of given candidate nodes after preemption phase of extender.
|
||||
// 2. A different set of victim pods for every given candidate node after the preemption phase of the extender.
|
||||
ProcessPreemption(
|
||||
pod *v1.Pod,
|
||||
nodeNameToVictims map[string]*extenderv1.Victims,
|
||||
nodeInfos NodeInfoLister,
|
||||
) (map[string]*extenderv1.Victims, error)
|
||||
|
||||
// SupportsPreemption returns whether the scheduler extender supports preemption or not.
|
||||
SupportsPreemption() bool
|
||||
|
||||
// IsIgnorable returns true to indicate that scheduling should not fail when this extender
// is unavailable. This gives the scheduler the ability to fail fast and tolerate non-critical extenders as well.
// Both Filter and Bind actions are supported.
|
||||
IsIgnorable() bool
|
||||
}
|
954
vendor/k8s.io/kubernetes/pkg/scheduler/framework/interface.go
generated
vendored
Normal file
@@ -0,0 +1,954 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// This file defines the scheduling framework plugin interfaces.
|
||||
|
||||
package framework
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"math"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/client-go/informers"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
restclient "k8s.io/client-go/rest"
|
||||
"k8s.io/client-go/tools/events"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
|
||||
)
|
||||
|
||||
// NodeScoreList declares a list of nodes and their scores.
|
||||
type NodeScoreList []NodeScore
|
||||
|
||||
// NodeScore is a struct with node name and score.
|
||||
type NodeScore struct {
|
||||
Name string
|
||||
Score int64
|
||||
}
|
||||
|
||||
// NodeToStatusReader is a read-only interface of NodeToStatus passed to each PostFilter plugin.
|
||||
type NodeToStatusReader interface {
|
||||
// Get returns the status for given nodeName.
|
||||
// If the node is not in the map, the AbsentNodesStatus is returned.
|
||||
Get(nodeName string) *Status
|
||||
// NodesForStatusCode returns a list of NodeInfos for the nodes that have a given status code.
|
||||
// It returns the NodeInfos for all matching nodes denoted by AbsentNodesStatus as well.
|
||||
NodesForStatusCode(nodeLister NodeInfoLister, code Code) ([]*NodeInfo, error)
|
||||
}
|
||||
|
||||
// NodeToStatusMap is an alias for NodeToStatusReader to keep partial backwards compatibility.
|
||||
// NodeToStatusReader should be used if possible.
|
||||
type NodeToStatusMap = NodeToStatusReader
|
||||
|
||||
// NodeToStatus contains the statuses of the Nodes where the incoming Pod was not schedulable.
|
||||
type NodeToStatus struct {
|
||||
// nodeToStatus contains specific statuses of the nodes.
|
||||
nodeToStatus map[string]*Status
|
||||
// absentNodesStatus defines a status for all nodes that are absent in nodeToStatus map.
|
||||
// By default, all absent nodes are UnschedulableAndUnresolvable.
|
||||
absentNodesStatus *Status
|
||||
}
|
||||
|
||||
// NewDefaultNodeToStatus creates NodeToStatus without any node in the map.
|
||||
// The absentNodesStatus is set by default to UnschedulableAndUnresolvable.
|
||||
func NewDefaultNodeToStatus() *NodeToStatus {
|
||||
return NewNodeToStatus(make(map[string]*Status), NewStatus(UnschedulableAndUnresolvable))
|
||||
}
|
||||
|
||||
// NewNodeToStatus creates NodeToStatus initialized with given nodeToStatus and absentNodesStatus.
|
||||
func NewNodeToStatus(nodeToStatus map[string]*Status, absentNodesStatus *Status) *NodeToStatus {
|
||||
return &NodeToStatus{
|
||||
nodeToStatus: nodeToStatus,
|
||||
absentNodesStatus: absentNodesStatus,
|
||||
}
|
||||
}
|
||||
|
||||
// Get returns the status for given nodeName. If the node is not in the map, the absentNodesStatus is returned.
|
||||
func (m *NodeToStatus) Get(nodeName string) *Status {
|
||||
if status, ok := m.nodeToStatus[nodeName]; ok {
|
||||
return status
|
||||
}
|
||||
return m.absentNodesStatus
|
||||
}
|
||||
|
||||
// Set sets status for given nodeName.
|
||||
func (m *NodeToStatus) Set(nodeName string, status *Status) {
|
||||
m.nodeToStatus[nodeName] = status
|
||||
}
|
||||
|
||||
// Len returns length of nodeToStatus map. It is not aware of number of absent nodes.
|
||||
func (m *NodeToStatus) Len() int {
|
||||
return len(m.nodeToStatus)
|
||||
}
|
||||
|
||||
// AbsentNodesStatus returns absentNodesStatus value.
|
||||
func (m *NodeToStatus) AbsentNodesStatus() *Status {
|
||||
return m.absentNodesStatus
|
||||
}
|
||||
|
||||
// SetAbsentNodesStatus sets absentNodesStatus value.
|
||||
func (m *NodeToStatus) SetAbsentNodesStatus(status *Status) {
|
||||
m.absentNodesStatus = status
|
||||
}
|
||||
|
||||
// ForEachExplicitNode runs fn for each node whose status is explicitly set.
// Important note: it runs fn only for nodes with a status explicitly registered,
// and hence may not run fn for all existing nodes.
// For example, if PreFilter rejects all Nodes, the scheduler would NOT set a failure status to every Node,
// but set a failure status as AbsentNodesStatus.
// You're supposed to get a status from AbsentNodesStatus(), and consider all other nodes to be rejected by it.
|
||||
func (m *NodeToStatus) ForEachExplicitNode(fn func(nodeName string, status *Status)) {
|
||||
for nodeName, status := range m.nodeToStatus {
|
||||
fn(nodeName, status)
|
||||
}
|
||||
}
|
||||
|
||||
// NodesForStatusCode returns a list of NodeInfos for the nodes that match a given status code.
|
||||
// If the absentNodesStatus matches the code, all existing nodes are fetched using nodeLister
|
||||
// and filtered using NodeToStatus.Get.
|
||||
// If the absentNodesStatus doesn't match the code, nodeToStatus map is used to create a list of nodes
|
||||
// and nodeLister.Get is used to obtain NodeInfo for each.
|
||||
func (m *NodeToStatus) NodesForStatusCode(nodeLister NodeInfoLister, code Code) ([]*NodeInfo, error) {
|
||||
var resultNodes []*NodeInfo
|
||||
|
||||
if m.AbsentNodesStatus().Code() == code {
|
||||
allNodes, err := nodeLister.List()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if m.Len() == 0 {
|
||||
// All nodes are absent and status code is matching, so can return all nodes.
|
||||
return allNodes, nil
|
||||
}
|
||||
// Need to find all the nodes that are absent or have a matching code using the allNodes.
|
||||
for _, node := range allNodes {
|
||||
nodeName := node.Node().Name
|
||||
if status := m.Get(nodeName); status.Code() == code {
|
||||
resultNodes = append(resultNodes, node)
|
||||
}
|
||||
}
|
||||
return resultNodes, nil
|
||||
}
|
||||
|
||||
m.ForEachExplicitNode(func(nodeName string, status *Status) {
|
||||
if status.Code() == code {
|
||||
if nodeInfo, err := nodeLister.Get(nodeName); err == nil {
|
||||
resultNodes = append(resultNodes, nodeInfo)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
return resultNodes, nil
|
||||
}
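// NOTE: the helper below is an illustrative sketch, not part of the upstream file. It shows how a
// PostFilter plugin might record per-node failures and then retrieve the nodes that are still
// plain Unschedulable (i.e. potentially resolvable by preemption). Node names are hypothetical.
func unschedulableNodesExample(nodeLister NodeInfoLister) ([]*NodeInfo, error) {
	statuses := NewDefaultNodeToStatus() // absent nodes default to UnschedulableAndUnresolvable
	statuses.Set("node-a", NewStatus(Unschedulable, "too many pods"))
	statuses.Set("node-b", NewStatus(UnschedulableAndUnresolvable, "node is cordoned"))
	return statuses.NodesForStatusCode(nodeLister, Unschedulable)
}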
|
||||
|
||||
// NodePluginScores is a struct with node name and scores for that node.
|
||||
type NodePluginScores struct {
|
||||
// Name is node name.
|
||||
Name string
|
||||
// Scores is scores from plugins and extenders.
|
||||
Scores []PluginScore
|
||||
// TotalScore is the total score in Scores.
|
||||
TotalScore int64
|
||||
}
|
||||
|
||||
// PluginScore is a struct with plugin/extender name and score.
|
||||
type PluginScore struct {
|
||||
// Name is the name of plugin or extender.
|
||||
Name string
|
||||
Score int64
|
||||
}
|
||||
|
||||
// Code is the Status code/type which is returned from plugins.
|
||||
type Code int
|
||||
|
||||
// These are predefined codes used in a Status.
|
||||
// Note: when you add a new status, you have to add it in `codes` slice below.
|
||||
const (
|
||||
// Success means that plugin ran correctly and found pod schedulable.
|
||||
// NOTE: A nil status is also considered as "Success".
|
||||
Success Code = iota
|
||||
// Error is one of the failures, used for internal plugin errors, unexpected input, etc.
|
||||
// Plugin shouldn't return this code for expected failures, like Unschedulable.
|
||||
// Since it's the unexpected failure, the scheduling queue registers the pod without unschedulable plugins.
|
||||
// Meaning, the Pod will be requeued to activeQ/backoffQ soon.
|
||||
Error
|
||||
// Unschedulable is one of the failures, used when a plugin finds a pod unschedulable.
|
||||
// If it's returned from PreFilter or Filter, the scheduler might attempt to
|
||||
// run other postFilter plugins like preemption to get this pod scheduled.
|
||||
// Use UnschedulableAndUnresolvable to make the scheduler skipping other postFilter plugins.
|
||||
// The accompanying status message should explain why the pod is unschedulable.
|
||||
//
|
||||
// We regard the backoff as a penalty of wasting the scheduling cycle.
|
||||
// When the scheduling queue requeues Pods that were rejected with Unschedulable in the last scheduling attempt,
// the Pod goes through backoff.
|
||||
Unschedulable
|
||||
// UnschedulableAndUnresolvable is used when a plugin finds a pod unschedulable and
|
||||
// other postFilter plugins like preemption would not change anything.
|
||||
// See the comment on PostFilter interface for more details about how PostFilter should handle this status.
|
||||
// Plugins should return Unschedulable if it is possible that the pod can get scheduled
|
||||
// after running other postFilter plugins.
|
||||
// The accompanying status message should explain why the pod is unschedulable.
|
||||
//
|
||||
// We regard the backoff as a penalty of wasting the scheduling cycle.
|
||||
// When the scheduling queue requeues Pods that were rejected with UnschedulableAndUnresolvable in the last scheduling attempt,
// the Pod goes through backoff.
|
||||
UnschedulableAndUnresolvable
|
||||
// Wait is used when a Permit plugin finds a pod scheduling should wait.
|
||||
Wait
|
||||
// Skip is used in the following scenarios:
|
||||
// - when a Bind plugin chooses to skip binding.
|
||||
// - when a PreFilter plugin returns Skip so that coupled Filter plugin/PreFilterExtensions() will be skipped.
|
||||
// - when a PreScore plugin returns Skip so that coupled Score plugin will be skipped.
|
||||
Skip
|
||||
// Pending means that the scheduling process is finished successfully,
|
||||
// but the plugin wants to stop the scheduling cycle/binding cycle here.
|
||||
//
|
||||
// For example, the DRA plugin sometimes needs to wait for the external device driver
|
||||
// to provision the resource for the Pod.
|
||||
// It's different from when to return Unschedulable/UnschedulableAndUnresolvable,
|
||||
// because in this case, the scheduler decides where the Pod can go successfully,
|
||||
// but we need to wait for the external component to do something based on that scheduling result.
|
||||
//
|
||||
// We regard the backoff as a penalty of wasting the scheduling cycle.
|
||||
// In the case of returning Pending, we cannot say the scheduling cycle is wasted
// because the scheduling result is used to move the Pod's scheduling forward,
// even though that particular scheduling cycle failed.
// So, Pods rejected for such reasons don't need to suffer a penalty (backoff).
// When the scheduling queue requeues Pods that were rejected with Pending in the last scheduling attempt,
// the Pod goes to activeQ directly, ignoring backoff.
|
||||
Pending
|
||||
)
|
||||
|
||||
// This list must contain exactly the same codes, in the same order, as the iota constants defined above.
|
||||
var codes = []string{"Success", "Error", "Unschedulable", "UnschedulableAndUnresolvable", "Wait", "Skip", "Pending"}
|
||||
|
||||
func (c Code) String() string {
|
||||
return codes[c]
|
||||
}
|
||||
|
||||
const (
|
||||
// MaxNodeScore is the maximum score a Score plugin is expected to return.
|
||||
MaxNodeScore int64 = 100
|
||||
|
||||
// MinNodeScore is the minimum score a Score plugin is expected to return.
|
||||
MinNodeScore int64 = 0
|
||||
|
||||
// MaxTotalScore is the maximum total score.
|
||||
MaxTotalScore int64 = math.MaxInt64
|
||||
)
|
||||
|
||||
// PodsToActivateKey is a reserved state key for stashing pods.
|
||||
// If the stashed pods are present in unschedulablePods or backoffQ, they will be
|
||||
// activated (i.e., moved to activeQ) in two phases:
|
||||
// - end of a scheduling cycle if it succeeds (will be cleared from `PodsToActivate` if activated)
|
||||
// - end of a binding cycle if it succeeds
|
||||
var PodsToActivateKey StateKey = "kubernetes.io/pods-to-activate"
|
||||
|
||||
// PodsToActivate stores pods to be activated.
|
||||
type PodsToActivate struct {
|
||||
sync.Mutex
|
||||
// Map is keyed with namespaced pod name, and valued with the pod.
|
||||
Map map[string]*v1.Pod
|
||||
}
|
||||
|
||||
// Clone just returns the same state.
|
||||
func (s *PodsToActivate) Clone() StateData {
|
||||
return s
|
||||
}
|
||||
|
||||
// NewPodsToActivate instantiates a PodsToActivate object.
|
||||
func NewPodsToActivate() *PodsToActivate {
|
||||
return &PodsToActivate{Map: make(map[string]*v1.Pod)}
|
||||
}
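// A minimal sketch of how a plugin could stash a pod for later activation,
// assuming the CycleState of the current cycle was seeded with
// NewPodsToActivate() under PodsToActivateKey and exposes it via CycleState.Read
// (state and pod are placeholder names):
//
//	if c, err := state.Read(PodsToActivateKey); err == nil {
//		if podsToActivate, ok := c.(*PodsToActivate); ok {
//			podsToActivate.Lock()
//			podsToActivate.Map[pod.Namespace+"/"+pod.Name] = pod
//			podsToActivate.Unlock()
//		}
//	}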
|
||||
|
||||
// Status indicates the result of running a plugin. It consists of a code, a
|
||||
// message, (optionally) an error, and the name of the plugin that caused the failure.
|
||||
// When the status code is not Success, the reasons should explain why.
|
||||
// And, when code is Success, all the other fields should be empty.
|
||||
// NOTE: A nil Status is also considered as Success.
|
||||
type Status struct {
|
||||
code Code
|
||||
reasons []string
|
||||
err error
|
||||
// plugin is an optional field that records the name of the plugin that caused this status.
|
||||
// It's set by the framework when code is Unschedulable, UnschedulableAndUnresolvable or Pending.
|
||||
plugin string
|
||||
}
|
||||
|
||||
func (s *Status) WithError(err error) *Status {
|
||||
s.err = err
|
||||
return s
|
||||
}
|
||||
|
||||
// Code returns code of the Status.
|
||||
func (s *Status) Code() Code {
|
||||
if s == nil {
|
||||
return Success
|
||||
}
|
||||
return s.code
|
||||
}
|
||||
|
||||
// Message returns a concatenated message on reasons of the Status.
|
||||
func (s *Status) Message() string {
|
||||
if s == nil {
|
||||
return ""
|
||||
}
|
||||
return strings.Join(s.Reasons(), ", ")
|
||||
}
|
||||
|
||||
// SetPlugin sets the given plugin name to s.plugin.
|
||||
func (s *Status) SetPlugin(plugin string) {
|
||||
s.plugin = plugin
|
||||
}
|
||||
|
||||
// WithPlugin sets the given plugin name to s.plugin,
|
||||
// and returns the given status object.
|
||||
func (s *Status) WithPlugin(plugin string) *Status {
|
||||
s.SetPlugin(plugin)
|
||||
return s
|
||||
}
|
||||
|
||||
// Plugin returns the plugin name which caused this status.
|
||||
func (s *Status) Plugin() string {
|
||||
return s.plugin
|
||||
}
|
||||
|
||||
// Reasons returns reasons of the Status.
|
||||
func (s *Status) Reasons() []string {
|
||||
if s.err != nil {
|
||||
return append([]string{s.err.Error()}, s.reasons...)
|
||||
}
|
||||
return s.reasons
|
||||
}
|
||||
|
||||
// AppendReason appends given reason to the Status.
|
||||
func (s *Status) AppendReason(reason string) {
|
||||
s.reasons = append(s.reasons, reason)
|
||||
}
|
||||
|
||||
// IsSuccess returns true if and only if "Status" is nil or Code is "Success".
|
||||
func (s *Status) IsSuccess() bool {
|
||||
return s.Code() == Success
|
||||
}
|
||||
|
||||
// IsWait returns true if and only if "Status" is non-nil and its Code is "Wait".
|
||||
func (s *Status) IsWait() bool {
|
||||
return s.Code() == Wait
|
||||
}
|
||||
|
||||
// IsSkip returns true if and only if "Status" is non-nil and its Code is "Skip".
|
||||
func (s *Status) IsSkip() bool {
|
||||
return s.Code() == Skip
|
||||
}
|
||||
|
||||
// IsRejected returns true if "Status" is Unschedulable (Unschedulable, UnschedulableAndUnresolvable, or Pending).
|
||||
func (s *Status) IsRejected() bool {
|
||||
code := s.Code()
|
||||
return code == Unschedulable || code == UnschedulableAndUnresolvable || code == Pending
|
||||
}
|
||||
|
||||
// AsError returns nil if the status is a success, a wait or a skip; otherwise returns an "error" object
|
||||
// with a concatenated message on reasons of the Status.
|
||||
func (s *Status) AsError() error {
|
||||
if s.IsSuccess() || s.IsWait() || s.IsSkip() {
|
||||
return nil
|
||||
}
|
||||
if s.err != nil {
|
||||
return s.err
|
||||
}
|
||||
return errors.New(s.Message())
|
||||
}
|
||||
|
||||
// Equal checks equality of two statuses. This is useful for testing with
|
||||
// cmp.Equal.
|
||||
func (s *Status) Equal(x *Status) bool {
|
||||
if s == nil || x == nil {
|
||||
return s.IsSuccess() && x.IsSuccess()
|
||||
}
|
||||
if s.code != x.code {
|
||||
return false
|
||||
}
|
||||
if !cmp.Equal(s.err, x.err, cmpopts.EquateErrors()) {
|
||||
return false
|
||||
}
|
||||
if !cmp.Equal(s.reasons, x.reasons) {
|
||||
return false
|
||||
}
|
||||
return cmp.Equal(s.plugin, x.plugin)
|
||||
}
|
||||
|
||||
func (s *Status) String() string {
|
||||
return s.Message()
|
||||
}
|
||||
|
||||
// NewStatus makes a Status out of the given arguments and returns its pointer.
|
||||
func NewStatus(code Code, reasons ...string) *Status {
|
||||
s := &Status{
|
||||
code: code,
|
||||
reasons: reasons,
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// AsStatus wraps an error in a Status.
|
||||
func AsStatus(err error) *Status {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
return &Status{
|
||||
code: Error,
|
||||
err: err,
|
||||
}
|
||||
}
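// A few common ways to construct and inspect a Status, using only the helpers
// above ("HypotheticalPlugin" is just a placeholder name):
//
//	st := NewStatus(Unschedulable, "node(s) didn't satisfy the example constraint").WithPlugin("HypotheticalPlugin")
//	st.IsRejected()                                     // true
//	AsStatus(errors.New("informer not synced")).Code()  // Error
//	var nilStatus *Status
//	nilStatus.IsSuccess()                               // true: a nil Status is treated as Success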
|
||||
|
||||
// WaitingPod represents a pod currently waiting in the permit phase.
|
||||
type WaitingPod interface {
|
||||
// GetPod returns a reference to the waiting pod.
|
||||
GetPod() *v1.Pod
|
||||
// GetPendingPlugins returns a list of pending Permit plugin's name.
|
||||
GetPendingPlugins() []string
|
||||
// Allow declares the waiting pod is allowed to be scheduled by the plugin named as "pluginName".
|
||||
// If this is the last remaining plugin to allow, then a success signal is delivered
|
||||
// to unblock the pod.
|
||||
Allow(pluginName string)
|
||||
// Reject declares the waiting pod unschedulable.
|
||||
Reject(pluginName, msg string)
|
||||
}
|
||||
|
||||
// Plugin is the parent type for all the scheduling framework plugins.
|
||||
type Plugin interface {
|
||||
Name() string
|
||||
}
|
||||
|
||||
// PreEnqueuePlugin is an interface that must be implemented by "PreEnqueue" plugins.
|
||||
// These plugins are called prior to adding Pods to activeQ.
|
||||
// Note: a preEnqueue plugin is expected to be lightweight and efficient, so it's not expected to
|
||||
// involve expensive calls like accessing external endpoints; otherwise it'd block other
|
||||
// Pods' enqueuing in event handlers.
|
||||
type PreEnqueuePlugin interface {
|
||||
Plugin
|
||||
// PreEnqueue is called prior to adding Pods to activeQ.
|
||||
PreEnqueue(ctx context.Context, p *v1.Pod) *Status
|
||||
}
|
||||
|
||||
// LessFunc is the function to sort pod info
|
||||
type LessFunc func(podInfo1, podInfo2 *QueuedPodInfo) bool
|
||||
|
||||
// QueueSortPlugin is an interface that must be implemented by "QueueSort" plugins.
|
||||
// These plugins are used to sort pods in the scheduling queue. Only one queue sort
|
||||
// plugin may be enabled at a time.
|
||||
type QueueSortPlugin interface {
|
||||
Plugin
|
||||
// Less is used to sort pods in the scheduling queue.
|
||||
Less(*QueuedPodInfo, *QueuedPodInfo) bool
|
||||
}
|
||||
|
||||
// EnqueueExtensions is an optional interface that plugins can implement to efficiently
|
||||
// move unschedulable Pods in internal scheduling queues.
|
||||
// In the scheduler, Pods can be unschedulable by PreEnqueue, PreFilter, Filter, Reserve, and Permit plugins,
|
||||
// and Pods rejected by these plugins are requeued based on this extension point.
|
||||
// Failures from other extension points are regarded as temporary errors (e.g., network failure),
// and the scheduler requeues Pods without consulting this extension point - it always requeues Pods to activeQ after backoff.
// This is because such temporary errors cannot be resolved by specific cluster events,
// and we have no choice but to keep retrying scheduling until the failure is resolved.
|
||||
//
|
||||
// Plugins that make a pod unschedulable (PreEnqueue, PreFilter, Filter, Reserve, and Permit plugins) should implement this interface;
// otherwise the default implementation will be used, which is less efficient in requeueing Pods rejected by the plugin.
// If a plugin at any other extension point implements this interface, it is simply ignored.
|
||||
type EnqueueExtensions interface {
|
||||
Plugin
|
||||
// EventsToRegister returns a series of possible events that may cause a Pod
|
||||
// failed by this plugin schedulable. Each event has a callback function that
|
||||
// filters out events to reduce useless retry of Pod's scheduling.
|
||||
// The events will be registered when instantiating the internal scheduling queue,
|
||||
// and leveraged to build event handlers dynamically.
|
||||
// When it returns an error, the scheduler fails to start.
|
||||
// Note: the returned list needs to be determined at startup,
|
||||
// and the scheduler only evaluates it once during start up.
|
||||
// Do not change the result during runtime, for example, based on the cluster's state etc.
|
||||
//
|
||||
// Appropriate implementation of this function will make Pod's re-scheduling accurate and performant.
|
||||
EventsToRegister(context.Context) ([]ClusterEventWithHint, error)
|
||||
}
|
||||
|
||||
// PreFilterExtensions is an interface that is included in plugins that allow specifying
|
||||
// callbacks to make incremental updates to its supposedly pre-calculated
|
||||
// state.
|
||||
type PreFilterExtensions interface {
|
||||
// AddPod is called by the framework while trying to evaluate the impact
|
||||
// of adding podToAdd to the node while scheduling podToSchedule.
|
||||
AddPod(ctx context.Context, state *CycleState, podToSchedule *v1.Pod, podInfoToAdd *PodInfo, nodeInfo *NodeInfo) *Status
|
||||
// RemovePod is called by the framework while trying to evaluate the impact
|
||||
// of removing podToRemove from the node while scheduling podToSchedule.
|
||||
RemovePod(ctx context.Context, state *CycleState, podToSchedule *v1.Pod, podInfoToRemove *PodInfo, nodeInfo *NodeInfo) *Status
|
||||
}
|
||||
|
||||
// PreFilterPlugin is an interface that must be implemented by "PreFilter" plugins.
|
||||
// These plugins are called at the beginning of the scheduling cycle.
|
||||
type PreFilterPlugin interface {
|
||||
Plugin
|
||||
// PreFilter is called at the beginning of the scheduling cycle. All PreFilter
|
||||
// plugins must return success or the pod will be rejected. PreFilter could optionally
|
||||
// return a PreFilterResult to influence which nodes to evaluate downstream. This is useful
|
||||
// for cases where it is possible to determine the subset of nodes to process in O(1) time.
|
||||
// When PreFilterResult filters out some Nodes, the framework considers Nodes that are filtered out as getting "UnschedulableAndUnresolvable".
|
||||
// i.e., those Nodes will be out of the candidates of the preemption.
|
||||
//
|
||||
// When it returns Skip status, returned PreFilterResult and other fields in status are just ignored,
|
||||
// and coupled Filter plugin/PreFilterExtensions() will be skipped in this scheduling cycle.
|
||||
PreFilter(ctx context.Context, state *CycleState, p *v1.Pod) (*PreFilterResult, *Status)
|
||||
// PreFilterExtensions returns a PreFilterExtensions interface if the plugin implements one,
|
||||
// or nil if it does not. A Pre-filter plugin can provide extensions to incrementally
|
||||
// modify its pre-processed info. The framework guarantees that the extensions
|
||||
// AddPod/RemovePod will only be called after PreFilter, possibly on a cloned
|
||||
// CycleState, and may call those functions more than once before calling
|
||||
// Filter again on a specific node.
|
||||
PreFilterExtensions() PreFilterExtensions
|
||||
}
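// A minimal sketch of a PreFilter implementation that narrows the candidate
// nodes (the plugin type and the "example.com/target-node" annotation are
// hypothetical, not part of this package):
//
//	func (pl *hypotheticalPlugin) PreFilter(ctx context.Context, state *CycleState, p *v1.Pod) (*PreFilterResult, *Status) {
//		target, ok := p.Annotations["example.com/target-node"]
//		if !ok {
//			return nil, nil // nil result keeps all nodes eligible
//		}
//		return &PreFilterResult{NodeNames: sets.New(target)}, nil
//	}
//
//	func (pl *hypotheticalPlugin) PreFilterExtensions() PreFilterExtensions { return nil }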
|
||||
|
||||
// FilterPlugin is an interface for Filter plugins. These plugins are called at the
|
||||
// filter extension point for filtering out hosts that cannot run a pod.
|
||||
// This concept used to be called 'predicate' in the original scheduler.
|
||||
// These plugins should return "Success", "Unschedulable" or "Error" in Status.code.
|
||||
// However, the scheduler accepts other valid codes as well.
|
||||
// Anything other than "Success" will lead to exclusion of the given host from
|
||||
// running the pod.
|
||||
type FilterPlugin interface {
|
||||
Plugin
|
||||
// Filter is called by the scheduling framework.
|
||||
// All FilterPlugins should return "Success" to declare that
|
||||
// the given node fits the pod. If Filter doesn't return "Success",
|
||||
// it will return "Unschedulable", "UnschedulableAndUnresolvable" or "Error".
|
||||
//
|
||||
// "Error" aborts pod scheduling and puts the pod into the backoff queue.
|
||||
//
|
||||
// For the node being evaluated, Filter plugins should look at the passed
|
||||
// nodeInfo reference for this particular node's information (e.g., pods
|
||||
// considered to be running on the node) instead of looking it up in the
|
||||
// NodeInfoSnapshot because we don't guarantee that they will be the same.
|
||||
// For example, during preemption, we may pass a copy of the original
|
||||
// nodeInfo object that has some pods removed from it to evaluate the
|
||||
// possibility of preempting them to schedule the target pod.
|
||||
Filter(ctx context.Context, state *CycleState, pod *v1.Pod, nodeInfo *NodeInfo) *Status
|
||||
}
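// A minimal sketch of a Filter implementation (the plugin type and the
// "example.com/dedicated" label are hypothetical):
//
//	func (pl *hypotheticalPlugin) Filter(ctx context.Context, state *CycleState, pod *v1.Pod, nodeInfo *NodeInfo) *Status {
//		if nodeInfo.Node().Labels["example.com/dedicated"] == "true" {
//			return NewStatus(Unschedulable, "node is dedicated to other workloads")
//		}
//		return nil // nil is treated as Success
//	}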
|
||||
|
||||
// PostFilterPlugin is an interface for "PostFilter" plugins. These plugins are called
|
||||
// after a pod cannot be scheduled.
|
||||
type PostFilterPlugin interface {
|
||||
Plugin
|
||||
// PostFilter is called by the scheduling framework
|
||||
// when the scheduling cycle failed at PreFilter or Filter by Unschedulable or UnschedulableAndUnresolvable.
|
||||
// NodeToStatusReader has statuses that each Node got in PreFilter or Filter phase.
|
||||
//
|
||||
// If you're implementing a custom preemption with PostFilter, ignoring Nodes with UnschedulableAndUnresolvable is the responsibility of your plugin,
|
||||
// meaning NodeToStatusReader could have Nodes with UnschedulableAndUnresolvable
|
||||
// and the scheduling framework does call PostFilter plugins even when all Nodes in NodeToStatusReader are UnschedulableAndUnresolvable.
|
||||
//
|
||||
// A PostFilter plugin should return one of the following statuses:
|
||||
// - Unschedulable: the plugin gets executed successfully but the pod cannot be made schedulable.
|
||||
// - Success: the plugin gets executed successfully and the pod can be made schedulable.
|
||||
// - Error: the plugin aborts due to some internal error.
|
||||
//
|
||||
// Informational plugins should be configured ahead of other ones, and always return Unschedulable status.
|
||||
// Optionally, a non-nil PostFilterResult may be returned along with a Success status. For example,
|
||||
// a preemption plugin may choose to return nominatedNodeName, so that framework can reuse that to update the
|
||||
// preemptor pod's .status.nominatedNodeName field.
|
||||
PostFilter(ctx context.Context, state *CycleState, pod *v1.Pod, filteredNodeStatusMap NodeToStatusReader) (*PostFilterResult, *Status)
|
||||
}
|
||||
|
||||
// PreScorePlugin is an interface for "PreScore" plugin. PreScore is an
|
||||
// informational extension point. Plugins will be called with a list of nodes
|
||||
// that passed the filtering phase. A plugin may use this data to update internal
|
||||
// state or to generate logs/metrics.
|
||||
type PreScorePlugin interface {
|
||||
Plugin
|
||||
// PreScore is called by the scheduling framework after a list of nodes
|
||||
// passed the filtering phase. All prescore plugins must return success or
|
||||
// the pod will be rejected.
|
||||
// When it returns Skip status, other fields in status are just ignored,
|
||||
// and coupled Score plugin will be skipped in this scheduling cycle.
|
||||
PreScore(ctx context.Context, state *CycleState, pod *v1.Pod, nodes []*NodeInfo) *Status
|
||||
}
|
||||
|
||||
// ScoreExtensions is an interface for Score extended functionality.
|
||||
type ScoreExtensions interface {
|
||||
// NormalizeScore is called for all node scores produced by the same plugin's "Score"
|
||||
// method. A successful run of NormalizeScore will update the scores list and return
|
||||
// a success status.
|
||||
NormalizeScore(ctx context.Context, state *CycleState, p *v1.Pod, scores NodeScoreList) *Status
|
||||
}
|
||||
|
||||
// ScorePlugin is an interface that must be implemented by "Score" plugins to rank
|
||||
// nodes that passed the filtering phase.
|
||||
type ScorePlugin interface {
|
||||
Plugin
|
||||
// Score is called on each filtered node. It must return success and an integer
|
||||
// indicating the rank of the node. All scoring plugins must return success or
|
||||
// the pod will be rejected.
|
||||
Score(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) (int64, *Status)
|
||||
|
||||
// ScoreExtensions returns a ScoreExtensions interface if it implements one, or nil if does not.
|
||||
ScoreExtensions() ScoreExtensions
|
||||
}
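// A minimal sketch of a Score implementation that prefers labeled nodes (the
// plugin type, its handle field, and the "example.com/preferred" label are
// hypothetical):
//
//	func (pl *hypotheticalPlugin) Score(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) (int64, *Status) {
//		nodeInfo, err := pl.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
//		if err != nil {
//			return 0, AsStatus(err)
//		}
//		if nodeInfo.Node().Labels["example.com/preferred"] == "true" {
//			return MaxNodeScore, nil
//		}
//		return MinNodeScore, nil
//	}
//
//	func (pl *hypotheticalPlugin) ScoreExtensions() ScoreExtensions { return nil }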
|
||||
|
||||
// ReservePlugin is an interface for plugins with Reserve and Unreserve
|
||||
// methods. These are meant to update the state of the plugin. This concept
|
||||
// used to be called 'assume' in the original scheduler. These plugins should
|
||||
// return only Success or Error in Status.code. However, the scheduler accepts
|
||||
// other valid codes as well. Anything other than Success will lead to
|
||||
// rejection of the pod.
|
||||
type ReservePlugin interface {
|
||||
Plugin
|
||||
// Reserve is called by the scheduling framework when the scheduler cache is
|
||||
// updated. If this method returns a failed Status, the scheduler will call
|
||||
// the Unreserve method for all enabled ReservePlugins.
|
||||
Reserve(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) *Status
|
||||
// Unreserve is called by the scheduling framework when a reserved pod was
|
||||
// rejected, an error occurred during reservation of subsequent plugins, or
|
||||
// in a later phase. The Unreserve method implementation must be idempotent
|
||||
// and may be called by the scheduler even if the corresponding Reserve
|
||||
// method for the same plugin was not called.
|
||||
Unreserve(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string)
|
||||
}
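// A minimal sketch of a Reserve/Unreserve pair that keeps per-plugin
// bookkeeping (the plugin type and its in-memory cache are hypothetical):
//
//	func (pl *hypotheticalPlugin) Reserve(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) *Status {
//		if err := pl.cache.reserve(p.UID, nodeName); err != nil {
//			return AsStatus(err)
//		}
//		return nil
//	}
//
//	func (pl *hypotheticalPlugin) Unreserve(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) {
//		pl.cache.release(p.UID, nodeName) // must stay idempotent
//	}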
|
||||
|
||||
// PreBindPlugin is an interface that must be implemented by "PreBind" plugins.
|
||||
// These plugins are called before a pod is bound.
|
||||
type PreBindPlugin interface {
|
||||
Plugin
|
||||
// PreBind is called before binding a pod. All prebind plugins must return
|
||||
// success or the pod will be rejected and won't be sent for binding.
|
||||
PreBind(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) *Status
|
||||
}
|
||||
|
||||
// PostBindPlugin is an interface that must be implemented by "PostBind" plugins.
|
||||
// These plugins are called after a pod is successfully bound to a node.
|
||||
type PostBindPlugin interface {
|
||||
Plugin
|
||||
// PostBind is called after a pod is successfully bound. These plugins are
|
||||
// informational. A common application of this extension point is for cleaning
|
||||
// up. If a plugin needs to clean-up its state after a pod is scheduled and
|
||||
// bound, PostBind is the extension point that it should register.
|
||||
PostBind(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string)
|
||||
}
|
||||
|
||||
// PermitPlugin is an interface that must be implemented by "Permit" plugins.
|
||||
// These plugins are called before a pod is bound to a node.
|
||||
type PermitPlugin interface {
|
||||
Plugin
|
||||
// Permit is called before binding a pod (and before prebind plugins). Permit
|
||||
// plugins are used to prevent or delay the binding of a Pod. A permit plugin
|
||||
// must return success or wait with timeout duration, or the pod will be rejected.
|
||||
// The pod will also be rejected if the wait times out or the pod is rejected while
|
||||
// waiting. Note that if the plugin returns "wait", the framework will wait only
|
||||
// after running the remaining plugins given that no other plugin rejects the pod.
|
||||
Permit(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) (*Status, time.Duration)
|
||||
}
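// A minimal sketch of a Permit implementation that delays binding, plus the
// matching unblock via the framework Handle (the plugin type, the gateOpen
// helper, podUID, and the plugin name are hypothetical):
//
//	func (pl *hypotheticalPlugin) Permit(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) (*Status, time.Duration) {
//		if !pl.gateOpen(p) {
//			return NewStatus(Wait, "waiting for the rest of the group"), 30 * time.Second
//		}
//		return nil, 0
//	}
//
//	// Elsewhere, once the condition is satisfied:
//	if wp := handle.GetWaitingPod(podUID); wp != nil {
//		wp.Allow("HypotheticalPlugin")
//	}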
|
||||
|
||||
// BindPlugin is an interface that must be implemented by "Bind" plugins. Bind
|
||||
// plugins are used to bind a pod to a Node.
|
||||
type BindPlugin interface {
|
||||
Plugin
|
||||
// Bind plugins will not be called until all pre-bind plugins have completed. Each
|
||||
// bind plugin is called in the configured order. A bind plugin may choose whether
|
||||
// or not to handle the given Pod. If a bind plugin chooses to handle a Pod, the
|
||||
// remaining bind plugins are skipped. When a bind plugin does not handle a pod,
|
||||
// it must return Skip in its Status code. If a bind plugin returns an Error, the
|
||||
// pod is rejected and will not be bound.
|
||||
Bind(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) *Status
|
||||
}
|
||||
|
||||
// Framework manages the set of plugins in use by the scheduling framework.
|
||||
// Configured plugins are called at specified points in a scheduling context.
|
||||
type Framework interface {
|
||||
Handle
|
||||
|
||||
// PreEnqueuePlugins returns the registered preEnqueue plugins.
|
||||
PreEnqueuePlugins() []PreEnqueuePlugin
|
||||
|
||||
// EnqueueExtensions returns the registered Enqueue extensions.
|
||||
EnqueueExtensions() []EnqueueExtensions
|
||||
|
||||
// QueueSortFunc returns the function to sort pods in scheduling queue
|
||||
QueueSortFunc() LessFunc
|
||||
|
||||
// RunPreFilterPlugins runs the set of configured PreFilter plugins. It returns
|
||||
// *Status and its code is set to non-success if any of the plugins returns
|
||||
// anything but Success. If a non-success status is returned, then the scheduling
|
||||
// cycle is aborted.
|
||||
// It also returns a PreFilterResult, which may influence what or how many nodes to
|
||||
// evaluate downstream.
|
||||
// The third return value contains the PreFilter plugins that rejected some or all Nodes with a PreFilterResult.
// Note that it doesn't contain a plugin that rejects this Pod with a non-success status
// rather than with a PreFilterResult.
|
||||
RunPreFilterPlugins(ctx context.Context, state *CycleState, pod *v1.Pod) (*PreFilterResult, *Status, sets.Set[string])
|
||||
|
||||
// RunPostFilterPlugins runs the set of configured PostFilter plugins.
|
||||
// PostFilter plugins can either be informational, in which case should be configured
|
||||
// to execute first and return Unschedulable status, or ones that try to change the
|
||||
// cluster state to make the pod potentially schedulable in a future scheduling cycle.
|
||||
RunPostFilterPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, filteredNodeStatusMap NodeToStatusReader) (*PostFilterResult, *Status)
|
||||
|
||||
// RunPreBindPlugins runs the set of configured PreBind plugins. It returns
|
||||
// *Status and its code is set to non-success if any of the plugins returns
|
||||
// anything but Success. If the Status code is "Unschedulable", it is
|
||||
// considered as a scheduling check failure, otherwise, it is considered as an
|
||||
// internal error. In either case the pod is not going to be bound.
|
||||
RunPreBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) *Status
|
||||
|
||||
// RunPostBindPlugins runs the set of configured PostBind plugins.
|
||||
RunPostBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string)
|
||||
|
||||
// RunReservePluginsReserve runs the Reserve method of the set of
|
||||
// configured Reserve plugins. If any of these calls returns an error, it
|
||||
// does not continue running the remaining ones and returns the error. In
|
||||
// such case, pod will not be scheduled.
|
||||
RunReservePluginsReserve(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) *Status
|
||||
|
||||
// RunReservePluginsUnreserve runs the Unreserve method of the set of
|
||||
// configured Reserve plugins.
|
||||
RunReservePluginsUnreserve(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string)
|
||||
|
||||
// RunPermitPlugins runs the set of configured Permit plugins. If any of these
|
||||
// plugins returns a status other than "Success" or "Wait", it does not continue
|
||||
// running the remaining plugins and returns an error. Otherwise, if any of the
|
||||
// plugins returns "Wait", then this function will create and add waiting pod
|
||||
// to a map of currently waiting pods and return status with "Wait" code.
|
||||
// The pod will remain a waiting pod for the minimum duration returned by the Permit plugins.
|
||||
RunPermitPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) *Status
|
||||
|
||||
// WaitOnPermit will block, if the pod is a waiting pod, until the waiting pod is rejected or allowed.
|
||||
WaitOnPermit(ctx context.Context, pod *v1.Pod) *Status
|
||||
|
||||
// RunBindPlugins runs the set of configured Bind plugins. A Bind plugin may choose
|
||||
// whether or not to handle the given Pod. If a Bind plugin chooses to skip the
|
||||
// binding, it should return code=5("skip") status. Otherwise, it should return "Error"
|
||||
// or "Success". If none of the plugins handled binding, RunBindPlugins returns
|
||||
// code=5("skip") status.
|
||||
RunBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) *Status
|
||||
|
||||
// HasFilterPlugins returns true if at least one Filter plugin is defined.
|
||||
HasFilterPlugins() bool
|
||||
|
||||
// HasPostFilterPlugins returns true if at least one PostFilter plugin is defined.
|
||||
HasPostFilterPlugins() bool
|
||||
|
||||
// HasScorePlugins returns true if at least one Score plugin is defined.
|
||||
HasScorePlugins() bool
|
||||
|
||||
// ListPlugins returns a map of extension point name to list of configured Plugins.
|
||||
ListPlugins() *config.Plugins
|
||||
|
||||
// ProfileName returns the name associated with this profile.
|
||||
ProfileName() string
|
||||
|
||||
// PercentageOfNodesToScore returns the percentageOfNodesToScore value associated with this profile.
|
||||
PercentageOfNodesToScore() *int32
|
||||
|
||||
// SetPodNominator sets the PodNominator
|
||||
SetPodNominator(nominator PodNominator)
|
||||
// SetPodActivator sets the PodActivator
|
||||
SetPodActivator(activator PodActivator)
|
||||
|
||||
// Close calls Close method of each plugin.
|
||||
Close() error
|
||||
}
|
||||
|
||||
// Handle provides data and some tools that plugins can use. It is
|
||||
// passed to the plugin factories at the time of plugin initialization. Plugins
|
||||
// must store and use this handle to call framework functions.
|
||||
type Handle interface {
|
||||
// PodNominator abstracts operations to maintain nominated Pods.
|
||||
PodNominator
|
||||
// PluginsRunner abstracts operations to run some plugins.
|
||||
PluginsRunner
|
||||
// PodActivator abstracts operations in the scheduling queue.
|
||||
PodActivator
|
||||
// SnapshotSharedLister returns listers from the latest NodeInfo Snapshot. The snapshot
|
||||
// is taken at the beginning of a scheduling cycle and remains unchanged until
|
||||
// a pod finishes "Permit" point.
|
||||
//
|
||||
// It should be used only during the scheduling cycle:
|
||||
// - There is no guarantee that the information remains unchanged in the binding phase of scheduling.
|
||||
// So, plugins shouldn't use it in the binding cycle (pre-bind/bind/post-bind/un-reserve plugin)
|
||||
// otherwise, a concurrent read/write error might occur.
|
||||
// - There is no guarantee that the information is always up-to-date.
|
||||
// So, plugins shouldn't use it in QueueingHint and PreEnqueue
|
||||
// otherwise, they might make a decision based on stale information.
|
||||
//
|
||||
// Instead, they should use the resources obtained from the Informers created by SharedInformerFactory().
|
||||
SnapshotSharedLister() SharedLister
|
||||
|
||||
// IterateOverWaitingPods acquires a read lock and iterates over the WaitingPods map.
|
||||
IterateOverWaitingPods(callback func(WaitingPod))
|
||||
|
||||
// GetWaitingPod returns a waiting pod given its UID.
|
||||
GetWaitingPod(uid types.UID) WaitingPod
|
||||
|
||||
// RejectWaitingPod rejects a waiting pod given its UID.
|
||||
// The return value indicates if the pod is waiting or not.
|
||||
RejectWaitingPod(uid types.UID) bool
|
||||
|
||||
// ClientSet returns a kubernetes clientSet.
|
||||
ClientSet() clientset.Interface
|
||||
|
||||
// KubeConfig returns the raw kube config.
|
||||
KubeConfig() *restclient.Config
|
||||
|
||||
// EventRecorder returns an event recorder.
|
||||
EventRecorder() events.EventRecorder
|
||||
|
||||
SharedInformerFactory() informers.SharedInformerFactory
|
||||
|
||||
// SharedDRAManager can be used to obtain DRA objects, and track modifications to them in-memory - mainly by the DRA plugin.
|
||||
// A non-default implementation can be plugged into the framework to simulate the state of DRA objects.
|
||||
SharedDRAManager() SharedDRAManager
|
||||
|
||||
// RunFilterPluginsWithNominatedPods runs the set of configured filter plugins for nominated pod on the given node.
|
||||
RunFilterPluginsWithNominatedPods(ctx context.Context, state *CycleState, pod *v1.Pod, info *NodeInfo) *Status
|
||||
|
||||
// Extenders returns registered scheduler extenders.
|
||||
Extenders() []Extender
|
||||
|
||||
// Parallelizer returns a parallelizer holding parallelism for scheduler.
|
||||
Parallelizer() parallelize.Parallelizer
|
||||
}
|
||||
|
||||
// PreFilterResult wraps needed info for scheduler framework to act upon PreFilter phase.
|
||||
type PreFilterResult struct {
|
||||
// The set of nodes that should be considered downstream; if nil then
|
||||
// all nodes are eligible.
|
||||
NodeNames sets.Set[string]
|
||||
}
|
||||
|
||||
func (p *PreFilterResult) AllNodes() bool {
|
||||
return p == nil || p.NodeNames == nil
|
||||
}
|
||||
|
||||
func (p *PreFilterResult) Merge(in *PreFilterResult) *PreFilterResult {
|
||||
if p.AllNodes() && in.AllNodes() {
|
||||
return nil
|
||||
}
|
||||
|
||||
r := PreFilterResult{}
|
||||
if p.AllNodes() {
|
||||
r.NodeNames = in.NodeNames.Clone()
|
||||
return &r
|
||||
}
|
||||
if in.AllNodes() {
|
||||
r.NodeNames = p.NodeNames.Clone()
|
||||
return &r
|
||||
}
|
||||
|
||||
r.NodeNames = p.NodeNames.Intersection(in.NodeNames)
|
||||
return &r
|
||||
}
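// For example, merging {node-a, node-b} with {node-b, node-c} yields {node-b},
// while merging with a nil ("all nodes") result simply clones the restricted set:
//
//	a := &PreFilterResult{NodeNames: sets.New("node-a", "node-b")}
//	b := &PreFilterResult{NodeNames: sets.New("node-b", "node-c")}
//	a.Merge(b).NodeNames   // {"node-b"}
//	a.Merge(nil).NodeNames // {"node-a", "node-b"}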
|
||||
|
||||
type NominatingMode int
|
||||
|
||||
const (
|
||||
ModeNoop NominatingMode = iota
|
||||
ModeOverride
|
||||
)
|
||||
|
||||
type NominatingInfo struct {
|
||||
NominatedNodeName string
|
||||
NominatingMode NominatingMode
|
||||
}
|
||||
|
||||
// PostFilterResult wraps needed info for scheduler framework to act upon PostFilter phase.
|
||||
type PostFilterResult struct {
|
||||
*NominatingInfo
|
||||
}
|
||||
|
||||
func NewPostFilterResultWithNominatedNode(name string) *PostFilterResult {
|
||||
return &PostFilterResult{
|
||||
NominatingInfo: &NominatingInfo{
|
||||
NominatedNodeName: name,
|
||||
NominatingMode: ModeOverride,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (ni *NominatingInfo) Mode() NominatingMode {
|
||||
if ni == nil {
|
||||
return ModeNoop
|
||||
}
|
||||
return ni.NominatingMode
|
||||
}
|
||||
|
||||
// PodActivator abstracts operations in the scheduling queue.
|
||||
type PodActivator interface {
|
||||
// Activate moves the given pods to activeQ.
|
||||
// If a pod isn't found in unschedulablePods or backoffQ and it's in-flight,
|
||||
// the wildcard event is registered so that the pod will be requeued when it comes back.
|
||||
// But, if a pod isn't found in unschedulablePods or backoffQ and it's not in-flight (i.e., completely unknown pod),
|
||||
// Activate would ignore the pod.
|
||||
Activate(logger klog.Logger, pods map[string]*v1.Pod)
|
||||
}
|
||||
|
||||
// PodNominator abstracts operations to maintain nominated Pods.
|
||||
type PodNominator interface {
|
||||
// AddNominatedPod adds the given pod to the nominator or
|
||||
// updates it if it already exists.
|
||||
AddNominatedPod(logger klog.Logger, pod *PodInfo, nominatingInfo *NominatingInfo)
|
||||
// DeleteNominatedPodIfExists deletes nominatedPod from internal cache. It's a no-op if it doesn't exist.
|
||||
DeleteNominatedPodIfExists(pod *v1.Pod)
|
||||
// UpdateNominatedPod updates the <oldPod> with <newPod>.
|
||||
UpdateNominatedPod(logger klog.Logger, oldPod *v1.Pod, newPodInfo *PodInfo)
|
||||
// NominatedPodsForNode returns nominatedPods on the given node.
|
||||
NominatedPodsForNode(nodeName string) []*PodInfo
|
||||
}
|
||||
|
||||
// PluginsRunner abstracts operations to run some plugins.
|
||||
// This is used by preemption PostFilter plugins when evaluating the feasibility of
|
||||
// scheduling the pod on nodes when certain running pods get evicted.
|
||||
type PluginsRunner interface {
|
||||
// RunPreScorePlugins runs the set of configured PreScore plugins. If any
|
||||
// of these plugins returns any status other than "Success", the given pod is rejected.
|
||||
RunPreScorePlugins(context.Context, *CycleState, *v1.Pod, []*NodeInfo) *Status
|
||||
// RunScorePlugins runs the set of configured scoring plugins.
|
||||
// It returns a list that stores scores from each plugin and total score for each Node.
|
||||
// It also returns *Status, which is set to non-success if any of the plugins returns
|
||||
// a non-success status.
|
||||
RunScorePlugins(context.Context, *CycleState, *v1.Pod, []*NodeInfo) ([]NodePluginScores, *Status)
|
||||
// RunFilterPlugins runs the set of configured Filter plugins for pod on
|
||||
// the given node. Note that for the node being evaluated, the passed nodeInfo
|
||||
// reference could be different from the one in NodeInfoSnapshot map (e.g., pods
|
||||
// considered to be running on the node could be different). For example, during
|
||||
// preemption, we may pass a copy of the original nodeInfo object that has some pods
|
||||
// removed from it to evaluate the possibility of preempting them to
|
||||
// schedule the target pod.
|
||||
RunFilterPlugins(context.Context, *CycleState, *v1.Pod, *NodeInfo) *Status
|
||||
// RunPreFilterExtensionAddPod calls the AddPod interface for the set of configured
|
||||
// PreFilter plugins. It returns directly if any of the plugins return any
|
||||
// status other than Success.
|
||||
RunPreFilterExtensionAddPod(ctx context.Context, state *CycleState, podToSchedule *v1.Pod, podInfoToAdd *PodInfo, nodeInfo *NodeInfo) *Status
|
||||
// RunPreFilterExtensionRemovePod calls the RemovePod interface for the set of configured
|
||||
// PreFilter plugins. It returns directly if any of the plugins return any
|
||||
// status other than Success.
|
||||
RunPreFilterExtensionRemovePod(ctx context.Context, state *CycleState, podToSchedule *v1.Pod, podInfoToRemove *PodInfo, nodeInfo *NodeInfo) *Status
|
||||
}
|
111
vendor/k8s.io/kubernetes/pkg/scheduler/framework/listers.go
generated
vendored
Normal file
111
vendor/k8s.io/kubernetes/pkg/scheduler/framework/listers.go
generated
vendored
Normal file
@ -0,0 +1,111 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package framework
|
||||
|
||||
import (
|
||||
resourceapi "k8s.io/api/resource/v1beta1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/dynamic-resource-allocation/structured"
|
||||
)
|
||||
|
||||
// NodeInfoLister interface represents anything that can list/get NodeInfo objects from node name.
|
||||
type NodeInfoLister interface {
|
||||
// List returns the list of NodeInfos.
|
||||
List() ([]*NodeInfo, error)
|
||||
// HavePodsWithAffinityList returns the list of NodeInfos of nodes with pods with affinity terms.
|
||||
HavePodsWithAffinityList() ([]*NodeInfo, error)
|
||||
// HavePodsWithRequiredAntiAffinityList returns the list of NodeInfos of nodes with pods with required anti-affinity terms.
|
||||
HavePodsWithRequiredAntiAffinityList() ([]*NodeInfo, error)
|
||||
// Get returns the NodeInfo of the given node name.
|
||||
Get(nodeName string) (*NodeInfo, error)
|
||||
}
|
||||
|
||||
// StorageInfoLister interface represents anything that handles storage-related operations and resources.
|
||||
type StorageInfoLister interface {
|
||||
// IsPVCUsedByPods returns true/false on whether the PVC is used by one or more scheduled pods,
|
||||
// keyed in the format "namespace/name".
|
||||
IsPVCUsedByPods(key string) bool
|
||||
}
|
||||
|
||||
// SharedLister groups scheduler-specific listers.
|
||||
type SharedLister interface {
|
||||
NodeInfos() NodeInfoLister
|
||||
StorageInfos() StorageInfoLister
|
||||
}
|
||||
|
||||
// ResourceSliceLister can be used to obtain ResourceSlices.
|
||||
type ResourceSliceLister interface {
|
||||
// List returns a list of all ResourceSlices.
|
||||
List() ([]*resourceapi.ResourceSlice, error)
|
||||
}
|
||||
|
||||
// DeviceClassLister can be used to obtain DeviceClasses.
|
||||
type DeviceClassLister interface {
|
||||
// List returns a list of all DeviceClasses.
|
||||
List() ([]*resourceapi.DeviceClass, error)
|
||||
// Get returns the DeviceClass with the given className.
|
||||
Get(className string) (*resourceapi.DeviceClass, error)
|
||||
}
|
||||
|
||||
// ResourceClaimTracker can be used to obtain ResourceClaims, and track changes to ResourceClaims in-memory.
|
||||
//
|
||||
// If the claims are meant to be allocated in the API during the binding phase (when used by scheduler), the tracker helps avoid
|
||||
// race conditions between scheduling and binding phases (as well as between the binding phase and the informer cache update).
|
||||
//
|
||||
// If the binding phase is not run (e.g. when used by Cluster Autoscaler which only runs the scheduling phase, and simulates binding in-memory),
|
||||
// the tracker allows the framework user to obtain the claim allocations produced by the DRA plugin, and persist them outside of the API (e.g. in-memory).
|
||||
type ResourceClaimTracker interface {
|
||||
// List lists ResourceClaims. The result is guaranteed to immediately include any changes made via AssumeClaimAfterAPICall(),
|
||||
// and SignalClaimPendingAllocation().
|
||||
List() ([]*resourceapi.ResourceClaim, error)
|
||||
// Get works like List(), but for a single claim.
|
||||
Get(namespace, claimName string) (*resourceapi.ResourceClaim, error)
|
||||
// ListAllAllocatedDevices lists all allocated Devices from allocated ResourceClaims. The result is guaranteed to immediately include
|
||||
// any changes made via AssumeClaimAfterAPICall(), and SignalClaimPendingAllocation().
|
||||
ListAllAllocatedDevices() (sets.Set[structured.DeviceID], error)
|
||||
|
||||
// SignalClaimPendingAllocation signals to the tracker that the given ResourceClaim will be allocated via an API call in the
|
||||
// binding phase. This change is immediately reflected in the result of List() and the other accessors.
|
||||
SignalClaimPendingAllocation(claimUID types.UID, allocatedClaim *resourceapi.ResourceClaim) error
|
||||
// ClaimHasPendingAllocation answers whether a given claim has a pending allocation during the binding phase. It can be used to avoid
|
||||
// race conditions in subsequent scheduling phases.
|
||||
ClaimHasPendingAllocation(claimUID types.UID) bool
|
||||
// RemoveClaimPendingAllocation removes the pending allocation for the given ResourceClaim from the tracker if any was signaled via
|
||||
// SignalClaimPendingAllocation(). Returns whether there was a pending allocation to remove. List() and the other accessors immediately
|
||||
// stop reflecting the pending allocation in the results.
|
||||
RemoveClaimPendingAllocation(claimUID types.UID) (deleted bool)
|
||||
|
||||
// AssumeClaimAfterAPICall signals to the tracker that an API call modifying the given ResourceClaim was made in the binding phase, and the
|
||||
// changes should be reflected in informers very soon. This change is immediately reflected in the result of List() and the other accessors.
|
||||
// This mechanism can be used to avoid race conditions between the informer update and subsequent scheduling phases.
|
||||
AssumeClaimAfterAPICall(claim *resourceapi.ResourceClaim) error
|
||||
// AssumedClaimRestore signals to the tracker that something went wrong with the API call modifying the given ResourceClaim, and
|
||||
// the changes won't be reflected in informers after all. List() and the other accessors immediately stop reflecting the assumed change,
|
||||
// and go back to the informer version.
|
||||
AssumedClaimRestore(namespace, claimName string)
|
||||
}
|
||||
|
||||
// SharedDRAManager can be used to obtain DRA objects, and track modifications to them in-memory - mainly by the DRA plugin.
|
||||
// The plugin's default implementation obtains the objects from the API. A different implementation can be
|
||||
// plugged into the framework in order to simulate the state of DRA objects. For example, Cluster Autoscaler
|
||||
// can use this to provide the correct DRA object state to the DRA plugin when simulating scheduling changes in-memory.
|
||||
type SharedDRAManager interface {
|
||||
ResourceClaims() ResourceClaimTracker
|
||||
ResourceSlices() ResourceSliceLister
|
||||
DeviceClasses() DeviceClassLister
|
||||
}
|
59
vendor/k8s.io/kubernetes/pkg/scheduler/framework/parallelize/error_channel.go
generated
vendored
Normal file
59
vendor/k8s.io/kubernetes/pkg/scheduler/framework/parallelize/error_channel.go
generated
vendored
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package parallelize
|
||||
|
||||
import "context"
|
||||
|
||||
// ErrorChannel supports non-blocking send and receive operation to capture error.
|
||||
// A maximum of one error is kept in the channel and the rest of the errors sent
|
||||
// are ignored, unless the existing error is received and the channel becomes empty
|
||||
// again.
|
||||
type ErrorChannel struct {
|
||||
errCh chan error
|
||||
}
|
||||
|
||||
// SendError sends an error without blocking the sender.
|
||||
func (e *ErrorChannel) SendError(err error) {
|
||||
select {
|
||||
case e.errCh <- err:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// SendErrorWithCancel sends an error without blocking the sender and calls
|
||||
// cancel function.
|
||||
func (e *ErrorChannel) SendErrorWithCancel(err error, cancel context.CancelFunc) {
|
||||
e.SendError(err)
|
||||
cancel()
|
||||
}
|
||||
|
||||
// ReceiveError receives an error from channel without blocking on the receiver.
|
||||
func (e *ErrorChannel) ReceiveError() error {
|
||||
select {
|
||||
case err := <-e.errCh:
|
||||
return err
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// NewErrorChannel returns a new ErrorChannel.
|
||||
func NewErrorChannel() *ErrorChannel {
|
||||
return &ErrorChannel{
|
||||
errCh: make(chan error, 1),
|
||||
}
|
||||
}
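// A typical usage sketch, combining ErrorChannel with Parallelizer.Until from
// this package (the nodes slice, the check function, and the operation label
// are hypothetical):
//
//	errCh := NewErrorChannel()
//	ctx, cancel := context.WithCancel(ctx)
//	defer cancel()
//	parallelizer.Until(ctx, len(nodes), func(i int) {
//		if err := check(nodes[i]); err != nil {
//			errCh.SendErrorWithCancel(err, cancel)
//		}
//	}, "hypotheticalOperation")
//	if err := errCh.ReceiveError(); err != nil {
//		return err
//	}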
|
65
vendor/k8s.io/kubernetes/pkg/scheduler/framework/parallelize/parallelism.go
generated
vendored
Normal file
65
vendor/k8s.io/kubernetes/pkg/scheduler/framework/parallelize/parallelism.go
generated
vendored
Normal file
@ -0,0 +1,65 @@
|
||||
/*
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package parallelize
|
||||
|
||||
import (
|
||||
"context"
|
||||
"math"
|
||||
|
||||
"k8s.io/client-go/util/workqueue"
|
||||
"k8s.io/kubernetes/pkg/scheduler/metrics"
|
||||
)
|
||||
|
||||
// DefaultParallelism is the default parallelism used in scheduler.
|
||||
const DefaultParallelism int = 16
|
||||
|
||||
// Parallelizer holds the parallelism for scheduler.
|
||||
type Parallelizer struct {
|
||||
parallelism int
|
||||
}
|
||||
|
||||
// NewParallelizer returns an object holding the parallelism.
|
||||
func NewParallelizer(p int) Parallelizer {
|
||||
return Parallelizer{parallelism: p}
|
||||
}
|
||||
|
||||
// chunkSizeFor returns a chunk size for the given number of items to use for
|
||||
// parallel work. The size aims to produce good CPU utilization.
|
||||
// returns max(1, min(sqrt(n), n/Parallelism))
|
||||
func chunkSizeFor(n, parallelism int) int {
|
||||
s := int(math.Sqrt(float64(n)))
|
||||
|
||||
if r := n/parallelism + 1; s > r {
|
||||
s = r
|
||||
} else if s < 1 {
|
||||
s = 1
|
||||
}
|
||||
return s
|
||||
}
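// For example, with parallelism=16: n=1000 gives min(sqrt(1000)≈31, 1000/16+1=63) = 31,
// n=100 gives min(10, 100/16+1=7) = 7, and very small inputs are clamped to a chunk size of 1.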
|
||||
|
||||
// Until is a wrapper around workqueue.ParallelizeUntil to use in scheduling algorithms.
|
||||
// A given operation will be a label that is recorded in the goroutine metric.
|
||||
func (p Parallelizer) Until(ctx context.Context, pieces int, doWorkPiece workqueue.DoWorkPieceFunc, operation string) {
|
||||
goroutinesMetric := metrics.Goroutines.WithLabelValues(operation)
|
||||
withMetrics := func(piece int) {
|
||||
goroutinesMetric.Inc()
|
||||
doWorkPiece(piece)
|
||||
goroutinesMetric.Dec()
|
||||
}
|
||||
|
||||
workqueue.ParallelizeUntil(ctx, p.parallelism, pieces, withMetrics, workqueue.WithChunkSize(chunkSizeFor(pieces, p.parallelism)))
|
||||
}
|
3
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/README.md
generated
vendored
Normal file
3
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/README.md
generated
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
# Scheduler Framework Plugins
|
||||
|
||||
Moved [here](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-scheduling/scheduler_framework_plugins.md).
|
63
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go
generated
vendored
Normal file
63
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go
generated
vendored
Normal file
@ -0,0 +1,63 @@
|
||||
/*
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package defaultbinder
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
)
|
||||
|
||||
// Name of the plugin used in the plugin registry and configurations.
|
||||
const Name = names.DefaultBinder
|
||||
|
||||
// DefaultBinder binds pods to nodes using a k8s client.
|
||||
type DefaultBinder struct {
|
||||
handle framework.Handle
|
||||
}
|
||||
|
||||
var _ framework.BindPlugin = &DefaultBinder{}
|
||||
|
||||
// New creates a DefaultBinder.
|
||||
func New(_ context.Context, _ runtime.Object, handle framework.Handle) (framework.Plugin, error) {
|
||||
return &DefaultBinder{handle: handle}, nil
|
||||
}
|
||||
|
||||
// Name returns the name of the plugin.
|
||||
func (b DefaultBinder) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
// Bind binds pods to nodes using the k8s client.
|
||||
func (b DefaultBinder) Bind(ctx context.Context, state *framework.CycleState, p *v1.Pod, nodeName string) *framework.Status {
|
||||
logger := klog.FromContext(ctx)
|
||||
logger.V(3).Info("Attempting to bind pod to node", "pod", klog.KObj(p), "node", klog.KRef("", nodeName))
|
||||
binding := &v1.Binding{
|
||||
ObjectMeta: metav1.ObjectMeta{Namespace: p.Namespace, Name: p.Name, UID: p.UID},
|
||||
Target: v1.ObjectReference{Kind: "Node", Name: nodeName},
|
||||
}
|
||||
err := b.handle.ClientSet().CoreV1().Pods(binding.Namespace).Bind(ctx, binding, metav1.CreateOptions{})
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
return nil
|
||||
}
|
364
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go
generated
vendored
Normal file
364
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go
generated
vendored
Normal file
@ -0,0 +1,364 @@
|
||||
/*
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package defaultpreemption
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"sort"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
policy "k8s.io/api/policy/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/informers"
|
||||
corelisters "k8s.io/client-go/listers/core/v1"
|
||||
policylisters "k8s.io/client-go/listers/policy/v1"
|
||||
corev1helpers "k8s.io/component-helpers/scheduling/corev1"
|
||||
"k8s.io/klog/v2"
|
||||
extenderv1 "k8s.io/kube-scheduler/extender/v1"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/preemption"
|
||||
"k8s.io/kubernetes/pkg/scheduler/metrics"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
// Name of the plugin used in the plugin registry and configurations.
|
||||
const Name = names.DefaultPreemption
|
||||
|
||||
// DefaultPreemption is a PostFilter plugin that implements the preemption logic.
|
||||
type DefaultPreemption struct {
|
||||
fh framework.Handle
|
||||
fts feature.Features
|
||||
args config.DefaultPreemptionArgs
|
||||
podLister corelisters.PodLister
|
||||
pdbLister policylisters.PodDisruptionBudgetLister
|
||||
Evaluator *preemption.Evaluator
|
||||
}
|
||||
|
||||
var _ framework.PostFilterPlugin = &DefaultPreemption{}
|
||||
var _ framework.PreEnqueuePlugin = &DefaultPreemption{}
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (pl *DefaultPreemption) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(_ context.Context, dpArgs runtime.Object, fh framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
args, ok := dpArgs.(*config.DefaultPreemptionArgs)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("got args of type %T, want *DefaultPreemptionArgs", dpArgs)
|
||||
}
|
||||
if err := validation.ValidateDefaultPreemptionArgs(nil, args); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
podLister := fh.SharedInformerFactory().Core().V1().Pods().Lister()
|
||||
pdbLister := getPDBLister(fh.SharedInformerFactory())
|
||||
|
||||
pl := DefaultPreemption{
|
||||
fh: fh,
|
||||
fts: fts,
|
||||
args: *args,
|
||||
podLister: podLister,
|
||||
pdbLister: pdbLister,
|
||||
}
|
||||
pl.Evaluator = preemption.NewEvaluator(Name, fh, &pl, fts.EnableAsyncPreemption)
|
||||
|
||||
return &pl, nil
|
||||
}
|
||||
|
||||
// PostFilter invoked at the postFilter extension point.
|
||||
func (pl *DefaultPreemption) PostFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, m framework.NodeToStatusReader) (*framework.PostFilterResult, *framework.Status) {
|
||||
defer func() {
|
||||
metrics.PreemptionAttempts.Inc()
|
||||
}()
|
||||
|
||||
result, status := pl.Evaluator.Preempt(ctx, state, pod, m)
|
||||
msg := status.Message()
|
||||
if len(msg) > 0 {
|
||||
return result, framework.NewStatus(status.Code(), "preemption: "+msg)
|
||||
}
|
||||
return result, status
|
||||
}
|
||||
|
||||
func (pl *DefaultPreemption) PreEnqueue(ctx context.Context, p *v1.Pod) *framework.Status {
|
||||
if !pl.fts.EnableAsyncPreemption {
|
||||
return nil
|
||||
}
|
||||
if pl.Evaluator.IsPodRunningPreemption(p.GetUID()) {
|
||||
return framework.NewStatus(framework.UnschedulableAndUnresolvable, "waiting for the preemption for this pod to be finished")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// EventsToRegister returns the possible events that may make a Pod
|
||||
// failed by this plugin schedulable.
|
||||
func (pl *DefaultPreemption) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
// The plugin moves the preemptor Pod to activeQ/backoffQ once the preemption API calls are all done,
|
||||
// and we don't need to move the Pod with any events.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// calculateNumCandidates returns the number of candidates the FindCandidates
|
||||
// method must produce from dry running based on the constraints given by
|
||||
// <minCandidateNodesPercentage> and <minCandidateNodesAbsolute>. The number of
|
||||
// candidates returned will never be greater than <numNodes>.
|
||||
func (pl *DefaultPreemption) calculateNumCandidates(numNodes int32) int32 {
|
||||
n := (numNodes * pl.args.MinCandidateNodesPercentage) / 100
|
||||
if n < pl.args.MinCandidateNodesAbsolute {
|
||||
n = pl.args.MinCandidateNodesAbsolute
|
||||
}
|
||||
if n > numNodes {
|
||||
n = numNodes
|
||||
}
|
||||
return n
|
||||
}
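// Illustrative sketch, not part of the vendored source: with hypothetical args of
// MinCandidateNodesPercentage=10 and MinCandidateNodesAbsolute=100, a 500-node snapshot
// gives 500*10/100 = 50, which is raised to the absolute floor of 100; the result is then
// capped at numNodes, so a 64-node snapshot returns 64.
//
//	pl := &DefaultPreemption{args: config.DefaultPreemptionArgs{
//		MinCandidateNodesPercentage: 10,
//		MinCandidateNodesAbsolute:   100,
//	}}
//	_ = pl.calculateNumCandidates(500) // 100
//	_ = pl.calculateNumCandidates(64)  // 64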
|
||||
|
||||
// GetOffsetAndNumCandidates chooses a random offset and calculates the number
|
||||
// of candidates that should be shortlisted for dry running preemption.
|
||||
func (pl *DefaultPreemption) GetOffsetAndNumCandidates(numNodes int32) (int32, int32) {
|
||||
return rand.Int31n(numNodes), pl.calculateNumCandidates(numNodes)
|
||||
}
|
||||
|
||||
// This function is not applicable for out-of-tree preemption plugins that exercise
|
||||
// different preemption candidates on the same nominated node.
|
||||
func (pl *DefaultPreemption) CandidatesToVictimsMap(candidates []preemption.Candidate) map[string]*extenderv1.Victims {
|
||||
m := make(map[string]*extenderv1.Victims, len(candidates))
|
||||
for _, c := range candidates {
|
||||
m[c.Name()] = c.Victims()
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// SelectVictimsOnNode finds minimum set of pods on the given node that should be preempted in order to make enough room
|
||||
// for "pod" to be scheduled.
|
||||
func (pl *DefaultPreemption) SelectVictimsOnNode(
|
||||
ctx context.Context,
|
||||
state *framework.CycleState,
|
||||
pod *v1.Pod,
|
||||
nodeInfo *framework.NodeInfo,
|
||||
pdbs []*policy.PodDisruptionBudget) ([]*v1.Pod, int, *framework.Status) {
|
||||
logger := klog.FromContext(ctx)
|
||||
var potentialVictims []*framework.PodInfo
|
||||
removePod := func(rpi *framework.PodInfo) error {
|
||||
if err := nodeInfo.RemovePod(logger, rpi.Pod); err != nil {
|
||||
return err
|
||||
}
|
||||
status := pl.fh.RunPreFilterExtensionRemovePod(ctx, state, pod, rpi, nodeInfo)
|
||||
if !status.IsSuccess() {
|
||||
return status.AsError()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
addPod := func(api *framework.PodInfo) error {
|
||||
nodeInfo.AddPodInfo(api)
|
||||
status := pl.fh.RunPreFilterExtensionAddPod(ctx, state, pod, api, nodeInfo)
|
||||
if !status.IsSuccess() {
|
||||
return status.AsError()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
// As the first step, remove all the lower priority pods from the node and
|
||||
// check if the given pod can be scheduled.
|
||||
podPriority := corev1helpers.PodPriority(pod)
|
||||
for _, pi := range nodeInfo.Pods {
|
||||
if corev1helpers.PodPriority(pi.Pod) < podPriority {
|
||||
potentialVictims = append(potentialVictims, pi)
|
||||
if err := removePod(pi); err != nil {
|
||||
return nil, 0, framework.AsStatus(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No potential victims are found, and so we don't need to evaluate the node again since its state didn't change.
|
||||
if len(potentialVictims) == 0 {
|
||||
return nil, 0, framework.NewStatus(framework.UnschedulableAndUnresolvable, "No preemption victims found for incoming pod")
|
||||
}
|
||||
|
||||
// If the new pod does not fit after removing all the lower priority pods,
|
||||
// we are almost done and this node is not suitable for preemption. The only
|
||||
// condition that we could check is if the "pod" is failing to schedule due to
|
||||
// inter-pod affinity to one or more victims, but we have decided not to
|
||||
// support this case for performance reasons. Having affinity to lower
|
||||
// priority pods is not a recommended configuration anyway.
|
||||
if status := pl.fh.RunFilterPluginsWithNominatedPods(ctx, state, pod, nodeInfo); !status.IsSuccess() {
|
||||
return nil, 0, status
|
||||
}
|
||||
var victims []*v1.Pod
|
||||
numViolatingVictim := 0
|
||||
// Sort potentialVictims by pod priority from high to low, which ensures that
|
||||
// higher priority pods are reprieved first.
|
||||
sort.Slice(potentialVictims, func(i, j int) bool { return util.MoreImportantPod(potentialVictims[i].Pod, potentialVictims[j].Pod) })
|
||||
// Try to reprieve as many pods as possible. We first try to reprieve the PDB
|
||||
// violating victims and then other non-violating ones. In both cases, we start
|
||||
// from the highest priority victims.
|
||||
violatingVictims, nonViolatingVictims := filterPodsWithPDBViolation(potentialVictims, pdbs)
|
||||
reprievePod := func(pi *framework.PodInfo) (bool, error) {
|
||||
if err := addPod(pi); err != nil {
|
||||
return false, err
|
||||
}
|
||||
status := pl.fh.RunFilterPluginsWithNominatedPods(ctx, state, pod, nodeInfo)
|
||||
fits := status.IsSuccess()
|
||||
if !fits {
|
||||
if err := removePod(pi); err != nil {
|
||||
return false, err
|
||||
}
|
||||
rpi := pi.Pod
|
||||
victims = append(victims, rpi)
|
||||
logger.V(5).Info("Pod is a potential preemption victim on node", "pod", klog.KObj(rpi), "node", klog.KObj(nodeInfo.Node()))
|
||||
}
|
||||
return fits, nil
|
||||
}
|
||||
for _, p := range violatingVictims {
|
||||
if fits, err := reprievePod(p); err != nil {
|
||||
return nil, 0, framework.AsStatus(err)
|
||||
} else if !fits {
|
||||
numViolatingVictim++
|
||||
}
|
||||
}
|
||||
// Now we try to reprieve non-violating victims.
|
||||
for _, p := range nonViolatingVictims {
|
||||
if _, err := reprievePod(p); err != nil {
|
||||
return nil, 0, framework.AsStatus(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Sort victims after reprieving pods to keep the pods in the victims sorted in order of priority from high to low.
|
||||
if len(violatingVictims) != 0 && len(nonViolatingVictims) != 0 {
|
||||
sort.Slice(victims, func(i, j int) bool { return util.MoreImportantPod(victims[i], victims[j]) })
|
||||
}
|
||||
return victims, numViolatingVictim, framework.NewStatus(framework.Success)
|
||||
}
|
||||
|
||||
// PodEligibleToPreemptOthers returns one bool and one string. The bool
|
||||
// indicates whether this pod should be considered for preempting other pods or
|
||||
// not. The string includes the reason if this pod isn't eligible.
|
||||
// There are several reasons:
|
||||
// 1. The pod has a preemptionPolicy of Never.
|
||||
// 2. The pod has already preempted other pods and the victims are in their graceful termination period.
|
||||
// Currently we check the node that is nominated for this pod, and as long as there are
|
||||
// terminating pods on this node, we don't attempt to preempt more pods.
|
||||
func (pl *DefaultPreemption) PodEligibleToPreemptOthers(_ context.Context, pod *v1.Pod, nominatedNodeStatus *framework.Status) (bool, string) {
|
||||
if pod.Spec.PreemptionPolicy != nil && *pod.Spec.PreemptionPolicy == v1.PreemptNever {
|
||||
return false, "not eligible due to preemptionPolicy=Never."
|
||||
}
|
||||
|
||||
nodeInfos := pl.fh.SnapshotSharedLister().NodeInfos()
|
||||
nomNodeName := pod.Status.NominatedNodeName
|
||||
if len(nomNodeName) > 0 {
|
||||
// If the pod's nominated node is considered as UnschedulableAndUnresolvable by the filters,
|
||||
// then the pod should be considered for preempting again.
|
||||
if nominatedNodeStatus.Code() == framework.UnschedulableAndUnresolvable {
|
||||
return true, ""
|
||||
}
|
||||
|
||||
if nodeInfo, _ := nodeInfos.Get(nomNodeName); nodeInfo != nil {
|
||||
podPriority := corev1helpers.PodPriority(pod)
|
||||
for _, p := range nodeInfo.Pods {
|
||||
if corev1helpers.PodPriority(p.Pod) < podPriority && podTerminatingByPreemption(p.Pod) {
|
||||
// There is a terminating pod on the nominated node.
|
||||
return false, "not eligible due to a terminating pod on the nominated node."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true, ""
|
||||
}
|
||||
|
||||
// OrderedScoreFuncs returns a list of ordered score functions to select preferable node where victims will be preempted.
|
||||
func (pl *DefaultPreemption) OrderedScoreFuncs(ctx context.Context, nodesToVictims map[string]*extenderv1.Victims) []func(node string) int64 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// podTerminatingByPreemption returns true if the pod is in the termination state caused by scheduler preemption.
|
||||
func podTerminatingByPreemption(p *v1.Pod) bool {
|
||||
if p.DeletionTimestamp == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, condition := range p.Status.Conditions {
|
||||
if condition.Type == v1.DisruptionTarget {
|
||||
return condition.Status == v1.ConditionTrue && condition.Reason == v1.PodReasonPreemptionByScheduler
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
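// Illustrative sketch, not part of the vendored source: a pod counts as terminating by
// scheduler preemption only when it both has a deletion timestamp and carries a
// DisruptionTarget condition with reason PreemptionByScheduler.
//
//	now := metav1.Now()
//	p := &v1.Pod{
//		ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &now},
//		Status: v1.PodStatus{Conditions: []v1.PodCondition{{
//			Type:   v1.DisruptionTarget,
//			Status: v1.ConditionTrue,
//			Reason: v1.PodReasonPreemptionByScheduler,
//		}}},
//	}
//	_ = podTerminatingByPreemption(p) // true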
|
||||
|
||||
// filterPodsWithPDBViolation groups the given "pods" into two groups of "violatingPods"
|
||||
// and "nonViolatingPods" based on whether their PDBs will be violated if they are
|
||||
// preempted.
|
||||
// This function is stable and does not change the order of received pods. So, if it
|
||||
// receives a sorted list, grouping will preserve the order of the input list.
|
||||
func filterPodsWithPDBViolation(podInfos []*framework.PodInfo, pdbs []*policy.PodDisruptionBudget) (violatingPodInfos, nonViolatingPodInfos []*framework.PodInfo) {
|
||||
pdbsAllowed := make([]int32, len(pdbs))
|
||||
for i, pdb := range pdbs {
|
||||
pdbsAllowed[i] = pdb.Status.DisruptionsAllowed
|
||||
}
|
||||
|
||||
for _, podInfo := range podInfos {
|
||||
pod := podInfo.Pod
|
||||
pdbForPodIsViolated := false
|
||||
// A pod with no labels will not match any PDB. So, no need to check.
|
||||
if len(pod.Labels) != 0 {
|
||||
for i, pdb := range pdbs {
|
||||
if pdb.Namespace != pod.Namespace {
|
||||
continue
|
||||
}
|
||||
selector, err := metav1.LabelSelectorAsSelector(pdb.Spec.Selector)
|
||||
if err != nil {
|
||||
// This object has an invalid selector, it does not match the pod
|
||||
continue
|
||||
}
|
||||
// A PDB with a nil or empty selector matches nothing.
|
||||
if selector.Empty() || !selector.Matches(labels.Set(pod.Labels)) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Existing in DisruptedPods means it has been processed in API server,
|
||||
// we don't treat it as a violating case.
|
||||
if _, exist := pdb.Status.DisruptedPods[pod.Name]; exist {
|
||||
continue
|
||||
}
|
||||
// Only decrement the matched pdb when it's not in its <DisruptedPods>;
|
||||
// otherwise we may over-decrement the budget number.
|
||||
pdbsAllowed[i]--
|
||||
// We have found a matching PDB.
|
||||
if pdbsAllowed[i] < 0 {
|
||||
pdbForPodIsViolated = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if pdbForPodIsViolated {
|
||||
violatingPodInfos = append(violatingPodInfos, podInfo)
|
||||
} else {
|
||||
nonViolatingPodInfos = append(nonViolatingPodInfos, podInfo)
|
||||
}
|
||||
}
|
||||
return violatingPodInfos, nonViolatingPodInfos
|
||||
}
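// Illustrative sketch, not part of the vendored source: with a single PDB whose
// Status.DisruptionsAllowed is 1 and whose selector matches both input pods, the first
// matching pod decrements the remaining budget to 0 and stays non-violating, while the
// second drives it to -1 and is grouped as violating; the input order is preserved in
// both result slices. The podInfo and PDB variables below are hypothetical.
//
//	violating, nonViolating := filterPodsWithPDBViolation(
//		[]*framework.PodInfo{podInfoA, podInfoB},
//		[]*policy.PodDisruptionBudget{pdbAllowingOneDisruption},
//	)
//	// violating == []*framework.PodInfo{podInfoB}, nonViolating == []*framework.PodInfo{podInfoA}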
|
||||
|
||||
func getPDBLister(informerFactory informers.SharedInformerFactory) policylisters.PodDisruptionBudgetLister {
|
||||
return informerFactory.Policy().V1().PodDisruptionBudgets().Lister()
|
||||
}
|
9
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/dynamicresources/OWNERS
generated
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
# See the OWNERS docs at https://go.k8s.io/owners
|
||||
|
||||
reviewers:
|
||||
- klueska
|
||||
- pohly
|
||||
- bart0sh
|
||||
labels:
|
||||
- sig/node
|
||||
- wg/device-management
|
175
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/dynamicresources/allocateddevices.go
generated
vendored
Normal file
@ -0,0 +1,175 @@
|
||||
/*
|
||||
Copyright 2024 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package dynamicresources
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
resourceapi "k8s.io/api/resource/v1beta1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/client-go/tools/cache"
|
||||
"k8s.io/dynamic-resource-allocation/structured"
|
||||
"k8s.io/klog/v2"
|
||||
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
|
||||
"k8s.io/utils/ptr"
|
||||
)
|
||||
|
||||
// foreachAllocatedDevice invokes the provided callback for each
|
||||
// device in the claim's allocation result which was allocated
|
||||
// exclusively for the claim.
|
||||
//
|
||||
// Devices allocated with admin access can be shared with other
|
||||
// claims and are skipped without invoking the callback.
|
||||
//
|
||||
// foreachAllocatedDevice does nothing if the claim is not allocated.
|
||||
func foreachAllocatedDevice(claim *resourceapi.ResourceClaim, cb func(deviceID structured.DeviceID)) {
|
||||
if claim.Status.Allocation == nil {
|
||||
return
|
||||
}
|
||||
for _, result := range claim.Status.Allocation.Devices.Results {
|
||||
// Kubernetes 1.31 did not set this, 1.32 always does.
|
||||
// Supporting 1.31 is not worth the additional code that
|
||||
// would have to be written (= looking up in request) because
|
||||
// it is extremely unlikely that there really is a result
|
||||
// that still exists in a cluster from 1.31 where this matters.
|
||||
if ptr.Deref(result.AdminAccess, false) {
|
||||
// Is not considered as allocated.
|
||||
continue
|
||||
}
|
||||
deviceID := structured.MakeDeviceID(result.Driver, result.Pool, result.Device)
|
||||
|
||||
// None of the users of this helper need to abort iterating,
|
||||
// therefore it's not supported as it only would add overhead.
|
||||
cb(deviceID)
|
||||
}
|
||||
}
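// Illustrative usage sketch, not part of the vendored source: collecting the exclusively
// allocated device IDs of a claim into a set, which is how addDevices, removeDevices and
// the claim tracker's ListAllAllocatedDevices consume this helper. Here claim is a
// hypothetical *resourceapi.ResourceClaim.
//
//	ids := sets.New[structured.DeviceID]()
//	foreachAllocatedDevice(claim, func(deviceID structured.DeviceID) {
//		ids.Insert(deviceID)
//	})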
|
||||
|
||||
// allocatedDevices reacts to events in a cache and maintains a set of all allocated devices.
|
||||
// This is cheaper than repeatedly calling List, making strings unique, and building the set
|
||||
// each time PreFilter is called.
|
||||
//
|
||||
// All methods are thread-safe. Get returns a cloned set.
|
||||
type allocatedDevices struct {
|
||||
logger klog.Logger
|
||||
|
||||
mutex sync.RWMutex
|
||||
ids sets.Set[structured.DeviceID]
|
||||
}
|
||||
|
||||
func newAllocatedDevices(logger klog.Logger) *allocatedDevices {
|
||||
return &allocatedDevices{
|
||||
logger: logger,
|
||||
ids: sets.New[structured.DeviceID](),
|
||||
}
|
||||
}
|
||||
|
||||
func (a *allocatedDevices) Get() sets.Set[structured.DeviceID] {
|
||||
a.mutex.RLock()
|
||||
defer a.mutex.RUnlock()
|
||||
|
||||
return a.ids.Clone()
|
||||
}
|
||||
|
||||
func (a *allocatedDevices) handlers() cache.ResourceEventHandler {
|
||||
return cache.ResourceEventHandlerFuncs{
|
||||
AddFunc: a.onAdd,
|
||||
UpdateFunc: a.onUpdate,
|
||||
DeleteFunc: a.onDelete,
|
||||
}
|
||||
}
|
||||
|
||||
func (a *allocatedDevices) onAdd(obj any) {
|
||||
claim, _, err := schedutil.As[*resourceapi.ResourceClaim](obj, nil)
|
||||
if err != nil {
|
||||
// Shouldn't happen.
|
||||
a.logger.Error(err, "unexpected object in allocatedDevices.onAdd")
|
||||
return
|
||||
}
|
||||
|
||||
if claim.Status.Allocation != nil {
|
||||
a.addDevices(claim)
|
||||
}
|
||||
}
|
||||
|
||||
func (a *allocatedDevices) onUpdate(oldObj, newObj any) {
|
||||
originalClaim, modifiedClaim, err := schedutil.As[*resourceapi.ResourceClaim](oldObj, newObj)
|
||||
if err != nil {
|
||||
// Shouldn't happen.
|
||||
a.logger.Error(err, "unexpected object in allocatedDevices.onUpdate")
|
||||
return
|
||||
}
|
||||
|
||||
switch {
|
||||
case originalClaim.Status.Allocation == nil && modifiedClaim.Status.Allocation != nil:
|
||||
a.addDevices(modifiedClaim)
|
||||
case originalClaim.Status.Allocation != nil && modifiedClaim.Status.Allocation == nil:
|
||||
a.removeDevices(originalClaim)
|
||||
default:
|
||||
// Nothing to do. Either both nil or both non-nil, in which case the content
|
||||
// also must be the same (immutable!).
|
||||
}
|
||||
}
|
||||
|
||||
func (a *allocatedDevices) onDelete(obj any) {
|
||||
claim, _, err := schedutil.As[*resourceapi.ResourceClaim](obj, nil)
|
||||
if err != nil {
|
||||
// Shouldn't happen.
|
||||
a.logger.Error(err, "unexpected object in allocatedDevices.onDelete")
|
||||
return
|
||||
}
|
||||
|
||||
a.removeDevices(claim)
|
||||
}
|
||||
|
||||
func (a *allocatedDevices) addDevices(claim *resourceapi.ResourceClaim) {
|
||||
if claim.Status.Allocation == nil {
|
||||
return
|
||||
}
|
||||
// Locking of the mutex gets minimized by pre-computing what needs to be done
|
||||
// without holding the lock.
|
||||
deviceIDs := make([]structured.DeviceID, 0, 20)
|
||||
foreachAllocatedDevice(claim, func(deviceID structured.DeviceID) {
|
||||
a.logger.V(6).Info("Observed device allocation", "device", deviceID, "claim", klog.KObj(claim))
|
||||
deviceIDs = append(deviceIDs, deviceID)
|
||||
})
|
||||
|
||||
a.mutex.Lock()
|
||||
defer a.mutex.Unlock()
|
||||
for _, deviceID := range deviceIDs {
|
||||
a.ids.Insert(deviceID)
|
||||
}
|
||||
}
|
||||
|
||||
func (a *allocatedDevices) removeDevices(claim *resourceapi.ResourceClaim) {
|
||||
if claim.Status.Allocation == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Locking of the mutex gets minimized by pre-computing what needs to be done
|
||||
// without holding the lock.
|
||||
deviceIDs := make([]structured.DeviceID, 0, 20)
|
||||
foreachAllocatedDevice(claim, func(deviceID structured.DeviceID) {
|
||||
a.logger.V(6).Info("Observed device deallocation", "device", deviceID, "claim", klog.KObj(claim))
|
||||
deviceIDs = append(deviceIDs, deviceID)
|
||||
})
|
||||
|
||||
a.mutex.Lock()
|
||||
defer a.mutex.Unlock()
|
||||
for _, deviceID := range deviceIDs {
|
||||
a.ids.Delete(deviceID)
|
||||
}
|
||||
}
|
226
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/dynamicresources/dra_manager.go
generated
vendored
Normal file
@ -0,0 +1,226 @@
|
||||
/*
|
||||
Copyright 2024 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package dynamicresources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
resourceapi "k8s.io/api/resource/v1beta1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/client-go/informers"
|
||||
resourcelisters "k8s.io/client-go/listers/resource/v1beta1"
|
||||
"k8s.io/dynamic-resource-allocation/structured"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util/assumecache"
|
||||
)
|
||||
|
||||
var _ framework.SharedDRAManager = &DefaultDRAManager{}
|
||||
|
||||
// DefaultDRAManager is the default implementation of SharedDRAManager. It obtains the DRA objects
|
||||
// from API informers, and uses an AssumeCache and a map of in-flight allocations in order
|
||||
// to avoid race conditions when modifying ResourceClaims.
|
||||
type DefaultDRAManager struct {
|
||||
resourceClaimTracker *claimTracker
|
||||
resourceSliceLister *resourceSliceLister
|
||||
deviceClassLister *deviceClassLister
|
||||
}
|
||||
|
||||
func NewDRAManager(ctx context.Context, claimsCache *assumecache.AssumeCache, informerFactory informers.SharedInformerFactory) *DefaultDRAManager {
|
||||
logger := klog.FromContext(ctx)
|
||||
manager := &DefaultDRAManager{
|
||||
resourceClaimTracker: &claimTracker{
|
||||
cache: claimsCache,
|
||||
inFlightAllocations: &sync.Map{},
|
||||
allocatedDevices: newAllocatedDevices(logger),
|
||||
logger: logger,
|
||||
},
|
||||
resourceSliceLister: &resourceSliceLister{sliceLister: informerFactory.Resource().V1beta1().ResourceSlices().Lister()},
|
||||
deviceClassLister: &deviceClassLister{classLister: informerFactory.Resource().V1beta1().DeviceClasses().Lister()},
|
||||
}
|
||||
|
||||
// Reacting to events is more efficient than iterating over the list
|
||||
// repeatedly in PreFilter.
|
||||
manager.resourceClaimTracker.cache.AddEventHandler(manager.resourceClaimTracker.allocatedDevices.handlers())
|
||||
|
||||
return manager
|
||||
}
|
||||
|
||||
func (s *DefaultDRAManager) ResourceClaims() framework.ResourceClaimTracker {
|
||||
return s.resourceClaimTracker
|
||||
}
|
||||
|
||||
func (s *DefaultDRAManager) ResourceSlices() framework.ResourceSliceLister {
|
||||
return s.resourceSliceLister
|
||||
}
|
||||
|
||||
func (s *DefaultDRAManager) DeviceClasses() framework.DeviceClassLister {
|
||||
return s.deviceClassLister
|
||||
}
|
||||
|
||||
var _ framework.ResourceSliceLister = &resourceSliceLister{}
|
||||
|
||||
type resourceSliceLister struct {
|
||||
sliceLister resourcelisters.ResourceSliceLister
|
||||
}
|
||||
|
||||
func (l *resourceSliceLister) List() ([]*resourceapi.ResourceSlice, error) {
|
||||
return l.sliceLister.List(labels.Everything())
|
||||
}
|
||||
|
||||
var _ framework.DeviceClassLister = &deviceClassLister{}
|
||||
|
||||
type deviceClassLister struct {
|
||||
classLister resourcelisters.DeviceClassLister
|
||||
}
|
||||
|
||||
func (l *deviceClassLister) Get(className string) (*resourceapi.DeviceClass, error) {
|
||||
return l.classLister.Get(className)
|
||||
}
|
||||
|
||||
func (l *deviceClassLister) List() ([]*resourceapi.DeviceClass, error) {
|
||||
return l.classLister.List(labels.Everything())
|
||||
}
|
||||
|
||||
var _ framework.ResourceClaimTracker = &claimTracker{}
|
||||
|
||||
type claimTracker struct {
|
||||
// cache enables temporarily storing a newer claim object
|
||||
// while the scheduler has allocated it and the corresponding object
|
||||
// update from the apiserver has not been processed by the claim
|
||||
// informer callbacks. Claims get added here in PreBind and removed by
|
||||
// the informer callback (based on the "newer than" comparison in the
|
||||
// assume cache).
|
||||
//
|
||||
// It uses cache.MetaNamespaceKeyFunc to generate object names, which
|
||||
// therefore are "<namespace>/<name>".
|
||||
//
|
||||
// This is necessary to ensure that reconstructing the resource usage
|
||||
// at the start of a pod scheduling cycle doesn't reuse the resources
|
||||
// assigned to such a claim. Alternatively, claim allocation state
|
||||
// could also get tracked across pod scheduling cycles, but that
|
||||
// - adds complexity (need to carefully sync state with informer events
|
||||
// for claims and ResourceSlices)
|
||||
// - would make integration with cluster autoscaler harder because it would need
|
||||
// to trigger informer callbacks.
|
||||
cache *assumecache.AssumeCache
|
||||
// inFlightAllocations is a map from claim UUIDs to claim objects for those claims
|
||||
// for which allocation was triggered during a scheduling cycle and the
|
||||
// corresponding claim status update call in PreBind has not been done
|
||||
// yet. If another pod needs the claim, the pod is treated as "not
|
||||
// schedulable yet". The cluster event for the claim status update will
|
||||
// make it schedulable.
|
||||
//
|
||||
// This mechanism avoids the following problem:
|
||||
// - Pod A triggers allocation for claim X.
|
||||
// - Pod B shares access to that claim and gets scheduled because
|
||||
// the claim is assumed to be allocated.
|
||||
// - PreBind for pod B is called first, tries to update reservedFor and
|
||||
// fails because the claim is not really allocated yet.
|
||||
//
|
||||
// We could avoid the ordering problem by allowing either pod A or pod B
|
||||
// to set the allocation. But that is more complicated and leads to another
|
||||
// problem:
|
||||
// - Pod A and B get scheduled as above.
|
||||
// - PreBind for pod A gets called first, then fails with a temporary API error.
|
||||
// It removes the updated claim from the assume cache because of that.
|
||||
// - PreBind for pod B gets called next and succeeds with adding the
|
||||
// allocation and its own reservedFor entry.
|
||||
// - The assume cache is now not reflecting that the claim is allocated,
|
||||
// which could lead to reusing the same resource for some other claim.
|
||||
//
|
||||
// A sync.Map is used because in practice sharing of a claim between
|
||||
// pods is expected to be rare compared to per-pod claims, so we end up
|
||||
// hitting the "multiple goroutines read, write, and overwrite entries
|
||||
// for disjoint sets of keys" case that sync.Map is optimized for.
|
||||
inFlightAllocations *sync.Map
|
||||
allocatedDevices *allocatedDevices
|
||||
logger klog.Logger
|
||||
}
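// Illustrative sketch, not part of the vendored source: the expected lifecycle is that an
// in-flight allocation is signalled when the scheduler decides on one during a scheduling
// cycle, and removed again once the claim status update in PreBind has been written (or the
// reservation is rolled back). The tracker and claim variables are hypothetical.
//
//	_ = tracker.SignalClaimPendingAllocation(claim.UID, allocatedClaim)
//	// ... after the status update succeeds or the pod is unreserved:
//	_ = tracker.RemoveClaimPendingAllocation(claim.UID)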
|
||||
|
||||
func (c *claimTracker) ClaimHasPendingAllocation(claimUID types.UID) bool {
|
||||
_, found := c.inFlightAllocations.Load(claimUID)
|
||||
return found
|
||||
}
|
||||
|
||||
func (c *claimTracker) SignalClaimPendingAllocation(claimUID types.UID, allocatedClaim *resourceapi.ResourceClaim) error {
|
||||
c.inFlightAllocations.Store(claimUID, allocatedClaim)
|
||||
// There's no reason to return an error in this implementation, but the error is helpful for other implementations.
|
||||
// For example, implementations that have to deal with fake claims might want to return an error if the allocation
|
||||
// is for an invalid claim.
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *claimTracker) RemoveClaimPendingAllocation(claimUID types.UID) (deleted bool) {
|
||||
_, found := c.inFlightAllocations.LoadAndDelete(claimUID)
|
||||
return found
|
||||
}
|
||||
|
||||
func (c *claimTracker) Get(namespace, claimName string) (*resourceapi.ResourceClaim, error) {
|
||||
obj, err := c.cache.Get(namespace + "/" + claimName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
claim, ok := obj.(*resourceapi.ResourceClaim)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unexpected object type %T for assumed object %s/%s", obj, namespace, claimName)
|
||||
}
|
||||
return claim, nil
|
||||
}
|
||||
|
||||
func (c *claimTracker) List() ([]*resourceapi.ResourceClaim, error) {
|
||||
var result []*resourceapi.ResourceClaim
|
||||
// Probably not worth adding an index for?
|
||||
objs := c.cache.List(nil)
|
||||
for _, obj := range objs {
|
||||
claim, ok := obj.(*resourceapi.ResourceClaim)
|
||||
if ok {
|
||||
result = append(result, claim)
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (c *claimTracker) ListAllAllocatedDevices() (sets.Set[structured.DeviceID], error) {
|
||||
// Start with a fresh set that matches the current known state of the
|
||||
// world according to the informers.
|
||||
allocated := c.allocatedDevices.Get()
|
||||
|
||||
// Whatever is in flight also has to be checked.
|
||||
c.inFlightAllocations.Range(func(key, value any) bool {
|
||||
claim := value.(*resourceapi.ResourceClaim)
|
||||
foreachAllocatedDevice(claim, func(deviceID structured.DeviceID) {
|
||||
c.logger.V(6).Info("Device is in flight for allocation", "device", deviceID, "claim", klog.KObj(claim))
|
||||
allocated.Insert(deviceID)
|
||||
})
|
||||
return true
|
||||
})
|
||||
// There's no reason to return an error in this implementation, but the error might be helpful for other implementations.
|
||||
return allocated, nil
|
||||
}
|
||||
|
||||
func (c *claimTracker) AssumeClaimAfterAPICall(claim *resourceapi.ResourceClaim) error {
|
||||
return c.cache.Assume(claim)
|
||||
}
|
||||
|
||||
func (c *claimTracker) AssumedClaimRestore(namespace, claimName string) {
|
||||
c.cache.Restore(namespace + "/" + claimName)
|
||||
}
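// Illustrative sketch, not part of the vendored source: because the assume cache is keyed by
// cache.MetaNamespaceKeyFunc ("<namespace>/<name>"), Get and AssumedClaimRestore join the
// namespace and claim name with a slash. The tracker variable is hypothetical.
//
//	claim, err := tracker.Get("default", "my-claim") // looks up key "default/my-claim"
//	if err == nil {
//		_ = claim.Status.Allocation
//	}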
|
905
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/dynamicresources/dynamicresources.go
generated
vendored
Normal file
@ -0,0 +1,905 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package dynamicresources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
resourceapi "k8s.io/api/resource/v1beta1"
|
||||
apiequality "k8s.io/apimachinery/pkg/api/equality"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/util/retry"
|
||||
"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
|
||||
"k8s.io/dynamic-resource-allocation/cel"
|
||||
"k8s.io/dynamic-resource-allocation/resourceclaim"
|
||||
"k8s.io/dynamic-resource-allocation/structured"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
const (
|
||||
// Name is the name of the plugin used in Registry and configurations.
|
||||
Name = names.DynamicResources
|
||||
|
||||
stateKey framework.StateKey = Name
|
||||
)
|
||||
|
||||
// The state is initialized in PreFilter phase. Because we save the pointer in
|
||||
// framework.CycleState, in the later phases we don't need to call the Write method
|
||||
// to update the value.
|
||||
type stateData struct {
|
||||
// A copy of all claims for the Pod (i.e. 1:1 match with
|
||||
// pod.Spec.ResourceClaims), initially with the status from the start
|
||||
// of the scheduling cycle. Each claim instance is read-only because it
|
||||
// might come from the informer cache. The instances get replaced when
|
||||
// the plugin itself successfully does an Update.
|
||||
//
|
||||
// Empty if the Pod has no claims.
|
||||
claims []*resourceapi.ResourceClaim
|
||||
|
||||
// Allocator handles claims with structured parameters.
|
||||
allocator *structured.Allocator
|
||||
|
||||
// mutex must be locked while accessing any of the fields below.
|
||||
mutex sync.Mutex
|
||||
|
||||
// The indices of all claims that:
|
||||
// - are allocated
|
||||
// - use delayed allocation or the builtin controller
|
||||
// - were not available on at least one node
|
||||
//
|
||||
// Set in parallel during Filter, so write access there must be
|
||||
// protected by the mutex. Used by PostFilter.
|
||||
unavailableClaims sets.Set[int]
|
||||
|
||||
informationsForClaim []informationForClaim
|
||||
|
||||
// nodeAllocations caches the result of Filter for the nodes.
|
||||
nodeAllocations map[string][]resourceapi.AllocationResult
|
||||
}
|
||||
|
||||
func (d *stateData) Clone() framework.StateData {
|
||||
return d
|
||||
}
|
||||
|
||||
type informationForClaim struct {
|
||||
// Node selector based on the claim status if allocated.
|
||||
availableOnNodes *nodeaffinity.NodeSelector
|
||||
|
||||
// Set by Reserved, published by PreBind.
|
||||
allocation *resourceapi.AllocationResult
|
||||
}
|
||||
|
||||
// DynamicResources is a plugin that ensures that ResourceClaims are allocated.
|
||||
type DynamicResources struct {
|
||||
enabled bool
|
||||
enableAdminAccess bool
|
||||
enableSchedulingQueueHint bool
|
||||
|
||||
fh framework.Handle
|
||||
clientset kubernetes.Interface
|
||||
celCache *cel.Cache
|
||||
draManager framework.SharedDRAManager
|
||||
}
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(ctx context.Context, plArgs runtime.Object, fh framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
if !fts.EnableDynamicResourceAllocation {
|
||||
// Disabled, won't do anything.
|
||||
return &DynamicResources{}, nil
|
||||
}
|
||||
|
||||
pl := &DynamicResources{
|
||||
enabled: true,
|
||||
enableAdminAccess: fts.EnableDRAAdminAccess,
|
||||
enableSchedulingQueueHint: fts.EnableSchedulingQueueHint,
|
||||
|
||||
fh: fh,
|
||||
clientset: fh.ClientSet(),
|
||||
// This is a LRU cache for compiled CEL expressions. The most
|
||||
// recent 10 of them get reused across different scheduling
|
||||
// cycles.
|
||||
celCache: cel.NewCache(10),
|
||||
draManager: fh.SharedDRAManager(),
|
||||
}
|
||||
|
||||
return pl, nil
|
||||
}
|
||||
|
||||
var _ framework.PreEnqueuePlugin = &DynamicResources{}
|
||||
var _ framework.PreFilterPlugin = &DynamicResources{}
|
||||
var _ framework.FilterPlugin = &DynamicResources{}
|
||||
var _ framework.PostFilterPlugin = &DynamicResources{}
|
||||
var _ framework.ReservePlugin = &DynamicResources{}
|
||||
var _ framework.EnqueueExtensions = &DynamicResources{}
|
||||
var _ framework.PreBindPlugin = &DynamicResources{}
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (pl *DynamicResources) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
// EventsToRegister returns the possible events that may make a Pod
|
||||
// failed by this plugin schedulable.
|
||||
func (pl *DynamicResources) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
if !pl.enabled {
|
||||
return nil, nil
|
||||
}
|
||||
// A resource might depend on node labels for topology filtering.
|
||||
// A new or updated node may make pods schedulable.
|
||||
//
|
||||
// A note about UpdateNodeTaint event:
|
||||
// Ideally, it's supposed to register only Add | UpdateNodeLabel because UpdateNodeTaint will never change the result from this plugin.
|
||||
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
|
||||
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
||||
nodeActionType := framework.Add | framework.UpdateNodeLabel | framework.UpdateNodeTaint
|
||||
if pl.enableSchedulingQueueHint {
|
||||
// When QHint is enabled, the problematic preCheck is already removed, and we can remove UpdateNodeTaint.
|
||||
nodeActionType = framework.Add | framework.UpdateNodeLabel
|
||||
}
|
||||
|
||||
events := []framework.ClusterEventWithHint{
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: nodeActionType}},
|
||||
// Allocation is tracked in ResourceClaims, so any changes may make the pods schedulable.
|
||||
{Event: framework.ClusterEvent{Resource: framework.ResourceClaim, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterClaimChange},
|
||||
// Adding the ResourceClaim name to the pod status makes pods waiting for their ResourceClaim schedulable.
|
||||
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodGeneratedResourceClaim}, QueueingHintFn: pl.isSchedulableAfterPodChange},
|
||||
// A pod might be waiting for a class to get created or modified.
|
||||
{Event: framework.ClusterEvent{Resource: framework.DeviceClass, ActionType: framework.Add | framework.Update}},
|
||||
// Adding or updating a ResourceSlice might make a pod schedulable because new resources became available.
|
||||
{Event: framework.ClusterEvent{Resource: framework.ResourceSlice, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterResourceSliceChange},
|
||||
}
|
||||
|
||||
return events, nil
|
||||
}
|
||||
|
||||
// PreEnqueue checks if there are known reasons why a pod currently cannot be
|
||||
// scheduled. When this fails, one of the registered events can trigger another
|
||||
// attempt.
|
||||
func (pl *DynamicResources) PreEnqueue(ctx context.Context, pod *v1.Pod) (status *framework.Status) {
|
||||
if !pl.enabled {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := pl.foreachPodResourceClaim(pod, nil); err != nil {
|
||||
return statusUnschedulable(klog.FromContext(ctx), err.Error())
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterClaimChange is invoked for add and update claim events reported by
|
||||
// an informer. It checks whether that change made a previously unschedulable
|
||||
// pod schedulable. It errs on the side of letting a pod scheduling attempt
|
||||
// happen. The delete claim event will not invoke it, so newObj will never be nil.
|
||||
func (pl *DynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
originalClaim, modifiedClaim, err := schedutil.As[*resourceapi.ResourceClaim](oldObj, newObj)
|
||||
if err != nil {
|
||||
// Shouldn't happen.
|
||||
return framework.Queue, fmt.Errorf("unexpected object in isSchedulableAfterClaimChange: %w", err)
|
||||
}
|
||||
|
||||
usesClaim := false
|
||||
if err := pl.foreachPodResourceClaim(pod, func(_ string, claim *resourceapi.ResourceClaim) {
|
||||
if claim.UID == modifiedClaim.UID {
|
||||
usesClaim = true
|
||||
}
|
||||
}); err != nil {
|
||||
// This is not an unexpected error: we know that
|
||||
// foreachPodResourceClaim only returns errors for "not
|
||||
// schedulable".
|
||||
if loggerV := logger.V(6); loggerV.Enabled() {
|
||||
owner := metav1.GetControllerOf(modifiedClaim)
|
||||
loggerV.Info("pod is not schedulable after resource claim change", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim), "claimOwner", owner, "reason", err.Error())
|
||||
}
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
if originalClaim != nil &&
|
||||
originalClaim.Status.Allocation != nil &&
|
||||
modifiedClaim.Status.Allocation == nil {
|
||||
// A claim with structured parameters was deallocated. This might have made
|
||||
// resources available for other pods.
|
||||
logger.V(6).Info("claim with structured parameters got deallocated", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
if !usesClaim {
|
||||
// This was not the claim the pod was waiting for.
|
||||
logger.V(6).Info("unrelated claim got modified", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
if originalClaim == nil {
|
||||
logger.V(5).Info("claim for pod got created", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
// Modifications may or may not be relevant. If the entire
|
||||
// status is as before, then something else must have changed
|
||||
// and we don't care. What happens in practice is that the
|
||||
// resource driver adds the finalizer.
|
||||
if apiequality.Semantic.DeepEqual(&originalClaim.Status, &modifiedClaim.Status) {
|
||||
if loggerV := logger.V(7); loggerV.Enabled() {
|
||||
// Log more information.
|
||||
loggerV.Info("claim for pod got modified where the pod doesn't care", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim), "diff", cmp.Diff(originalClaim, modifiedClaim))
|
||||
} else {
|
||||
logger.V(6).Info("claim for pod got modified where the pod doesn't care", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim))
|
||||
}
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("status of claim for pod got updated", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterPodChange is invoked for update pod events reported by
|
||||
// an informer. It checks whether that change adds the ResourceClaim(s) that the
|
||||
// pod has been waiting for.
|
||||
func (pl *DynamicResources) isSchedulableAfterPodChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
_, modifiedPod, err := schedutil.As[*v1.Pod](nil, newObj)
|
||||
if err != nil {
|
||||
// Shouldn't happen.
|
||||
return framework.Queue, fmt.Errorf("unexpected object in isSchedulableAfterClaimChange: %w", err)
|
||||
}
|
||||
|
||||
if pod.UID != modifiedPod.UID {
|
||||
logger.V(7).Info("pod is not schedulable after change in other pod", "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
if err := pl.foreachPodResourceClaim(modifiedPod, nil); err != nil {
|
||||
// This is not an unexpected error: we know that
|
||||
// foreachPodResourceClaim only returns errors for "not
|
||||
// schedulable".
|
||||
logger.V(6).Info("pod is not schedulable after being updated", "pod", klog.KObj(pod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("pod got updated and is schedulable", "pod", klog.KObj(pod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterResourceSliceChange is invoked for add and update slice events reported by
|
||||
// an informer. Such changes can make an unschedulable pod schedulable when the pod requests a device
|
||||
// and the change adds a suitable device.
|
||||
//
|
||||
// For the sake of faster execution and avoiding code duplication, isSchedulableAfterResourceSliceChange
|
||||
// only checks whether the pod uses claims. All of the more detailed checks are done in the scheduling
|
||||
// attempt.
|
||||
//
|
||||
// The delete claim event will not invoke it, so newObj will never be nil.
|
||||
func (pl *DynamicResources) isSchedulableAfterResourceSliceChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
_, modifiedSlice, err := schedutil.As[*resourceapi.ResourceSlice](oldObj, newObj)
|
||||
if err != nil {
|
||||
// Shouldn't happen.
|
||||
return framework.Queue, fmt.Errorf("unexpected object in isSchedulableAfterResourceSliceChange: %w", err)
|
||||
}
|
||||
|
||||
if err := pl.foreachPodResourceClaim(pod, nil); err != nil {
|
||||
// This is not an unexpected error: we know that
|
||||
// foreachPodResourceClaim only returns errors for "not
|
||||
// schedulable".
|
||||
logger.V(6).Info("pod is not schedulable after resource slice change", "pod", klog.KObj(pod), "resourceSlice", klog.KObj(modifiedSlice), "reason", err.Error())
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// We could check what got changed in the slice, but right now that's likely to be
|
||||
// about the spec (there's no status yet...).
|
||||
// We could check whether all claims use classic DRA, but that doesn't seem worth it.
|
||||
// Let's assume that changing the slice may make the pod schedulable.
|
||||
logger.V(5).Info("ResourceSlice change might make pod schedulable", "pod", klog.KObj(pod), "resourceSlice", klog.KObj(modifiedSlice))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
// podResourceClaims returns the ResourceClaims for all pod.Spec.PodResourceClaims.
|
||||
func (pl *DynamicResources) podResourceClaims(pod *v1.Pod) ([]*resourceapi.ResourceClaim, error) {
|
||||
claims := make([]*resourceapi.ResourceClaim, 0, len(pod.Spec.ResourceClaims))
|
||||
if err := pl.foreachPodResourceClaim(pod, func(_ string, claim *resourceapi.ResourceClaim) {
|
||||
// We store the pointer as returned by the lister. The
|
||||
// assumption is that if a claim gets modified while our code
|
||||
// runs, the cache will store a new pointer, not mutate the
|
||||
// existing object that we point to here.
|
||||
claims = append(claims, claim)
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return claims, nil
|
||||
}
|
||||
|
||||
// foreachPodResourceClaim checks that each ResourceClaim for the pod exists.
|
||||
// It calls an optional handler for those claims that it finds.
|
||||
func (pl *DynamicResources) foreachPodResourceClaim(pod *v1.Pod, cb func(podResourceName string, claim *resourceapi.ResourceClaim)) error {
|
||||
for _, resource := range pod.Spec.ResourceClaims {
|
||||
claimName, mustCheckOwner, err := resourceclaim.Name(pod, &resource)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// The claim name might be nil if no underlying resource claim
|
||||
// was generated for the referenced claim. There are valid use
|
||||
// cases when this might happen, so we simply skip it.
|
||||
if claimName == nil {
|
||||
continue
|
||||
}
|
||||
claim, err := pl.draManager.ResourceClaims().Get(pod.Namespace, *claimName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if claim.DeletionTimestamp != nil {
|
||||
return fmt.Errorf("resourceclaim %q is being deleted", claim.Name)
|
||||
}
|
||||
|
||||
if mustCheckOwner {
|
||||
if err := resourceclaim.IsForPod(pod, claim); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cb != nil {
|
||||
cb(resource.Name, claim)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// PreFilter invoked at the prefilter extension point to check if pod has all
|
||||
// immediate claims bound. UnschedulableAndUnresolvable is returned if
|
||||
// the pod cannot be scheduled at the moment on any node.
|
||||
func (pl *DynamicResources) PreFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
|
||||
if !pl.enabled {
|
||||
return nil, framework.NewStatus(framework.Skip)
|
||||
}
|
||||
logger := klog.FromContext(ctx)
|
||||
|
||||
// If the pod does not reference any claim, we don't need to do
|
||||
// anything for it. We just initialize an empty state to record that
|
||||
// observation for the other functions. This gets updated below
|
||||
// if we get that far.
|
||||
s := &stateData{}
|
||||
state.Write(stateKey, s)
|
||||
|
||||
claims, err := pl.podResourceClaims(pod)
|
||||
if err != nil {
|
||||
return nil, statusUnschedulable(logger, err.Error())
|
||||
}
|
||||
logger.V(5).Info("pod resource claims", "pod", klog.KObj(pod), "resourceclaims", klog.KObjSlice(claims))
|
||||
|
||||
// If the pod does not reference any claim,
|
||||
// DynamicResources Filter has nothing to do with the Pod.
|
||||
if len(claims) == 0 {
|
||||
return nil, framework.NewStatus(framework.Skip)
|
||||
}
|
||||
|
||||
// All claims which the scheduler needs to allocate itself.
|
||||
allocateClaims := make([]*resourceapi.ResourceClaim, 0, len(claims))
|
||||
|
||||
s.informationsForClaim = make([]informationForClaim, len(claims))
|
||||
for index, claim := range claims {
|
||||
if claim.Status.Allocation != nil &&
|
||||
!resourceclaim.CanBeReserved(claim) &&
|
||||
!resourceclaim.IsReservedForPod(pod, claim) {
|
||||
// Resource is in use. The pod has to wait.
|
||||
return nil, statusUnschedulable(logger, "resourceclaim in use", "pod", klog.KObj(pod), "resourceclaim", klog.KObj(claim))
|
||||
}
|
||||
|
||||
if claim.Status.Allocation != nil {
|
||||
if claim.Status.Allocation.NodeSelector != nil {
|
||||
nodeSelector, err := nodeaffinity.NewNodeSelector(claim.Status.Allocation.NodeSelector)
|
||||
if err != nil {
|
||||
return nil, statusError(logger, err)
|
||||
}
|
||||
s.informationsForClaim[index].availableOnNodes = nodeSelector
|
||||
}
|
||||
} else {
|
||||
allocateClaims = append(allocateClaims, claim)
|
||||
|
||||
// Allocation in flight? Better wait for that
|
||||
// to finish, see inFlightAllocations
|
||||
// documentation for details.
|
||||
if pl.draManager.ResourceClaims().ClaimHasPendingAllocation(claim.UID) {
|
||||
return nil, statusUnschedulable(logger, fmt.Sprintf("resource claim %s is in the process of being allocated", klog.KObj(claim)))
|
||||
}
|
||||
|
||||
// Check all requests and device classes. If a class
|
||||
// does not exist, scheduling cannot proceed, no matter
|
||||
// how the claim is being allocated.
|
||||
//
|
||||
// When using a control plane controller, a class might
|
||||
// have a node filter. This is useful for trimming the
|
||||
// initial set of potential nodes before we ask the
|
||||
// driver(s) for information about the specific pod.
|
||||
for _, request := range claim.Spec.Devices.Requests {
|
||||
if request.DeviceClassName == "" {
|
||||
return nil, statusError(logger, fmt.Errorf("request %s: unsupported request type", request.Name))
|
||||
}
|
||||
|
||||
_, err := pl.draManager.DeviceClasses().Get(request.DeviceClassName)
|
||||
if err != nil {
|
||||
// If the class cannot be retrieved, allocation cannot proceed.
|
||||
if apierrors.IsNotFound(err) {
|
||||
// Here we mark the pod as "unschedulable", so it'll sleep in
|
||||
// the unschedulable queue until a DeviceClass event occurs.
|
||||
return nil, statusUnschedulable(logger, fmt.Sprintf("request %s: device class %s does not exist", request.Name, request.DeviceClassName))
|
||||
}
|
||||
// Other error, retry with backoff.
|
||||
return nil, statusError(logger, fmt.Errorf("request %s: look up device class: %w", request.Name, err))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(allocateClaims) > 0 {
|
||||
logger.V(5).Info("Preparing allocation with structured parameters", "pod", klog.KObj(pod), "resourceclaims", klog.KObjSlice(allocateClaims))
|
||||
|
||||
// Doing this over and over again for each pod could be avoided
|
||||
// by setting the allocator up once and then keeping it up-to-date
|
||||
// as changes are observed.
|
||||
//
|
||||
// But that would cause problems for using the plugin in the
|
||||
// Cluster Autoscaler. If this step here turns out to be
|
||||
// expensive, we may have to maintain and update state more
|
||||
// persistently.
|
||||
//
|
||||
// Claims (and thus their devices) are treated as "allocated" if they are in the assume cache
|
||||
// or currently their allocation is in-flight. This does not change
|
||||
// during filtering, so we can determine that once.
|
||||
allAllocatedDevices, err := pl.draManager.ResourceClaims().ListAllAllocatedDevices()
|
||||
if err != nil {
|
||||
return nil, statusError(logger, err)
|
||||
}
|
||||
slices, err := pl.draManager.ResourceSlices().List()
|
||||
if err != nil {
|
||||
return nil, statusError(logger, err)
|
||||
}
|
||||
allocator, err := structured.NewAllocator(ctx, pl.enableAdminAccess, allocateClaims, allAllocatedDevices, pl.draManager.DeviceClasses(), slices, pl.celCache)
|
||||
if err != nil {
|
||||
return nil, statusError(logger, err)
|
||||
}
|
||||
s.allocator = allocator
|
||||
s.nodeAllocations = make(map[string][]resourceapi.AllocationResult)
|
||||
}
|
||||
|
||||
s.claims = claims
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// PreFilterExtensions returns prefilter extensions, pod add and remove.
|
||||
func (pl *DynamicResources) PreFilterExtensions() framework.PreFilterExtensions {
|
||||
return nil
|
||||
}
|
||||
|
||||
func getStateData(cs *framework.CycleState) (*stateData, error) {
|
||||
state, err := cs.Read(stateKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s, ok := state.(*stateData)
|
||||
if !ok {
|
||||
return nil, errors.New("unable to convert state into stateData")
|
||||
}
|
||||
return s, nil
|
||||
}
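// Illustrative sketch, not part of the vendored source: PreFilter stores the *stateData
// pointer once, and later extension points read the same pointer back; updates made under
// state.mutex in Filter are therefore visible in PostFilter and Reserve without another
// Write call. Here cs is the cycle's *framework.CycleState.
//
//	s := &stateData{}
//	cs.Write(stateKey, s)     // in PreFilter
//	s2, _ := getStateData(cs) // in a later phase; s2 == s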
|
||||
|
||||
// Filter invoked at the filter extension point.
|
||||
// It evaluates if a pod can fit due to the resources it requests,
|
||||
// for both allocated and unallocated claims.
|
||||
//
|
||||
// For claims that are bound, then it checks that the node affinity is
|
||||
// satisfied by the given node.
|
||||
//
|
||||
// For claims that are unbound, it checks whether the claim might get allocated
|
||||
// for the node.
|
||||
func (pl *DynamicResources) Filter(ctx context.Context, cs *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
if !pl.enabled {
|
||||
return nil
|
||||
}
|
||||
state, err := getStateData(cs)
|
||||
if err != nil {
|
||||
return statusError(klog.FromContext(ctx), err)
|
||||
}
|
||||
if len(state.claims) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
logger := klog.FromContext(ctx)
|
||||
node := nodeInfo.Node()
|
||||
|
||||
var unavailableClaims []int
|
||||
for index, claim := range state.claims {
|
||||
logger.V(10).Info("filtering based on resource claims of the pod", "pod", klog.KObj(pod), "node", klog.KObj(node), "resourceclaim", klog.KObj(claim))
|
||||
|
||||
// This node selector only gets set if the claim is allocated.
|
||||
if nodeSelector := state.informationsForClaim[index].availableOnNodes; nodeSelector != nil && !nodeSelector.Match(node) {
|
||||
logger.V(5).Info("allocation's node selector does not match", "pod", klog.KObj(pod), "node", klog.KObj(node), "resourceclaim", klog.KObj(claim))
|
||||
unavailableClaims = append(unavailableClaims, index)
|
||||
}
|
||||
}
|
||||
|
||||
// Use allocator to check the node and cache the result in case that the node is picked.
|
||||
var allocations []resourceapi.AllocationResult
|
||||
if state.allocator != nil {
|
||||
allocCtx := ctx
|
||||
if loggerV := logger.V(5); loggerV.Enabled() {
|
||||
allocCtx = klog.NewContext(allocCtx, klog.LoggerWithValues(logger, "node", klog.KObj(node)))
|
||||
}
|
||||
|
||||
a, err := state.allocator.Allocate(allocCtx, node)
|
||||
if err != nil {
|
||||
// This should only fail if there is something wrong with the claim or class.
|
||||
// Return an error to abort scheduling of it.
|
||||
//
|
||||
// This will cause retries. It would be slightly nicer to mark it as unschedulable
|
||||
// *and* abort scheduling. Then only a cluster event for updating the claim or class
|
||||
// with the broken CEL expression would trigger rescheduling.
|
||||
//
|
||||
// But we cannot do both. As this shouldn't occur often, aborting like this is
|
||||
// better than the more complicated alternative (return Unschedulable here, remember
|
||||
// the error, then raise it again later if needed).
|
||||
return statusError(logger, err, "pod", klog.KObj(pod), "node", klog.KObj(node), "resourceclaims", klog.KObjSlice(state.allocator.ClaimsToAllocate()))
|
||||
}
|
||||
// Check for exact length just to be sure. In practice this is all-or-nothing.
|
||||
if len(a) != len(state.allocator.ClaimsToAllocate()) {
|
||||
return statusUnschedulable(logger, "cannot allocate all claims", "pod", klog.KObj(pod), "node", klog.KObj(node), "resourceclaims", klog.KObjSlice(state.allocator.ClaimsToAllocate()))
|
||||
}
|
||||
// Reserve uses this information.
|
||||
allocations = a
|
||||
}
|
||||
|
||||
// Store information in state while holding the mutex.
|
||||
if state.allocator != nil || len(unavailableClaims) > 0 {
|
||||
state.mutex.Lock()
|
||||
defer state.mutex.Unlock()
|
||||
}
|
||||
|
||||
if len(unavailableClaims) > 0 {
|
||||
// Remember all unavailable claims. This might be observed
|
||||
// concurrently, so we have to lock the state before writing.
|
||||
|
||||
if state.unavailableClaims == nil {
|
||||
state.unavailableClaims = sets.New[int]()
|
||||
}
|
||||
|
||||
for _, index := range unavailableClaims {
|
||||
state.unavailableClaims.Insert(index)
|
||||
}
|
||||
return statusUnschedulable(logger, "resourceclaim not available on the node", "pod", klog.KObj(pod))
|
||||
}
|
||||
|
||||
if state.allocator != nil {
|
||||
state.nodeAllocations[node.Name] = allocations
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// PostFilter checks whether there are allocated claims that could get
|
||||
// deallocated to help make the Pod schedulable. If so, it picks one and
|
||||
// requests its deallocation. This only gets called when filtering found no
|
||||
// suitable node.
|
||||
func (pl *DynamicResources) PostFilter(ctx context.Context, cs *framework.CycleState, pod *v1.Pod, filteredNodeStatusMap framework.NodeToStatusReader) (*framework.PostFilterResult, *framework.Status) {
|
||||
if !pl.enabled {
|
||||
return nil, framework.NewStatus(framework.Unschedulable, "plugin disabled")
|
||||
}
|
||||
logger := klog.FromContext(ctx)
|
||||
state, err := getStateData(cs)
|
||||
if err != nil {
|
||||
return nil, statusError(logger, err)
|
||||
}
|
||||
if len(state.claims) == 0 {
|
||||
return nil, framework.NewStatus(framework.Unschedulable, "no new claims to deallocate")
|
||||
}
|
||||
|
||||
// Iterating over a map happens in random order. This is intentional here: we want to
|
||||
// pick one claim randomly because there is no better heuristic.
|
||||
for index := range state.unavailableClaims {
|
||||
claim := state.claims[index]
|
||||
if len(claim.Status.ReservedFor) == 0 ||
|
||||
len(claim.Status.ReservedFor) == 1 && claim.Status.ReservedFor[0].UID == pod.UID {
|
||||
claim := claim.DeepCopy()
|
||||
claim.Status.ReservedFor = nil
|
||||
claim.Status.Allocation = nil
|
||||
logger.V(5).Info("Deallocation of ResourceClaim", "pod", klog.KObj(pod), "resourceclaim", klog.KObj(claim))
|
||||
if _, err := pl.clientset.ResourceV1beta1().ResourceClaims(claim.Namespace).UpdateStatus(ctx, claim, metav1.UpdateOptions{}); err != nil {
|
||||
return nil, statusError(logger, err)
|
||||
}
|
||||
return nil, framework.NewStatus(framework.Unschedulable, "deallocation of ResourceClaim completed")
|
||||
}
|
||||
}
|
||||
return nil, framework.NewStatus(framework.Unschedulable, "still not schedulable")
|
||||
}
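// Editor's note (illustrative, not part of the upstream file): the loop above only picks a
// claim for deallocation when no other pod is using it, i.e. when
//
//	len(claim.Status.ReservedFor) == 0 ||
//		(len(claim.Status.ReservedFor) == 1 && claim.Status.ReservedFor[0].UID == pod.UID)
//
// A claim reserved by any other pod is left untouched so that pod keeps its allocation.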
|
||||
|
||||
// Reserve reserves claims for the pod.
|
||||
func (pl *DynamicResources) Reserve(ctx context.Context, cs *framework.CycleState, pod *v1.Pod, nodeName string) (status *framework.Status) {
|
||||
if !pl.enabled {
|
||||
return nil
|
||||
}
|
||||
state, err := getStateData(cs)
|
||||
if err != nil {
|
||||
return statusError(klog.FromContext(ctx), err)
|
||||
}
|
||||
if len(state.claims) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
logger := klog.FromContext(ctx)
|
||||
|
||||
numClaimsWithAllocator := 0
|
||||
for _, claim := range state.claims {
|
||||
if claim.Status.Allocation != nil {
|
||||
// Allocated, but perhaps not reserved yet. We checked in PreFilter that
|
||||
// the pod could reserve the claim. Instead of reserving here by
|
||||
// updating the ResourceClaim status, we assume that reserving
|
||||
// will work and only do it for real during binding. If it fails at
|
||||
// that time, some other pod was faster and we have to try again.
|
||||
continue
|
||||
}
|
||||
|
||||
numClaimsWithAllocator++
|
||||
}
|
||||
|
||||
if numClaimsWithAllocator == 0 {
|
||||
// Nothing left to do.
|
||||
return nil
|
||||
}
|
||||
|
||||
// Prepare allocation of claims handled by the scheduler.
|
||||
if state.allocator != nil {
|
||||
// Entries in these two slices match each other.
|
||||
claimsToAllocate := state.allocator.ClaimsToAllocate()
|
||||
allocations, ok := state.nodeAllocations[nodeName]
|
||||
if !ok {
|
||||
// We checked before that the node is suitable. This shouldn't have failed,
|
||||
// so treat this as an error.
|
||||
return statusError(logger, errors.New("claim allocation not found for node"))
|
||||
}
|
||||
|
||||
// Sanity check: do we have results for all pending claims?
|
||||
if len(allocations) != len(claimsToAllocate) ||
|
||||
len(allocations) != numClaimsWithAllocator {
|
||||
return statusError(logger, fmt.Errorf("internal error, have %d allocations, %d claims to allocate, want %d claims", len(allocations), len(claimsToAllocate), numClaimsWithAllocator))
|
||||
}
|
||||
|
||||
for i, claim := range claimsToAllocate {
|
||||
index := slices.Index(state.claims, claim)
|
||||
if index < 0 {
|
||||
return statusError(logger, fmt.Errorf("internal error, claim %s with allocation not found", claim.Name))
|
||||
}
|
||||
allocation := &allocations[i]
|
||||
state.informationsForClaim[index].allocation = allocation
|
||||
|
||||
// Strictly speaking, we don't need to store the full modified object.
|
||||
// The allocation would be enough. The full object is useful for
|
||||
// debugging, testing and the allocator, so let's make it realistic.
|
||||
claim = claim.DeepCopy()
|
||||
if !slices.Contains(claim.Finalizers, resourceapi.Finalizer) {
|
||||
claim.Finalizers = append(claim.Finalizers, resourceapi.Finalizer)
|
||||
}
|
||||
claim.Status.Allocation = allocation
|
||||
err := pl.draManager.ResourceClaims().SignalClaimPendingAllocation(claim.UID, claim)
|
||||
if err != nil {
|
||||
return statusError(logger, fmt.Errorf("internal error, couldn't signal allocation for claim %s", claim.Name))
|
||||
}
|
||||
logger.V(5).Info("Reserved resource in allocation result", "claim", klog.KObj(claim), "allocation", klog.Format(allocation))
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Unreserve removes the pod from the ReservedFor field of all claims.
|
||||
// It's idempotent, and does nothing if no state is found for the given pod.
|
||||
func (pl *DynamicResources) Unreserve(ctx context.Context, cs *framework.CycleState, pod *v1.Pod, nodeName string) {
|
||||
if !pl.enabled {
|
||||
return
|
||||
}
|
||||
state, err := getStateData(cs)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if len(state.claims) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
logger := klog.FromContext(ctx)
|
||||
|
||||
for index, claim := range state.claims {
|
||||
// If allocation was in-flight, then it's not anymore and we need to revert the
|
||||
// claim object in the assume cache to what it was before.
|
||||
if deleted := pl.draManager.ResourceClaims().RemoveClaimPendingAllocation(state.claims[index].UID); deleted {
|
||||
pl.draManager.ResourceClaims().AssumedClaimRestore(claim.Namespace, claim.Name)
|
||||
}
|
||||
|
||||
if claim.Status.Allocation != nil &&
|
||||
resourceclaim.IsReservedForPod(pod, claim) {
|
||||
// Remove pod from ReservedFor. A strategic-merge-patch is used
|
||||
// because that allows removing an individual entry without having
|
||||
// the latest slice.
|
||||
patch := fmt.Sprintf(`{"metadata": {"uid": %q}, "status": { "reservedFor": [ {"$patch": "delete", "uid": %q} ] }}`,
|
||||
claim.UID,
|
||||
pod.UID,
|
||||
)
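// Editor's note (illustrative sketch with hypothetical UIDs): the rendered patch sent to the
// API server looks like
//
//	{"metadata": {"uid": "claim-uid-123"},
//	 "status": {"reservedFor": [{"$patch": "delete", "uid": "pod-uid-456"}]}}
//
// The "$patch": "delete" directive removes only the matching reservedFor entry, which is why
// an up-to-date copy of the whole slice is not needed.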
|
||||
logger.V(5).Info("unreserve", "resourceclaim", klog.KObj(claim), "pod", klog.KObj(pod))
|
||||
claim, err := pl.clientset.ResourceV1beta1().ResourceClaims(claim.Namespace).Patch(ctx, claim.Name, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{}, "status")
|
||||
if err != nil {
|
||||
// We will get here again when pod scheduling is retried.
|
||||
logger.Error(err, "unreserve", "resourceclaim", klog.KObj(claim))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// PreBind gets called in a separate goroutine after it has been determined
|
||||
// that the pod should get bound to this node. Because Reserve did not actually
|
||||
// reserve claims, we need to do it now. For claims with the builtin controller,
|
||||
// we also handle the allocation.
|
||||
//
|
||||
// If anything fails, we return an error and
|
||||
// the pod will have to go into the backoff queue. The scheduler will call
|
||||
// Unreserve as part of the error handling.
|
||||
func (pl *DynamicResources) PreBind(ctx context.Context, cs *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status {
|
||||
if !pl.enabled {
|
||||
return nil
|
||||
}
|
||||
state, err := getStateData(cs)
|
||||
if err != nil {
|
||||
return statusError(klog.FromContext(ctx), err)
|
||||
}
|
||||
if len(state.claims) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
logger := klog.FromContext(ctx)
|
||||
|
||||
for index, claim := range state.claims {
|
||||
if !resourceclaim.IsReservedForPod(pod, claim) {
|
||||
claim, err := pl.bindClaim(ctx, state, index, pod, nodeName)
|
||||
if err != nil {
|
||||
return statusError(logger, err)
|
||||
}
|
||||
state.claims[index] = claim
|
||||
}
|
||||
}
|
||||
// If we get here, we know that reserving the claim for
|
||||
// the pod worked and we can proceed with binding it.
|
||||
return nil
|
||||
}
|
||||
|
||||
// bindClaim gets called by PreBind for a claim which is not yet reserved for the pod.
|
||||
// It might not even be allocated. bindClaim then ensures that the allocation
|
||||
// and reservation are recorded. This finishes the work started in Reserve.
|
||||
func (pl *DynamicResources) bindClaim(ctx context.Context, state *stateData, index int, pod *v1.Pod, nodeName string) (patchedClaim *resourceapi.ResourceClaim, finalErr error) {
|
||||
logger := klog.FromContext(ctx)
|
||||
claim := state.claims[index].DeepCopy()
|
||||
allocation := state.informationsForClaim[index].allocation
|
||||
defer func() {
|
||||
if allocation != nil {
|
||||
// The scheduler was handling allocation. Now that has
|
||||
// completed, either successfully or with a failure.
|
||||
if finalErr == nil {
|
||||
// This can fail, but only for reasons that are okay (concurrent delete or update).
|
||||
// Shouldn't happen in this case.
|
||||
if err := pl.draManager.ResourceClaims().AssumeClaimAfterAPICall(claim); err != nil {
|
||||
logger.V(5).Info("Claim not stored in assume cache", "err", finalErr)
|
||||
}
|
||||
}
|
||||
pl.draManager.ResourceClaims().RemoveClaimPendingAllocation(claim.UID)
|
||||
}
|
||||
}()
|
||||
|
||||
logger.V(5).Info("preparing claim status update", "claim", klog.KObj(state.claims[index]), "allocation", klog.Format(allocation))
|
||||
|
||||
// We may run into a ResourceVersion conflict because there may be some
|
||||
// benign concurrent changes. In that case we get the latest claim and
|
||||
// try again.
|
||||
refreshClaim := false
|
||||
retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error {
|
||||
if refreshClaim {
|
||||
updatedClaim, err := pl.clientset.ResourceV1beta1().ResourceClaims(claim.Namespace).Get(ctx, claim.Name, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
return fmt.Errorf("get updated claim %s after conflict: %w", klog.KObj(claim), err)
|
||||
}
|
||||
logger.V(5).Info("retrying update after conflict", "claim", klog.KObj(claim))
|
||||
claim = updatedClaim
|
||||
} else {
|
||||
// All future retries must get a new claim first.
|
||||
refreshClaim = true
|
||||
}
|
||||
|
||||
if claim.DeletionTimestamp != nil {
|
||||
return fmt.Errorf("claim %s got deleted in the meantime", klog.KObj(claim))
|
||||
}
|
||||
|
||||
// Do we need to store an allocation result from Reserve?
|
||||
if allocation != nil {
|
||||
if claim.Status.Allocation != nil {
|
||||
return fmt.Errorf("claim %s got allocated elsewhere in the meantime", klog.KObj(claim))
|
||||
}
|
||||
|
||||
// The finalizer needs to be added in a normal update.
|
||||
// If we were interrupted in the past, it might already be set and we simply continue.
|
||||
if !slices.Contains(claim.Finalizers, resourceapi.Finalizer) {
|
||||
claim.Finalizers = append(claim.Finalizers, resourceapi.Finalizer)
|
||||
updatedClaim, err := pl.clientset.ResourceV1beta1().ResourceClaims(claim.Namespace).Update(ctx, claim, metav1.UpdateOptions{})
|
||||
if err != nil {
|
||||
return fmt.Errorf("add finalizer to claim %s: %w", klog.KObj(claim), err)
|
||||
}
|
||||
claim = updatedClaim
|
||||
}
|
||||
claim.Status.Allocation = allocation
|
||||
}
|
||||
|
||||
// We can simply try to add the pod here without checking
|
||||
// preconditions. The apiserver will tell us with a
|
||||
// non-conflict error if this isn't possible.
|
||||
claim.Status.ReservedFor = append(claim.Status.ReservedFor, resourceapi.ResourceClaimConsumerReference{Resource: "pods", Name: pod.Name, UID: pod.UID})
|
||||
updatedClaim, err := pl.clientset.ResourceV1beta1().ResourceClaims(claim.Namespace).UpdateStatus(ctx, claim, metav1.UpdateOptions{})
|
||||
if err != nil {
|
||||
if allocation != nil {
|
||||
return fmt.Errorf("add allocation and reservation to claim %s: %w", klog.KObj(claim), err)
|
||||
}
|
||||
return fmt.Errorf("add reservation to claim %s: %w", klog.KObj(claim), err)
|
||||
}
|
||||
claim = updatedClaim
|
||||
return nil
|
||||
})
|
||||
|
||||
if retryErr != nil {
|
||||
return nil, retryErr
|
||||
}
|
||||
|
||||
logger.V(5).Info("reserved", "pod", klog.KObj(pod), "node", klog.ObjectRef{Name: nodeName}, "resourceclaim", klog.Format(claim))
|
||||
return claim, nil
|
||||
}
|
||||
|
||||
// statusUnschedulable ensures that there is a log message associated with the
|
||||
// line where the status originated.
|
||||
func statusUnschedulable(logger klog.Logger, reason string, kv ...interface{}) *framework.Status {
|
||||
if loggerV := logger.V(5); loggerV.Enabled() {
|
||||
helper, loggerV := loggerV.WithCallStackHelper()
|
||||
helper()
|
||||
kv = append(kv, "reason", reason)
|
||||
// nolint: logcheck // warns because it cannot check key/values
|
||||
loggerV.Info("pod unschedulable", kv...)
|
||||
}
|
||||
return framework.NewStatus(framework.UnschedulableAndUnresolvable, reason)
|
||||
}
|
||||
|
||||
// statusError ensures that there is a log message associated with the
|
||||
// line where the error originated.
|
||||
func statusError(logger klog.Logger, err error, kv ...interface{}) *framework.Status {
|
||||
if loggerV := logger.V(5); loggerV.Enabled() {
|
||||
helper, loggerV := loggerV.WithCallStackHelper()
|
||||
helper()
|
||||
// nolint: logcheck // warns because it cannot check key/values
|
||||
loggerV.Error(err, "dynamic resource plugin failed", kv...)
|
||||
}
|
||||
return framework.AsStatus(err)
|
||||
}
|
33
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature/feature.go
generated
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package feature
|
||||
|
||||
// Features carries feature gate values used by various plugins.
|
||||
// This struct allows us to break the dependency of the plugins on
|
||||
// the internal k8s features pkg.
|
||||
type Features struct {
|
||||
EnableDRAAdminAccess bool
|
||||
EnableDynamicResourceAllocation bool
|
||||
EnableVolumeCapacityPriority bool
|
||||
EnableNodeInclusionPolicyInPodTopologySpread bool
|
||||
EnableMatchLabelKeysInPodTopologySpread bool
|
||||
EnableInPlacePodVerticalScaling bool
|
||||
EnableSidecarContainers bool
|
||||
EnableSchedulingQueueHint bool
|
||||
EnableAsyncPreemption bool
|
||||
EnablePodLevelResources bool
|
||||
}
|
55
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper/normalize_score.go
generated
vendored
Normal file
@ -0,0 +1,55 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package helper
|
||||
|
||||
import (
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
// DefaultNormalizeScore generates a Normalize Score function that can normalize the
|
||||
// scores from [0, max(scores)] to [0, maxPriority]. If reverse is set to true, it
|
||||
// reverses the scores by subtracting it from maxPriority.
|
||||
// Note: The input scores are always assumed to be non-negative integers.
|
||||
func DefaultNormalizeScore(maxPriority int64, reverse bool, scores framework.NodeScoreList) *framework.Status {
|
||||
var maxCount int64
|
||||
for i := range scores {
|
||||
if scores[i].Score > maxCount {
|
||||
maxCount = scores[i].Score
|
||||
}
|
||||
}
|
||||
|
||||
if maxCount == 0 {
|
||||
if reverse {
|
||||
for i := range scores {
|
||||
scores[i].Score = maxPriority
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
for i := range scores {
|
||||
score := scores[i].Score
|
||||
|
||||
score = maxPriority * score / maxCount
|
||||
if reverse {
|
||||
score = maxPriority - score
|
||||
}
|
||||
|
||||
scores[i].Score = score
|
||||
}
|
||||
return nil
|
||||
}
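// Editor's note (illustrative, not part of the upstream file): with maxPriority = 100 and raw
// scores [10, 20, 40], the highest score maps to 100 and the rest scale proportionally, giving
// [25, 50, 100]; with reverse = true the same input yields [75, 50, 0]. A minimal usage
// sketch, assuming framework.MaxNodeScore is 100 and hypothetical node names:
//
//	scores := framework.NodeScoreList{{Name: "node-a", Score: 10}, {Name: "node-b", Score: 40}}
//	_ = DefaultNormalizeScore(framework.MaxNodeScore, false, scores)
//	// scores is now [{node-a 25} {node-b 100}]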
|
52
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper/shape_score.go
generated
vendored
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package helper
|
||||
|
||||
// FunctionShape represents a collection of FunctionShapePoint.
|
||||
type FunctionShape []FunctionShapePoint
|
||||
|
||||
// FunctionShapePoint represents a shape point.
|
||||
type FunctionShapePoint struct {
|
||||
// Utilization is function argument.
|
||||
Utilization int64
|
||||
// Score is function value.
|
||||
Score int64
|
||||
}
|
||||
|
||||
// BuildBrokenLinearFunction creates a function which is built using linear segments. Segments are defined via shape array.
|
||||
// Shape[i].Utilization slice represents points on "Utilization" axis where different segments meet.
|
||||
// Shape[i].Score represents function values at meeting points.
|
||||
//
|
||||
// function f(p) is defined as:
|
||||
//
|
||||
// shape[0].Score for p < shape[0].Utilization
|
||||
// shape[n-1].Score for p > shape[n-1].Utilization
|
||||
//
|
||||
// and linear interpolation between consecutive points otherwise.
|
||||
func BuildBrokenLinearFunction(shape FunctionShape) func(int64) int64 {
|
||||
return func(p int64) int64 {
|
||||
for i := 0; i < len(shape); i++ {
|
||||
if p <= int64(shape[i].Utilization) {
|
||||
if i == 0 {
|
||||
return shape[0].Score
|
||||
}
|
||||
return shape[i-1].Score + (shape[i].Score-shape[i-1].Score)*(p-shape[i-1].Utilization)/(shape[i].Utilization-shape[i-1].Utilization)
|
||||
}
|
||||
}
|
||||
return shape[len(shape)-1].Score
|
||||
}
|
||||
}
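// Editor's note (illustrative, not part of the upstream file): for
//
//	f := BuildBrokenLinearFunction(FunctionShape{{Utilization: 10, Score: 0}, {Utilization: 90, Score: 10}})
//
// f(5) == 0 (below the first point), f(50) == 5 (linear interpolation between the two points),
// and f(100) == 10 (beyond the last point).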
|
116
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper/spread.go
generated
vendored
Normal file
@ -0,0 +1,116 @@
|
||||
/*
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package helper
|
||||
|
||||
import (
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/runtime/schema"
|
||||
appslisters "k8s.io/client-go/listers/apps/v1"
|
||||
corelisters "k8s.io/client-go/listers/core/v1"
|
||||
)
|
||||
|
||||
var (
|
||||
rcKind = v1.SchemeGroupVersion.WithKind("ReplicationController")
|
||||
rsKind = appsv1.SchemeGroupVersion.WithKind("ReplicaSet")
|
||||
ssKind = appsv1.SchemeGroupVersion.WithKind("StatefulSet")
|
||||
)
|
||||
|
||||
// DefaultSelector returns a selector deduced from the Services, Replication
|
||||
// Controllers, Replica Sets, and Stateful Sets matching the given pod.
|
||||
func DefaultSelector(
|
||||
pod *v1.Pod,
|
||||
sl corelisters.ServiceLister,
|
||||
cl corelisters.ReplicationControllerLister,
|
||||
rsl appslisters.ReplicaSetLister,
|
||||
ssl appslisters.StatefulSetLister,
|
||||
) labels.Selector {
|
||||
labelSet := make(labels.Set)
|
||||
// Since services, RCs, RSs and SSs match the pod, they won't have conflicting
|
||||
// labels. Merging is safe.
|
||||
|
||||
if services, err := GetPodServices(sl, pod); err == nil {
|
||||
for _, service := range services {
|
||||
labelSet = labels.Merge(labelSet, service.Spec.Selector)
|
||||
}
|
||||
}
|
||||
selector := labelSet.AsSelector()
|
||||
|
||||
owner := metav1.GetControllerOfNoCopy(pod)
|
||||
if owner == nil {
|
||||
return selector
|
||||
}
|
||||
|
||||
gv, err := schema.ParseGroupVersion(owner.APIVersion)
|
||||
if err != nil {
|
||||
return selector
|
||||
}
|
||||
|
||||
gvk := gv.WithKind(owner.Kind)
|
||||
switch gvk {
|
||||
case rcKind:
|
||||
if rc, err := cl.ReplicationControllers(pod.Namespace).Get(owner.Name); err == nil {
|
||||
labelSet = labels.Merge(labelSet, rc.Spec.Selector)
|
||||
selector = labelSet.AsSelector()
|
||||
}
|
||||
case rsKind:
|
||||
if rs, err := rsl.ReplicaSets(pod.Namespace).Get(owner.Name); err == nil {
|
||||
if other, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector); err == nil {
|
||||
if r, ok := other.Requirements(); ok {
|
||||
selector = selector.Add(r...)
|
||||
}
|
||||
}
|
||||
}
|
||||
case ssKind:
|
||||
if ss, err := ssl.StatefulSets(pod.Namespace).Get(owner.Name); err == nil {
|
||||
if other, err := metav1.LabelSelectorAsSelector(ss.Spec.Selector); err == nil {
|
||||
if r, ok := other.Requirements(); ok {
|
||||
selector = selector.Add(r...)
|
||||
}
|
||||
}
|
||||
}
|
||||
default:
|
||||
// Not owned by a supported controller.
|
||||
}
|
||||
|
||||
return selector
|
||||
}
|
||||
|
||||
// GetPodServices gets the services whose selectors match the labels on the given pod.
|
||||
func GetPodServices(sl corelisters.ServiceLister, pod *v1.Pod) ([]*v1.Service, error) {
|
||||
allServices, err := sl.Services(pod.Namespace).List(labels.Everything())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var services []*v1.Service
|
||||
for i := range allServices {
|
||||
service := allServices[i]
|
||||
if service.Spec.Selector == nil {
|
||||
// services with nil selectors match nothing, not everything.
|
||||
continue
|
||||
}
|
||||
selector := labels.Set(service.Spec.Selector).AsSelectorPreValidated()
|
||||
if selector.Matches(labels.Set(pod.Labels)) {
|
||||
services = append(services, service)
|
||||
}
|
||||
}
|
||||
|
||||
return services, nil
|
||||
}
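// Editor's note (illustrative sketch with hypothetical objects): a Service whose spec.selector
// is {app: web} matches a pod labelled {app: web, tier: frontend} and is returned by
// GetPodServices, while a Service with a nil selector is skipped because it selects nothing.
// DefaultSelector then merges such Service selectors with the selector of the pod's owning
// ReplicationController, ReplicaSet, or StatefulSet, if any.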
|
28
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper/taint.go
generated
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package helper
|
||||
|
||||
import v1 "k8s.io/api/core/v1"
|
||||
|
||||
// DoNotScheduleTaintsFilterFunc returns the filter function that can
|
||||
// filter out the node taints that reject scheduling Pod on a Node.
|
||||
func DoNotScheduleTaintsFilterFunc() func(t *v1.Taint) bool {
|
||||
return func(t *v1.Taint) bool {
|
||||
// PodToleratesNodeTaints is only interested in NoSchedule and NoExecute taints.
|
||||
return t.Effect == v1.TaintEffectNoSchedule || t.Effect == v1.TaintEffectNoExecute
|
||||
}
|
||||
}
|
132
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/imagelocality/image_locality.go
generated
vendored
Normal file
@ -0,0 +1,132 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package imagelocality
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
)
|
||||
|
||||
// The two thresholds are used as bounds for the image score range. They correspond to a reasonable size range for
|
||||
// container images compressed and stored in registries; 90%ile of images on dockerhub drops into this range.
|
||||
const (
|
||||
mb int64 = 1024 * 1024
|
||||
minThreshold int64 = 23 * mb
|
||||
maxContainerThreshold int64 = 1000 * mb
|
||||
)
|
||||
|
||||
// ImageLocality is a score plugin that favors nodes that already have requested pod container's images.
|
||||
type ImageLocality struct {
|
||||
handle framework.Handle
|
||||
}
|
||||
|
||||
var _ framework.ScorePlugin = &ImageLocality{}
|
||||
|
||||
// Name is the name of the plugin used in the plugin registry and configurations.
|
||||
const Name = names.ImageLocality
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (pl *ImageLocality) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
// Score invoked at the score extension point.
|
||||
func (pl *ImageLocality) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
|
||||
nodeInfo, err := pl.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
|
||||
if err != nil {
|
||||
return 0, framework.AsStatus(fmt.Errorf("getting node %q from Snapshot: %w", nodeName, err))
|
||||
}
|
||||
|
||||
nodeInfos, err := pl.handle.SnapshotSharedLister().NodeInfos().List()
|
||||
if err != nil {
|
||||
return 0, framework.AsStatus(err)
|
||||
}
|
||||
totalNumNodes := len(nodeInfos)
|
||||
|
||||
imageScores := sumImageScores(nodeInfo, pod, totalNumNodes)
|
||||
score := calculatePriority(imageScores, len(pod.Spec.InitContainers)+len(pod.Spec.Containers))
|
||||
|
||||
return score, nil
|
||||
}
|
||||
|
||||
// ScoreExtensions of the Score plugin.
|
||||
func (pl *ImageLocality) ScoreExtensions() framework.ScoreExtensions {
|
||||
return nil
|
||||
}
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(_ context.Context, _ runtime.Object, h framework.Handle) (framework.Plugin, error) {
|
||||
return &ImageLocality{handle: h}, nil
|
||||
}
|
||||
|
||||
// calculatePriority returns the priority of a node. Given the sumScores of requested images on the node, the node's
|
||||
// priority is obtained by scaling the maximum priority value with a ratio proportional to the sumScores.
|
||||
func calculatePriority(sumScores int64, numContainers int) int64 {
|
||||
maxThreshold := maxContainerThreshold * int64(numContainers)
|
||||
if sumScores < minThreshold {
|
||||
sumScores = minThreshold
|
||||
} else if sumScores > maxThreshold {
|
||||
sumScores = maxThreshold
|
||||
}
|
||||
|
||||
return framework.MaxNodeScore * (sumScores - minThreshold) / (maxThreshold - minThreshold)
|
||||
}
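// Editor's note (illustrative, not part of the upstream file): for a pod with 2 containers the
// clamp range is [23 MB, 2000 MB]. A sumScores of 500 MB therefore maps to
// 100*(500-23)/(2000-23), which is about 24, assuming framework.MaxNodeScore is 100; anything
// at or below 23 MB scores 0 and anything at or above 2000 MB scores 100.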
|
||||
|
||||
// sumImageScores returns the sum of image scores of all the containers that are already on the node.
|
||||
// Each image receives a raw score of its size, scaled by scaledImageScore. The raw scores are later used to calculate
|
||||
// the final score.
|
||||
func sumImageScores(nodeInfo *framework.NodeInfo, pod *v1.Pod, totalNumNodes int) int64 {
|
||||
var sum int64
|
||||
for _, container := range pod.Spec.InitContainers {
|
||||
if state, ok := nodeInfo.ImageStates[normalizedImageName(container.Image)]; ok {
|
||||
sum += scaledImageScore(state, totalNumNodes)
|
||||
}
|
||||
}
|
||||
for _, container := range pod.Spec.Containers {
|
||||
if state, ok := nodeInfo.ImageStates[normalizedImageName(container.Image)]; ok {
|
||||
sum += scaledImageScore(state, totalNumNodes)
|
||||
}
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
||||
// scaledImageScore returns an adaptively scaled score for the given state of an image.
|
||||
// The size of the image is used as the base score, scaled by a factor which considers how many nodes the image has "spread" to.
|
||||
// This heuristic aims to mitigate the undesirable "node heating problem", i.e., pods get assigned to the same or
|
||||
// a few nodes due to image locality.
|
||||
func scaledImageScore(imageState *framework.ImageStateSummary, totalNumNodes int) int64 {
|
||||
spread := float64(imageState.NumNodes) / float64(totalNumNodes)
|
||||
return int64(float64(imageState.Size) * spread)
|
||||
}
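// Editor's note (illustrative, not part of the upstream file): a 500 MB image already present
// on 3 of 10 nodes has spread 0.3 and contributes a raw score of about 150 MB, so images that
// are widely replicated weigh more than images that exist on only a single node.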
|
||||
|
||||
// normalizedImageName returns the CRI compliant name for a given image.
|
||||
// TODO: cover the corner cases of missed matches, e.g.,
|
||||
// 1. Using Docker as runtime and docker.io/library/test:tag in pod spec, but only test:tag will be present in node status
|
||||
// 2. Using the implicit registry, i.e., test:tag or library/test:tag in pod spec but only docker.io/library/test:tag
|
||||
// in node status; note that if users consistently use one registry format, this should not happen.
|
||||
func normalizedImageName(name string) string {
|
||||
if strings.LastIndex(name, ":") <= strings.LastIndex(name, "/") {
|
||||
name = name + ":latest"
|
||||
}
|
||||
return name
|
||||
}
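// Editor's note (illustrative, not part of the upstream file): "docker.io/library/nginx"
// becomes "docker.io/library/nginx:latest" and "localhost:5000/app" becomes
// "localhost:5000/app:latest" (the colon belongs to the registry port, not a tag), while
// "nginx:1.25" is returned unchanged.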
|
386
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity/filtering.go
generated
vendored
Normal file
@ -0,0 +1,386 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package interpodaffinity
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync/atomic"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
const (
|
||||
// preFilterStateKey is the key in CycleState to InterPodAffinity pre-computed data for Filtering.
|
||||
// Using the name of the plugin will likely help us avoid collisions with other plugins.
|
||||
preFilterStateKey = "PreFilter" + Name
|
||||
|
||||
// ErrReasonExistingAntiAffinityRulesNotMatch is used for ExistingPodsAntiAffinityRulesNotMatch predicate error.
|
||||
ErrReasonExistingAntiAffinityRulesNotMatch = "node(s) didn't satisfy existing pods anti-affinity rules"
|
||||
// ErrReasonAffinityRulesNotMatch is used for PodAffinityRulesNotMatch predicate error.
|
||||
ErrReasonAffinityRulesNotMatch = "node(s) didn't match pod affinity rules"
|
||||
// ErrReasonAntiAffinityRulesNotMatch is used for PodAntiAffinityRulesNotMatch predicate error.
|
||||
ErrReasonAntiAffinityRulesNotMatch = "node(s) didn't match pod anti-affinity rules"
|
||||
)
|
||||
|
||||
// preFilterState computed at PreFilter and used at Filter.
|
||||
type preFilterState struct {
|
||||
// A map of topology pairs to the number of existing pods that have anti-affinity terms that match the "pod".
|
||||
existingAntiAffinityCounts topologyToMatchedTermCount
|
||||
// A map of topology pairs to the number of existing pods that match the affinity terms of the "pod".
|
||||
affinityCounts topologyToMatchedTermCount
|
||||
// A map of topology pairs to the number of existing pods that match the anti-affinity terms of the "pod".
|
||||
antiAffinityCounts topologyToMatchedTermCount
|
||||
// podInfo of the incoming pod.
|
||||
podInfo *framework.PodInfo
|
||||
// A copy of the incoming pod's namespace labels.
|
||||
namespaceLabels labels.Set
|
||||
}
|
||||
|
||||
// Clone the prefilter state.
|
||||
func (s *preFilterState) Clone() framework.StateData {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
copy := preFilterState{}
|
||||
copy.affinityCounts = s.affinityCounts.clone()
|
||||
copy.antiAffinityCounts = s.antiAffinityCounts.clone()
|
||||
copy.existingAntiAffinityCounts = s.existingAntiAffinityCounts.clone()
|
||||
// No need to deep copy the podInfo because it shouldn't change.
|
||||
copy.podInfo = s.podInfo
|
||||
copy.namespaceLabels = s.namespaceLabels
|
||||
return ©
|
||||
}
|
||||
|
||||
// updateWithPod updates the preFilterState counters with the (anti)affinity matches for the given podInfo.
|
||||
func (s *preFilterState) updateWithPod(pInfo *framework.PodInfo, node *v1.Node, multiplier int64) {
|
||||
if s == nil {
|
||||
return
|
||||
}
|
||||
|
||||
s.existingAntiAffinityCounts.updateWithAntiAffinityTerms(pInfo.RequiredAntiAffinityTerms, s.podInfo.Pod, s.namespaceLabels, node, multiplier)
|
||||
s.affinityCounts.updateWithAffinityTerms(s.podInfo.RequiredAffinityTerms, pInfo.Pod, node, multiplier)
|
||||
// The incoming pod's terms have the namespaceSelector merged into the namespaces, and so
|
||||
// here we don't lookup the updated pod's namespace labels, hence passing nil for nsLabels.
|
||||
s.antiAffinityCounts.updateWithAntiAffinityTerms(s.podInfo.RequiredAntiAffinityTerms, pInfo.Pod, nil, node, multiplier)
|
||||
}
|
||||
|
||||
type topologyPair struct {
|
||||
key string
|
||||
value string
|
||||
}
|
||||
type topologyToMatchedTermCount map[topologyPair]int64
|
||||
|
||||
func (m topologyToMatchedTermCount) append(toAppend topologyToMatchedTermCount) {
|
||||
for pair := range toAppend {
|
||||
m[pair] += toAppend[pair]
|
||||
}
|
||||
}
|
||||
|
||||
func (m topologyToMatchedTermCount) clone() topologyToMatchedTermCount {
|
||||
copy := make(topologyToMatchedTermCount, len(m))
|
||||
copy.append(m)
|
||||
return copy
|
||||
}
|
||||
|
||||
func (m topologyToMatchedTermCount) update(node *v1.Node, tk string, value int64) {
|
||||
if tv, ok := node.Labels[tk]; ok {
|
||||
pair := topologyPair{key: tk, value: tv}
|
||||
m[pair] += value
|
||||
// value could be negative, hence we delete the entry if it is down to zero.
|
||||
if m[pair] == 0 {
|
||||
delete(m, pair)
|
||||
}
|
||||
}
|
||||
}
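// Editor's note (illustrative sketch with a hypothetical node): for a node labelled
// topology.kubernetes.io/zone=zone-a, update(node, "topology.kubernetes.io/zone", 1)
// increments the counter for the pair {topology.kubernetes.io/zone, zone-a}; a later call with
// -1 brings the counter back to zero and removes the entry, so the map only holds non-zero counts.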
|
||||
|
||||
// updates the topologyToMatchedTermCount map with the specified value
|
||||
// for each affinity term if "targetPod" matches ALL terms.
|
||||
func (m topologyToMatchedTermCount) updateWithAffinityTerms(
|
||||
terms []framework.AffinityTerm, pod *v1.Pod, node *v1.Node, value int64) {
|
||||
if podMatchesAllAffinityTerms(terms, pod) {
|
||||
for _, t := range terms {
|
||||
m.update(node, t.TopologyKey, value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// updates the topologyToMatchedTermCount map with the specified value
|
||||
// for each anti-affinity term that matches the target pod.
|
||||
func (m topologyToMatchedTermCount) updateWithAntiAffinityTerms(terms []framework.AffinityTerm, pod *v1.Pod, nsLabels labels.Set, node *v1.Node, value int64) {
|
||||
// Check anti-affinity terms.
|
||||
for _, t := range terms {
|
||||
if t.Matches(pod, nsLabels) {
|
||||
m.update(node, t.TopologyKey, value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// returns true IFF the given pod matches all the given terms.
|
||||
func podMatchesAllAffinityTerms(terms []framework.AffinityTerm, pod *v1.Pod) bool {
|
||||
if len(terms) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, t := range terms {
|
||||
// The incoming pod NamespaceSelector was merged into the Namespaces set, and so
|
||||
// we are not explicitly passing in namespace labels.
|
||||
if !t.Matches(pod, nil) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// calculates the following for each existing pod on each node:
|
||||
// 1. Whether it has PodAntiAffinity
|
||||
// 2. Whether any AntiAffinityTerm matches the incoming pod
|
||||
func (pl *InterPodAffinity) getExistingAntiAffinityCounts(ctx context.Context, pod *v1.Pod, nsLabels labels.Set, nodes []*framework.NodeInfo) topologyToMatchedTermCount {
|
||||
topoMaps := make([]topologyToMatchedTermCount, len(nodes))
|
||||
index := int32(-1)
|
||||
processNode := func(i int) {
|
||||
nodeInfo := nodes[i]
|
||||
node := nodeInfo.Node()
|
||||
|
||||
topoMap := make(topologyToMatchedTermCount)
|
||||
for _, existingPod := range nodeInfo.PodsWithRequiredAntiAffinity {
|
||||
topoMap.updateWithAntiAffinityTerms(existingPod.RequiredAntiAffinityTerms, pod, nsLabels, node, 1)
|
||||
}
|
||||
if len(topoMap) != 0 {
|
||||
topoMaps[atomic.AddInt32(&index, 1)] = topoMap
|
||||
}
|
||||
}
|
||||
pl.parallelizer.Until(ctx, len(nodes), processNode, pl.Name())
|
||||
|
||||
result := make(topologyToMatchedTermCount)
|
||||
for i := 0; i <= int(index); i++ {
|
||||
result.append(topoMaps[i])
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// finds existing Pods that match the incoming pod's (anti)affinity terms.
|
||||
// It returns a topologyToMatchedTermCount that is checked later by the affinity
|
||||
// predicate. With this topologyToMatchedTermCount available, the affinity predicate does not
|
||||
// need to check all the pods in the cluster.
|
||||
func (pl *InterPodAffinity) getIncomingAffinityAntiAffinityCounts(ctx context.Context, podInfo *framework.PodInfo, allNodes []*framework.NodeInfo) (topologyToMatchedTermCount, topologyToMatchedTermCount) {
|
||||
affinityCounts := make(topologyToMatchedTermCount)
|
||||
antiAffinityCounts := make(topologyToMatchedTermCount)
|
||||
if len(podInfo.RequiredAffinityTerms) == 0 && len(podInfo.RequiredAntiAffinityTerms) == 0 {
|
||||
return affinityCounts, antiAffinityCounts
|
||||
}
|
||||
|
||||
affinityCountsList := make([]topologyToMatchedTermCount, len(allNodes))
|
||||
antiAffinityCountsList := make([]topologyToMatchedTermCount, len(allNodes))
|
||||
index := int32(-1)
|
||||
processNode := func(i int) {
|
||||
nodeInfo := allNodes[i]
|
||||
node := nodeInfo.Node()
|
||||
|
||||
affinity := make(topologyToMatchedTermCount)
|
||||
antiAffinity := make(topologyToMatchedTermCount)
|
||||
for _, existingPod := range nodeInfo.Pods {
|
||||
affinity.updateWithAffinityTerms(podInfo.RequiredAffinityTerms, existingPod.Pod, node, 1)
|
||||
// The incoming pod's terms have the namespaceSelector merged into the namespaces, and so
|
||||
// here we don't lookup the existing pod's namespace labels, hence passing nil for nsLabels.
|
||||
antiAffinity.updateWithAntiAffinityTerms(podInfo.RequiredAntiAffinityTerms, existingPod.Pod, nil, node, 1)
|
||||
}
|
||||
|
||||
if len(affinity) > 0 || len(antiAffinity) > 0 {
|
||||
k := atomic.AddInt32(&index, 1)
|
||||
affinityCountsList[k] = affinity
|
||||
antiAffinityCountsList[k] = antiAffinity
|
||||
}
|
||||
}
|
||||
pl.parallelizer.Until(ctx, len(allNodes), processNode, pl.Name())
|
||||
|
||||
for i := 0; i <= int(index); i++ {
|
||||
affinityCounts.append(affinityCountsList[i])
|
||||
antiAffinityCounts.append(antiAffinityCountsList[i])
|
||||
}
|
||||
|
||||
return affinityCounts, antiAffinityCounts
|
||||
}
|
||||
|
||||
// PreFilter invoked at the prefilter extension point.
|
||||
func (pl *InterPodAffinity) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
|
||||
var allNodes []*framework.NodeInfo
|
||||
var nodesWithRequiredAntiAffinityPods []*framework.NodeInfo
|
||||
var err error
|
||||
if allNodes, err = pl.sharedLister.NodeInfos().List(); err != nil {
|
||||
return nil, framework.AsStatus(fmt.Errorf("failed to list NodeInfos: %w", err))
|
||||
}
|
||||
if nodesWithRequiredAntiAffinityPods, err = pl.sharedLister.NodeInfos().HavePodsWithRequiredAntiAffinityList(); err != nil {
|
||||
return nil, framework.AsStatus(fmt.Errorf("failed to list NodeInfos with pods with affinity: %w", err))
|
||||
}
|
||||
|
||||
s := &preFilterState{}
|
||||
|
||||
if s.podInfo, err = framework.NewPodInfo(pod); err != nil {
|
||||
return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, fmt.Sprintf("parsing pod: %+v", err))
|
||||
}
|
||||
|
||||
for i := range s.podInfo.RequiredAffinityTerms {
|
||||
if err := pl.mergeAffinityTermNamespacesIfNotEmpty(&s.podInfo.RequiredAffinityTerms[i]); err != nil {
|
||||
return nil, framework.AsStatus(err)
|
||||
}
|
||||
}
|
||||
for i := range s.podInfo.RequiredAntiAffinityTerms {
|
||||
if err := pl.mergeAffinityTermNamespacesIfNotEmpty(&s.podInfo.RequiredAntiAffinityTerms[i]); err != nil {
|
||||
return nil, framework.AsStatus(err)
|
||||
}
|
||||
}
|
||||
logger := klog.FromContext(ctx)
|
||||
s.namespaceLabels = GetNamespaceLabelsSnapshot(logger, pod.Namespace, pl.nsLister)
|
||||
|
||||
s.existingAntiAffinityCounts = pl.getExistingAntiAffinityCounts(ctx, pod, s.namespaceLabels, nodesWithRequiredAntiAffinityPods)
|
||||
s.affinityCounts, s.antiAffinityCounts = pl.getIncomingAffinityAntiAffinityCounts(ctx, s.podInfo, allNodes)
|
||||
|
||||
if len(s.existingAntiAffinityCounts) == 0 && len(s.podInfo.RequiredAffinityTerms) == 0 && len(s.podInfo.RequiredAntiAffinityTerms) == 0 {
|
||||
return nil, framework.NewStatus(framework.Skip)
|
||||
}
|
||||
|
||||
cycleState.Write(preFilterStateKey, s)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// PreFilterExtensions returns prefilter extensions, pod add and remove.
|
||||
func (pl *InterPodAffinity) PreFilterExtensions() framework.PreFilterExtensions {
|
||||
return pl
|
||||
}
|
||||
|
||||
// AddPod from pre-computed data in cycleState.
|
||||
func (pl *InterPodAffinity) AddPod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *v1.Pod, podInfoToAdd *framework.PodInfo, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
state, err := getPreFilterState(cycleState)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
state.updateWithPod(podInfoToAdd, nodeInfo.Node(), 1)
|
||||
return nil
|
||||
}
|
||||
|
||||
// RemovePod from pre-computed data in cycleState.
|
||||
func (pl *InterPodAffinity) RemovePod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *v1.Pod, podInfoToRemove *framework.PodInfo, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
state, err := getPreFilterState(cycleState)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
state.updateWithPod(podInfoToRemove, nodeInfo.Node(), -1)
|
||||
return nil
|
||||
}
|
||||
|
||||
func getPreFilterState(cycleState *framework.CycleState) (*preFilterState, error) {
|
||||
c, err := cycleState.Read(preFilterStateKey)
|
||||
if err != nil {
|
||||
// preFilterState doesn't exist, likely PreFilter wasn't invoked.
|
||||
return nil, fmt.Errorf("error reading %q from cycleState: %w", preFilterStateKey, err)
|
||||
}
|
||||
|
||||
s, ok := c.(*preFilterState)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("%+v convert to interpodaffinity.state error", c)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Checks if scheduling the pod onto this node would break any anti-affinity
|
||||
// terms indicated by the existing pods.
|
||||
func satisfyExistingPodsAntiAffinity(state *preFilterState, nodeInfo *framework.NodeInfo) bool {
|
||||
if len(state.existingAntiAffinityCounts) > 0 {
|
||||
// Iterate over topology pairs to get any of the pods being affected by
|
||||
// the scheduled pod's anti-affinity terms
|
||||
for topologyKey, topologyValue := range nodeInfo.Node().Labels {
|
||||
tp := topologyPair{key: topologyKey, value: topologyValue}
|
||||
if state.existingAntiAffinityCounts[tp] > 0 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Checks if the node satisfies the incoming pod's anti-affinity rules.
|
||||
func satisfyPodAntiAffinity(state *preFilterState, nodeInfo *framework.NodeInfo) bool {
|
||||
if len(state.antiAffinityCounts) > 0 {
|
||||
for _, term := range state.podInfo.RequiredAntiAffinityTerms {
|
||||
if topologyValue, ok := nodeInfo.Node().Labels[term.TopologyKey]; ok {
|
||||
tp := topologyPair{key: term.TopologyKey, value: topologyValue}
|
||||
if state.antiAffinityCounts[tp] > 0 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Checks if the node satisfies the incoming pod's affinity rules.
|
||||
func satisfyPodAffinity(state *preFilterState, nodeInfo *framework.NodeInfo) bool {
|
||||
podsExist := true
|
||||
for _, term := range state.podInfo.RequiredAffinityTerms {
|
||||
if topologyValue, ok := nodeInfo.Node().Labels[term.TopologyKey]; ok {
|
||||
tp := topologyPair{key: term.TopologyKey, value: topologyValue}
|
||||
if state.affinityCounts[tp] <= 0 {
|
||||
podsExist = false
|
||||
}
|
||||
} else {
|
||||
// All topology labels must exist on the node.
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if !podsExist {
|
||||
// This pod may be the first pod in a series of pods that have affinity to themselves. In order
|
||||
// to not leave such pods in pending state forever, we check that if no other pod
|
||||
// in the cluster matches the namespace and selector of this pod, the pod matches
|
||||
// its own terms, and the node has all the requested topologies, then we allow the pod
|
||||
// to pass the affinity check.
|
||||
if len(state.affinityCounts) == 0 && podMatchesAllAffinityTerms(state.podInfo.RequiredAffinityTerms, state.podInfo.Pod) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Filter invoked at the filter extension point.
|
||||
// It checks if a pod can be scheduled on the specified node with pod affinity/anti-affinity configuration.
|
||||
func (pl *InterPodAffinity) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
|
||||
state, err := getPreFilterState(cycleState)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
|
||||
if !satisfyPodAffinity(state, nodeInfo) {
|
||||
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonAffinityRulesNotMatch)
|
||||
}
|
||||
|
||||
if !satisfyPodAntiAffinity(state, nodeInfo) {
|
||||
return framework.NewStatus(framework.Unschedulable, ErrReasonAntiAffinityRulesNotMatch)
|
||||
}
|
||||
|
||||
if !satisfyExistingPodsAntiAffinity(state, nodeInfo) {
|
||||
return framework.NewStatus(framework.Unschedulable, ErrReasonExistingAntiAffinityRulesNotMatch)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
247
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go
generated
vendored
Normal file
@ -0,0 +1,247 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package interpodaffinity
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
listersv1 "k8s.io/client-go/listers/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
// Name is the name of the plugin used in the plugin registry and configurations.
|
||||
const Name = names.InterPodAffinity
|
||||
|
||||
var _ framework.PreFilterPlugin = &InterPodAffinity{}
|
||||
var _ framework.FilterPlugin = &InterPodAffinity{}
|
||||
var _ framework.PreScorePlugin = &InterPodAffinity{}
|
||||
var _ framework.ScorePlugin = &InterPodAffinity{}
|
||||
var _ framework.EnqueueExtensions = &InterPodAffinity{}
|
||||
|
||||
// InterPodAffinity is a plugin that checks inter pod affinity
|
||||
type InterPodAffinity struct {
|
||||
parallelizer parallelize.Parallelizer
|
||||
args config.InterPodAffinityArgs
|
||||
sharedLister framework.SharedLister
|
||||
nsLister listersv1.NamespaceLister
|
||||
enableSchedulingQueueHint bool
|
||||
}
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (pl *InterPodAffinity) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
// EventsToRegister returns the possible events that may make a failed Pod
|
||||
// schedulable
|
||||
func (pl *InterPodAffinity) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
// A note about UpdateNodeTaint event:
|
||||
// Ideally, it's supposed to register only Add | UpdateNodeLabel because UpdateNodeTaint will never change the result from this plugin.
|
||||
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
|
||||
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
||||
nodeActionType := framework.Add | framework.UpdateNodeLabel | framework.UpdateNodeTaint
|
||||
if pl.enableSchedulingQueueHint {
|
||||
// When QueueingHint is enabled, we don't use preCheck and we don't need to register UpdateNodeTaint event.
|
||||
nodeActionType = framework.Add | framework.UpdateNodeLabel
|
||||
}
|
||||
return []framework.ClusterEventWithHint{
|
||||
// All ActionType includes the following events:
|
||||
// - Delete. An unschedulable Pod may fail due to violating an existing Pod's anti-affinity constraints,
|
||||
// deleting an existing Pod may make it schedulable.
|
||||
// - UpdatePodLabel. Updating on an existing Pod's labels (e.g., removal) may make
|
||||
// an unschedulable Pod schedulable.
|
||||
// - Add. An unschedulable Pod may fail due to violating pod-affinity constraints,
|
||||
// adding an assigned Pod may make it schedulable.
|
||||
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Add | framework.UpdatePodLabel | framework.Delete}, QueueingHintFn: pl.isSchedulableAfterPodChange},
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: nodeActionType}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(_ context.Context, plArgs runtime.Object, h framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
if h.SnapshotSharedLister() == nil {
|
||||
return nil, fmt.Errorf("SnapshotSharedlister is nil")
|
||||
}
|
||||
args, err := getArgs(plArgs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := validation.ValidateInterPodAffinityArgs(nil, &args); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pl := &InterPodAffinity{
|
||||
parallelizer: h.Parallelizer(),
|
||||
args: args,
|
||||
sharedLister: h.SnapshotSharedLister(),
|
||||
nsLister: h.SharedInformerFactory().Core().V1().Namespaces().Lister(),
|
||||
enableSchedulingQueueHint: fts.EnableSchedulingQueueHint,
|
||||
}
|
||||
|
||||
return pl, nil
|
||||
}
|
||||
|
||||
func getArgs(obj runtime.Object) (config.InterPodAffinityArgs, error) {
|
||||
ptr, ok := obj.(*config.InterPodAffinityArgs)
|
||||
if !ok {
|
||||
return config.InterPodAffinityArgs{}, fmt.Errorf("want args to be of type InterPodAffinityArgs, got %T", obj)
|
||||
}
|
||||
return *ptr, nil
|
||||
}
|
||||
|
||||
// Updates Namespaces with the set of namespaces identified by NamespaceSelector.
// If successful, NamespaceSelector is set to Nothing().
// The assumption is that the term is for an incoming pod, in which case
// namespaceSelector is either unrolled into Namespaces (and so the selector
// is set to Nothing()) or is Empty(), which means match everything. Therefore,
// when matching against this term, there is no need to look up the existing
// pod's namespace labels to match them against the term's namespaceSelector explicitly.
|
||||
func (pl *InterPodAffinity) mergeAffinityTermNamespacesIfNotEmpty(at *framework.AffinityTerm) error {
|
||||
if at.NamespaceSelector.Empty() {
|
||||
return nil
|
||||
}
|
||||
ns, err := pl.nsLister.List(at.NamespaceSelector)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, n := range ns {
|
||||
at.Namespaces.Insert(n.Name)
|
||||
}
|
||||
at.NamespaceSelector = labels.Nothing()
|
||||
return nil
|
||||
}
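A minimal standalone sketch (not part of the vendored file) of the unrolling described above: a namespaceSelector is resolved into a concrete set of namespace names, after which the selector can be treated as Nothing(). The namespace data below is hypothetical and stands in for a real NamespaceLister.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/util/sets"
)

func main() {
	// Hypothetical namespaces and their labels, standing in for a real lister.
	namespaces := map[string]labels.Set{
		"team-a": {"team": "a"},
		"team-b": {"team": "b"},
	}
	selector := labels.SelectorFromSet(labels.Set{"team": "a"})

	// "Unroll" the selector into a concrete set of namespace names.
	matched := sets.New[string]()
	for name, nsLabels := range namespaces {
		if selector.Matches(nsLabels) {
			matched.Insert(name)
		}
	}

	// Once unrolled, the selector is effectively replaced by Nothing(),
	// so later matching only consults the namespace set.
	selector = labels.Nothing()
	fmt.Println(matched.UnsortedList(), selector.Empty()) // [team-a] false
}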
|
||||
|
||||
// GetNamespaceLabelsSnapshot returns a snapshot of the labels associated with
|
||||
// the namespace.
|
||||
func GetNamespaceLabelsSnapshot(logger klog.Logger, ns string, nsLister listersv1.NamespaceLister) (nsLabels labels.Set) {
|
||||
podNS, err := nsLister.Get(ns)
|
||||
if err == nil {
|
||||
// Create and return snapshot of the labels.
|
||||
return labels.Merge(podNS.Labels, nil)
|
||||
}
|
||||
logger.V(3).Info("getting namespace, assuming empty set of namespace labels", "namespace", ns, "err", err)
|
||||
return
|
||||
}
|
||||
|
||||
func (pl *InterPodAffinity) isSchedulableAfterPodChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
originalPod, modifiedPod, err := util.As[*v1.Pod](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
if (modifiedPod != nil && modifiedPod.Spec.NodeName == "") || (originalPod != nil && originalPod.Spec.NodeName == "") {
|
||||
logger.V(5).Info("the added/updated/deleted pod is unscheduled, so it doesn't make the target pod schedulable",
|
||||
"pod", klog.KObj(pod), "originalPod", klog.KObj(originalPod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
terms, err := framework.GetAffinityTerms(pod, framework.GetPodAffinityTerms(pod.Spec.Affinity))
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
antiTerms, err := framework.GetAffinityTerms(pod, framework.GetPodAntiAffinityTerms(pod.Spec.Affinity))
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
// Pod is updated. Return Queue when the updated pod matches the target pod's affinity, or no longer matches its anti-affinity.
|
||||
// Note that, we don't need to check each affinity individually when the Pod has more than one affinity
|
||||
// because the current PodAffinity looks for a **single** existing pod that can satisfy **all** the terms of inter-pod affinity of an incoming pod.
|
||||
if modifiedPod != nil && originalPod != nil {
|
||||
if !podMatchesAllAffinityTerms(terms, originalPod) && podMatchesAllAffinityTerms(terms, modifiedPod) {
|
||||
logger.V(5).Info("a scheduled pod was updated to match the target pod's affinity, and the pod may be schedulable now",
|
||||
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
if podMatchesAllAffinityTerms(antiTerms, originalPod) && !podMatchesAllAffinityTerms(antiTerms, modifiedPod) {
|
||||
logger.V(5).Info("a scheduled pod was updated not to match the target pod's anti affinity, and the pod may be schedulable now",
|
||||
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
logger.V(5).Info("a scheduled pod was updated but it doesn't match the target pod's affinity or does match the target pod's anti-affinity",
|
||||
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// Pod is added. Return Queue when the added pod matches the target pod's affinity.
|
||||
if modifiedPod != nil {
|
||||
if podMatchesAllAffinityTerms(terms, modifiedPod) {
|
||||
logger.V(5).Info("a scheduled pod was added and it matches the target pod's affinity",
|
||||
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
logger.V(5).Info("a scheduled pod was added and it doesn't match the target pod's affinity",
|
||||
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// Pod is deleted. Return Queue when the deleted pod matched the target pod's anti-affinity.
|
||||
if !podMatchesAllAffinityTerms(antiTerms, originalPod) {
|
||||
logger.V(5).Info("a scheduled pod was deleted but it doesn't match the target pod's anti-affinity",
|
||||
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
logger.V(5).Info("a scheduled pod was deleted and it matches the target pod's anti-affinity. The pod may be schedulable now",
|
||||
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
func (pl *InterPodAffinity) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
_, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
terms, err := framework.GetAffinityTerms(pod, framework.GetPodAffinityTerms(pod.Spec.Affinity))
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
for _, term := range terms {
|
||||
if _, ok := modifiedNode.Labels[term.TopologyKey]; ok {
|
||||
logger.V(5).Info("a node with matched pod affinity topologyKey was added/updated and it may make pod schedulable",
|
||||
"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.Queue, err
|
||||
}
|
||||
}
|
||||
|
||||
antiTerms, err := framework.GetAffinityTerms(pod, framework.GetPodAntiAffinityTerms(pod.Spec.Affinity))
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
for _, term := range antiTerms {
|
||||
if _, ok := modifiedNode.Labels[term.TopologyKey]; ok {
|
||||
logger.V(5).Info("a node with matched pod anti-affinity topologyKey was added/updated and it may make pod schedulable",
|
||||
"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.Queue, err
|
||||
}
|
||||
}
|
||||
logger.V(5).Info("a node is added/updated but doesn't have any topologyKey which matches pod affinity/anti-affinity",
|
||||
"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
302
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity/scoring.go
generated
vendored
Normal file
@ -0,0 +1,302 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package interpodaffinity
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"sync/atomic"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
// preScoreStateKey is the key in CycleState to InterPodAffinity pre-computed data for Scoring.
|
||||
const preScoreStateKey = "PreScore" + Name
|
||||
|
||||
type scoreMap map[string]map[string]int64
|
||||
|
||||
// preScoreState computed at PreScore and used at Score.
|
||||
type preScoreState struct {
|
||||
topologyScore scoreMap
|
||||
podInfo *framework.PodInfo
|
||||
// A copy of the incoming pod's namespace labels.
|
||||
namespaceLabels labels.Set
|
||||
}
|
||||
|
||||
// Clone implements the mandatory Clone interface. We don't really copy the data since
|
||||
// there is no need for that.
|
||||
func (s *preScoreState) Clone() framework.StateData {
|
||||
return s
|
||||
}
|
||||
|
||||
func (m scoreMap) processTerm(term *framework.AffinityTerm, weight int32, pod *v1.Pod, nsLabels labels.Set, node *v1.Node, multiplier int32) {
|
||||
if term.Matches(pod, nsLabels) {
|
||||
if tpValue, tpValueExist := node.Labels[term.TopologyKey]; tpValueExist {
|
||||
if m[term.TopologyKey] == nil {
|
||||
m[term.TopologyKey] = make(map[string]int64)
|
||||
}
|
||||
m[term.TopologyKey][tpValue] += int64(weight * multiplier)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m scoreMap) processTerms(terms []framework.WeightedAffinityTerm, pod *v1.Pod, nsLabels labels.Set, node *v1.Node, multiplier int32) {
|
||||
for _, term := range terms {
|
||||
m.processTerm(&term.AffinityTerm, term.Weight, pod, nsLabels, node, multiplier)
|
||||
}
|
||||
}
|
||||
|
||||
func (m scoreMap) append(other scoreMap) {
|
||||
for topology, oScores := range other {
|
||||
scores := m[topology]
|
||||
if scores == nil {
|
||||
m[topology] = oScores
|
||||
continue
|
||||
}
|
||||
for k, v := range oScores {
|
||||
scores[k] += v
|
||||
}
|
||||
}
|
||||
}
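For illustration, a self-contained sketch of the merge semantics of scoreMap.append, with made-up weights: topologies missing from the receiver are adopted wholesale, overlapping topology values are summed.

package main

import "fmt"

type scoreMap map[string]map[string]int64

// appendScores mirrors the merge above: adopt missing topologies as-is,
// sum weights for topology values that appear in both maps.
func (m scoreMap) appendScores(other scoreMap) {
	for topology, oScores := range other {
		scores := m[topology]
		if scores == nil {
			m[topology] = oScores
			continue
		}
		for k, v := range oScores {
			scores[k] += v
		}
	}
}

func main() {
	a := scoreMap{"zone": {"zone-a": 2}}
	b := scoreMap{"zone": {"zone-a": 3, "zone-b": 1}}
	a.appendScores(b)
	fmt.Println(a) // map[zone:map[zone-a:5 zone-b:1]]
}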
|
||||
|
||||
func (pl *InterPodAffinity) processExistingPod(
|
||||
state *preScoreState,
|
||||
existingPod *framework.PodInfo,
|
||||
existingPodNodeInfo *framework.NodeInfo,
|
||||
incomingPod *v1.Pod,
|
||||
topoScore scoreMap,
|
||||
) {
|
||||
existingPodNode := existingPodNodeInfo.Node()
|
||||
if len(existingPodNode.Labels) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// For every soft pod affinity term of <pod>, if <existingPod> matches the term,
|
||||
// increment <p.counts> for every node in the cluster with the same <term.TopologyKey>
|
||||
// value as that of <existingPods>`s node by the term`s weight.
|
||||
// Note that the incoming pod's terms have the namespaceSelector merged into the namespaces, and so
|
||||
// here we don't lookup the existing pod's namespace labels, hence passing nil for nsLabels.
|
||||
topoScore.processTerms(state.podInfo.PreferredAffinityTerms, existingPod.Pod, nil, existingPodNode, 1)
|
||||
|
||||
// For every soft pod anti-affinity term of <pod>, if <existingPod> matches the term,
|
||||
// decrement <p.counts> for every node in the cluster with the same <term.TopologyKey>
|
||||
// value as that of <existingPod>`s node by the term`s weight.
|
||||
// Note that the incoming pod's terms have the namespaceSelector merged into the namespaces, and so
|
||||
// here we don't lookup the existing pod's namespace labels, hence passing nil for nsLabels.
|
||||
topoScore.processTerms(state.podInfo.PreferredAntiAffinityTerms, existingPod.Pod, nil, existingPodNode, -1)
|
||||
|
||||
// For every hard pod affinity term of <existingPod>, if <pod> matches the term,
|
||||
// increment <p.counts> for every node in the cluster with the same <term.TopologyKey>
|
||||
// value as that of <existingPod>'s node by the constant <args.hardPodAffinityWeight>
|
||||
if pl.args.HardPodAffinityWeight > 0 && len(existingPodNode.Labels) != 0 {
|
||||
for _, t := range existingPod.RequiredAffinityTerms {
|
||||
topoScore.processTerm(&t, pl.args.HardPodAffinityWeight, incomingPod, state.namespaceLabels, existingPodNode, 1)
|
||||
}
|
||||
}
|
||||
|
||||
// For every soft pod affinity term of <existingPod>, if <pod> matches the term,
|
||||
// increment <p.counts> for every node in the cluster with the same <term.TopologyKey>
|
||||
// value as that of <existingPod>'s node by the term's weight.
|
||||
topoScore.processTerms(existingPod.PreferredAffinityTerms, incomingPod, state.namespaceLabels, existingPodNode, 1)
|
||||
|
||||
// For every soft pod anti-affinity term of <existingPod>, if <pod> matches the term,
|
||||
// decrement <pm.counts> for every node in the cluster with the same <term.TopologyKey>
|
||||
// value as that of <existingPod>'s node by the term's weight.
|
||||
topoScore.processTerms(existingPod.PreferredAntiAffinityTerms, incomingPod, state.namespaceLabels, existingPodNode, -1)
|
||||
}
|
||||
|
||||
// PreScore builds and writes cycle state used by Score and NormalizeScore.
|
||||
func (pl *InterPodAffinity) PreScore(
|
||||
pCtx context.Context,
|
||||
cycleState *framework.CycleState,
|
||||
pod *v1.Pod,
|
||||
nodes []*framework.NodeInfo,
|
||||
) *framework.Status {
|
||||
if len(nodes) == 0 {
|
||||
// No nodes to score.
|
||||
return framework.NewStatus(framework.Skip)
|
||||
}
|
||||
|
||||
if pl.sharedLister == nil {
|
||||
return framework.NewStatus(framework.Error, "empty shared lister in InterPodAffinity PreScore")
|
||||
}
|
||||
|
||||
affinity := pod.Spec.Affinity
|
||||
hasPreferredAffinityConstraints := affinity != nil && affinity.PodAffinity != nil && len(affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution) > 0
|
||||
hasPreferredAntiAffinityConstraints := affinity != nil && affinity.PodAntiAffinity != nil && len(affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution) > 0
|
||||
hasConstraints := hasPreferredAffinityConstraints || hasPreferredAntiAffinityConstraints
|
||||
|
||||
// Optionally ignore calculating preferences of existing pods' affinity rules
|
||||
// if the incoming pod has no inter-pod affinities.
|
||||
if pl.args.IgnorePreferredTermsOfExistingPods && !hasConstraints {
|
||||
return framework.NewStatus(framework.Skip)
|
||||
}
|
||||
|
||||
// Unless the pod being scheduled has preferred affinity terms, we only
|
||||
// need to process nodes hosting pods with affinity.
|
||||
var allNodes []*framework.NodeInfo
|
||||
var err error
|
||||
if hasConstraints {
|
||||
allNodes, err = pl.sharedLister.NodeInfos().List()
|
||||
if err != nil {
|
||||
return framework.AsStatus(fmt.Errorf("failed to get all nodes from shared lister: %w", err))
|
||||
}
|
||||
} else {
|
||||
allNodes, err = pl.sharedLister.NodeInfos().HavePodsWithAffinityList()
|
||||
if err != nil {
|
||||
return framework.AsStatus(fmt.Errorf("failed to get pods with affinity list: %w", err))
|
||||
}
|
||||
}
|
||||
|
||||
state := &preScoreState{
|
||||
topologyScore: make(map[string]map[string]int64),
|
||||
}
|
||||
|
||||
if state.podInfo, err = framework.NewPodInfo(pod); err != nil {
|
||||
// Ideally we never reach here, because errors will be caught by PreFilter
|
||||
return framework.AsStatus(fmt.Errorf("failed to parse pod: %w", err))
|
||||
}
|
||||
|
||||
for i := range state.podInfo.PreferredAffinityTerms {
|
||||
if err := pl.mergeAffinityTermNamespacesIfNotEmpty(&state.podInfo.PreferredAffinityTerms[i].AffinityTerm); err != nil {
|
||||
return framework.AsStatus(fmt.Errorf("updating PreferredAffinityTerms: %w", err))
|
||||
}
|
||||
}
|
||||
for i := range state.podInfo.PreferredAntiAffinityTerms {
|
||||
if err := pl.mergeAffinityTermNamespacesIfNotEmpty(&state.podInfo.PreferredAntiAffinityTerms[i].AffinityTerm); err != nil {
|
||||
return framework.AsStatus(fmt.Errorf("updating PreferredAntiAffinityTerms: %w", err))
|
||||
}
|
||||
}
|
||||
logger := klog.FromContext(pCtx)
|
||||
state.namespaceLabels = GetNamespaceLabelsSnapshot(logger, pod.Namespace, pl.nsLister)
|
||||
|
||||
topoScores := make([]scoreMap, len(allNodes))
|
||||
index := int32(-1)
|
||||
processNode := func(i int) {
|
||||
nodeInfo := allNodes[i]
|
||||
|
||||
// Unless the pod being scheduled has preferred affinity terms, we only
|
||||
// need to process pods with affinity in the node.
|
||||
podsToProcess := nodeInfo.PodsWithAffinity
|
||||
if hasConstraints {
|
||||
// We need to process all the pods.
|
||||
podsToProcess = nodeInfo.Pods
|
||||
}
|
||||
|
||||
topoScore := make(scoreMap)
|
||||
for _, existingPod := range podsToProcess {
|
||||
pl.processExistingPod(state, existingPod, nodeInfo, pod, topoScore)
|
||||
}
|
||||
if len(topoScore) > 0 {
|
||||
topoScores[atomic.AddInt32(&index, 1)] = topoScore
|
||||
}
|
||||
}
|
||||
pl.parallelizer.Until(pCtx, len(allNodes), processNode, pl.Name())
|
||||
|
||||
if index == -1 {
|
||||
return framework.NewStatus(framework.Skip)
|
||||
}
|
||||
|
||||
for i := 0; i <= int(index); i++ {
|
||||
state.topologyScore.append(topoScores[i])
|
||||
}
|
||||
|
||||
cycleState.Write(preScoreStateKey, state)
|
||||
return nil
|
||||
}
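The processNode closure above uses a common lock-free collection pattern: parallel workers publish only non-empty results into a pre-allocated slice through an atomically incremented index, so entries 0..index stay densely packed and no mutex is needed. A standalone sketch of that pattern, using a plain goroutine per item instead of the framework's parallelizer:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	inputs := []int{0, 3, 0, 7, 5, 0} // pretend 0 means "nothing to report"
	results := make([]int, len(inputs))
	index := int32(-1)

	var wg sync.WaitGroup
	for i := range inputs {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			if inputs[i] == 0 {
				return // nothing to publish for this item
			}
			// Reserve the next free slot; slots [0..index] are densely filled.
			results[atomic.AddInt32(&index, 1)] = inputs[i]
		}(i)
	}
	wg.Wait()

	// Only the first index+1 entries are meaningful, mirroring how PreScore
	// later folds topoScores[0..index] into the cycle state.
	fmt.Println(results[:index+1])
}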
|
||||
|
||||
func getPreScoreState(cycleState *framework.CycleState) (*preScoreState, error) {
|
||||
c, err := cycleState.Read(preScoreStateKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read %q from cycleState: %w", preScoreStateKey, err)
|
||||
}
|
||||
|
||||
s, ok := c.(*preScoreState)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("%+v convert to interpodaffinity.preScoreState error", c)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Score invoked at the Score extension point.
|
||||
// The "score" returned in this function is the sum of weights got from cycleState which have its topologyKey matching with the node's labels.
|
||||
// it is normalized later.
|
||||
// Note: the returned "score" is positive for pod-affinity, and negative for pod-antiaffinity.
|
||||
func (pl *InterPodAffinity) Score(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
|
||||
nodeInfo, err := pl.sharedLister.NodeInfos().Get(nodeName)
|
||||
if err != nil {
|
||||
return 0, framework.AsStatus(fmt.Errorf("failed to get node %q from Snapshot: %w", nodeName, err))
|
||||
}
|
||||
node := nodeInfo.Node()
|
||||
|
||||
s, err := getPreScoreState(cycleState)
|
||||
if err != nil {
|
||||
return 0, framework.AsStatus(err)
|
||||
}
|
||||
var score int64
|
||||
for tpKey, tpValues := range s.topologyScore {
|
||||
if v, exist := node.Labels[tpKey]; exist {
|
||||
score += tpValues[v]
|
||||
}
|
||||
}
|
||||
|
||||
return score, nil
|
||||
}
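A worked example of the Score step, with invented topology scores and node labels: only topology keys the node carries contribute, and missing topology values contribute zero.

package main

import "fmt"

func main() {
	// Hypothetical pre-computed scores from PreScore.
	topologyScore := map[string]map[string]int64{
		"topology.kubernetes.io/zone": {"zone-a": 5, "zone-b": -2},
		"kubernetes.io/hostname":      {"node-1": 3},
	}
	// Hypothetical labels of the node being scored.
	nodeLabels := map[string]string{
		"topology.kubernetes.io/zone": "zone-a",
		"kubernetes.io/hostname":      "node-2",
	}

	var score int64
	for tpKey, tpValues := range topologyScore {
		if v, ok := nodeLabels[tpKey]; ok {
			score += tpValues[v] // missing values contribute 0
		}
	}
	fmt.Println(score) // 5: zone-a contributes 5, node-2 has no entry
}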
|
||||
|
||||
// NormalizeScore normalizes the score for each filteredNode.
|
||||
func (pl *InterPodAffinity) NormalizeScore(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, scores framework.NodeScoreList) *framework.Status {
|
||||
s, err := getPreScoreState(cycleState)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
if len(s.topologyScore) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var minCount int64 = math.MaxInt64
|
||||
var maxCount int64 = math.MinInt64
|
||||
for i := range scores {
|
||||
score := scores[i].Score
|
||||
if score > maxCount {
|
||||
maxCount = score
|
||||
}
|
||||
if score < minCount {
|
||||
minCount = score
|
||||
}
|
||||
}
|
||||
|
||||
maxMinDiff := maxCount - minCount
|
||||
for i := range scores {
|
||||
fScore := float64(0)
|
||||
if maxMinDiff > 0 {
|
||||
fScore = float64(framework.MaxNodeScore) * (float64(scores[i].Score-minCount) / float64(maxMinDiff))
|
||||
}
|
||||
|
||||
scores[i].Score = int64(fScore)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
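NormalizeScore above is a plain min-max rescale onto [0, MaxNodeScore]. A small numeric sketch with invented raw scores (framework.MaxNodeScore is 100):

package main

import "fmt"

func main() {
	const maxNodeScore = 100 // framework.MaxNodeScore
	raw := []int64{-3, 0, 7} // invented per-node raw scores

	minCount, maxCount := raw[0], raw[0]
	for _, s := range raw {
		if s > maxCount {
			maxCount = s
		}
		if s < minCount {
			minCount = s
		}
	}

	diff := maxCount - minCount
	for i, s := range raw {
		f := float64(0)
		if diff > 0 {
			f = float64(maxNodeScore) * float64(s-minCount) / float64(diff)
		}
		fmt.Printf("node%d: %d -> %d\n", i, s, int64(f))
	}
	// node0: -3 -> 0, node1: 0 -> 30, node2: 7 -> 100
}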
|
||||
|
||||
// ScoreExtensions of the Score plugin.
|
||||
func (pl *InterPodAffinity) ScoreExtensions() framework.ScoreExtensions {
|
||||
return pl
|
||||
}
|
39
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/names/names.go
generated
vendored
Normal file
@ -0,0 +1,39 @@
|
||||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package names
|
||||
|
||||
const (
|
||||
PrioritySort = "PrioritySort"
|
||||
DefaultBinder = "DefaultBinder"
|
||||
DefaultPreemption = "DefaultPreemption"
|
||||
DynamicResources = "DynamicResources"
|
||||
ImageLocality = "ImageLocality"
|
||||
InterPodAffinity = "InterPodAffinity"
|
||||
NodeAffinity = "NodeAffinity"
|
||||
NodeName = "NodeName"
|
||||
NodePorts = "NodePorts"
|
||||
NodeResourcesBalancedAllocation = "NodeResourcesBalancedAllocation"
|
||||
NodeResourcesFit = "NodeResourcesFit"
|
||||
NodeUnschedulable = "NodeUnschedulable"
|
||||
NodeVolumeLimits = "NodeVolumeLimits"
|
||||
PodTopologySpread = "PodTopologySpread"
|
||||
SchedulingGates = "SchedulingGates"
|
||||
TaintToleration = "TaintToleration"
|
||||
VolumeBinding = "VolumeBinding"
|
||||
VolumeRestrictions = "VolumeRestrictions"
|
||||
VolumeZone = "VolumeZone"
|
||||
)
|
372
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity.go
generated
vendored
Normal file
@ -0,0 +1,372 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package nodeaffinity
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
// NodeAffinity is a plugin that checks if a pod node selector matches the node label.
|
||||
type NodeAffinity struct {
|
||||
handle framework.Handle
|
||||
addedNodeSelector *nodeaffinity.NodeSelector
|
||||
addedPrefSchedTerms *nodeaffinity.PreferredSchedulingTerms
|
||||
enableSchedulingQueueHint bool
|
||||
}
|
||||
|
||||
var _ framework.PreFilterPlugin = &NodeAffinity{}
|
||||
var _ framework.FilterPlugin = &NodeAffinity{}
|
||||
var _ framework.PreScorePlugin = &NodeAffinity{}
|
||||
var _ framework.ScorePlugin = &NodeAffinity{}
|
||||
var _ framework.EnqueueExtensions = &NodeAffinity{}
|
||||
|
||||
const (
|
||||
// Name is the name of the plugin used in the plugin registry and configurations.
|
||||
Name = names.NodeAffinity
|
||||
|
||||
// preScoreStateKey is the key in CycleState to NodeAffinity pre-computed data for Scoring.
|
||||
preScoreStateKey = "PreScore" + Name
|
||||
|
||||
// preFilterStateKey is the key in CycleState to NodeAffinity pre-compute data for Filtering.
|
||||
preFilterStateKey = "PreFilter" + Name
|
||||
|
||||
// ErrReasonPod is the reason for Pod's node affinity/selector not matching.
|
||||
ErrReasonPod = "node(s) didn't match Pod's node affinity/selector"
|
||||
|
||||
// errReasonEnforced is the reason for added node affinity not matching.
|
||||
errReasonEnforced = "node(s) didn't match scheduler-enforced node affinity"
|
||||
|
||||
// errReasonConflict is the reason for pod's conflicting affinity rules.
|
||||
errReasonConflict = "pod affinity terms conflict"
|
||||
)
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (pl *NodeAffinity) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
type preFilterState struct {
|
||||
requiredNodeSelectorAndAffinity nodeaffinity.RequiredNodeAffinity
|
||||
}
|
||||
|
||||
// Clone just returns the same state because it is not affected by pod additions or deletions.
|
||||
func (s *preFilterState) Clone() framework.StateData {
|
||||
return s
|
||||
}
|
||||
|
||||
// EventsToRegister returns the possible events that may make a Pod
|
||||
// failed by this plugin schedulable.
|
||||
func (pl *NodeAffinity) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
// A note about UpdateNodeTaint event:
|
||||
// Ideally, it's supposed to register only Add | UpdateNodeLabel because UpdateNodeTaint will never change the result from this plugin.
|
||||
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
|
||||
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
||||
nodeActionType := framework.Add | framework.UpdateNodeLabel | framework.UpdateNodeTaint
|
||||
if pl.enableSchedulingQueueHint {
|
||||
// preCheck is not used when QHint is enabled, and hence we can use UpdateNodeLabel instead of Update.
|
||||
nodeActionType = framework.Add | framework.UpdateNodeLabel
|
||||
}
|
||||
|
||||
return []framework.ClusterEventWithHint{
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: nodeActionType}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterNodeChange is invoked whenever a node changes. It checks whether
|
||||
// that change made a previously unschedulable pod schedulable.
|
||||
func (pl *NodeAffinity) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
if pl.addedNodeSelector != nil && !pl.addedNodeSelector.Match(modifiedNode) {
|
||||
logger.V(4).Info("added or modified node didn't match scheduler-enforced node affinity and this event won't make the Pod schedulable", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
requiredNodeAffinity := nodeaffinity.GetRequiredNodeAffinity(pod)
|
||||
isMatched, err := requiredNodeAffinity.Match(modifiedNode)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
if !isMatched {
|
||||
logger.V(5).Info("node was created or updated, but the pod's NodeAffinity doesn't match", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
// Since the node was added and it matches the pod's affinity criteria, we can unblock it.
|
||||
if originalNode == nil {
|
||||
logger.V(5).Info("node was created, and matches with the pod's NodeAffinity", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
// At this point we know the operation is an update, so we can narrow the criteria down to unmatch -> match changes only
|
||||
// (necessary affinity label was added to the node in this case).
|
||||
wasMatched, err := requiredNodeAffinity.Match(originalNode)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
if wasMatched {
|
||||
logger.V(5).Info("node updated, but the pod's NodeAffinity hasn't changed", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
logger.V(5).Info("node was updated and the pod's NodeAffinity changed to matched", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
// PreFilter builds and writes cycle state used by Filter.
|
||||
func (pl *NodeAffinity) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
|
||||
affinity := pod.Spec.Affinity
|
||||
noNodeAffinity := (affinity == nil ||
|
||||
affinity.NodeAffinity == nil ||
|
||||
affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil)
|
||||
if noNodeAffinity && pl.addedNodeSelector == nil && pod.Spec.NodeSelector == nil {
|
||||
// NodeAffinity Filter has nothing to do with the Pod.
|
||||
return nil, framework.NewStatus(framework.Skip)
|
||||
}
|
||||
|
||||
state := &preFilterState{requiredNodeSelectorAndAffinity: nodeaffinity.GetRequiredNodeAffinity(pod)}
|
||||
cycleState.Write(preFilterStateKey, state)
|
||||
|
||||
if noNodeAffinity || len(affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Check if there is affinity to a specific node and return it.
|
||||
terms := affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
|
||||
var nodeNames sets.Set[string]
|
||||
for _, t := range terms {
|
||||
var termNodeNames sets.Set[string]
|
||||
for _, r := range t.MatchFields {
|
||||
if r.Key == metav1.ObjectNameField && r.Operator == v1.NodeSelectorOpIn {
|
||||
// The requirements represent ANDed constraints, and so we need to
|
||||
// find the intersection of nodes.
|
||||
s := sets.New(r.Values...)
|
||||
if termNodeNames == nil {
|
||||
termNodeNames = s
|
||||
} else {
|
||||
termNodeNames = termNodeNames.Intersection(s)
|
||||
}
|
||||
}
|
||||
}
|
||||
if termNodeNames == nil {
|
||||
// If this term has no node.Name field affinity,
|
||||
// then all nodes are eligible because the terms are ORed.
|
||||
return nil, nil
|
||||
}
|
||||
nodeNames = nodeNames.Union(termNodeNames)
|
||||
}
|
||||
// If nodeNames is not nil but its length is 0, it means each term has conflicting affinity to node.Name;
// therefore, the pod will not match any node.
|
||||
if nodeNames != nil && len(nodeNames) == 0 {
|
||||
return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, errReasonConflict)
|
||||
} else if len(nodeNames) > 0 {
|
||||
return &framework.PreFilterResult{NodeNames: nodeNames}, nil
|
||||
}
|
||||
return nil, nil
|
||||
|
||||
}
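A standalone sketch of the set logic in PreFilter above: metadata.name MatchFields inside a single term are ANDed (intersection of candidate node names), while terms are ORed (union across terms). The data below is hypothetical and omits the "term without MatchFields means all nodes are eligible" shortcut.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

func main() {
	// Hypothetical metadata.name requirements: inner slices are ANDed within a
	// term, outer slices (terms) are ORed.
	terms := [][][]string{
		{{"node-a", "node-b"}, {"node-b", "node-c"}}, // term 1 -> {node-b}
		{{"node-d"}},                                 // term 2 -> {node-d}
	}

	var nodeNames sets.Set[string]
	for _, term := range terms {
		var termNames sets.Set[string]
		for _, req := range term {
			s := sets.New(req...)
			if termNames == nil {
				termNames = s
			} else {
				termNames = termNames.Intersection(s)
			}
		}
		nodeNames = nodeNames.Union(termNames)
	}
	fmt.Println(sets.List(nodeNames)) // [node-b node-d]
}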
|
||||
|
||||
// PreFilterExtensions not necessary for this plugin as state doesn't depend on pod additions or deletions.
|
||||
func (pl *NodeAffinity) PreFilterExtensions() framework.PreFilterExtensions {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Filter checks if the Node matches the Pod .spec.affinity.nodeAffinity and
|
||||
// the plugin's added affinity.
|
||||
func (pl *NodeAffinity) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
node := nodeInfo.Node()
|
||||
|
||||
if pl.addedNodeSelector != nil && !pl.addedNodeSelector.Match(node) {
|
||||
return framework.NewStatus(framework.UnschedulableAndUnresolvable, errReasonEnforced)
|
||||
}
|
||||
|
||||
s, err := getPreFilterState(state)
|
||||
if err != nil {
|
||||
// Fallback to calculate requiredNodeSelector and requiredNodeAffinity
|
||||
// here when PreFilter is disabled.
|
||||
s = &preFilterState{requiredNodeSelectorAndAffinity: nodeaffinity.GetRequiredNodeAffinity(pod)}
|
||||
}
|
||||
|
||||
// Ignore parsing errors for backwards compatibility.
|
||||
match, _ := s.requiredNodeSelectorAndAffinity.Match(node)
|
||||
if !match {
|
||||
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// preScoreState computed at PreScore and used at Score.
|
||||
type preScoreState struct {
|
||||
preferredNodeAffinity *nodeaffinity.PreferredSchedulingTerms
|
||||
}
|
||||
|
||||
// Clone implements the mandatory Clone interface. We don't really copy the data since
|
||||
// there is no need for that.
|
||||
func (s *preScoreState) Clone() framework.StateData {
|
||||
return s
|
||||
}
|
||||
|
||||
// PreScore builds and writes cycle state used by Score and NormalizeScore.
|
||||
func (pl *NodeAffinity) PreScore(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodes []*framework.NodeInfo) *framework.Status {
|
||||
if len(nodes) == 0 {
|
||||
return nil
|
||||
}
|
||||
preferredNodeAffinity, err := getPodPreferredNodeAffinity(pod)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
if preferredNodeAffinity == nil && pl.addedPrefSchedTerms == nil {
|
||||
// NodeAffinity Score has nothing to do with the Pod.
|
||||
return framework.NewStatus(framework.Skip)
|
||||
}
|
||||
state := &preScoreState{
|
||||
preferredNodeAffinity: preferredNodeAffinity,
|
||||
}
|
||||
cycleState.Write(preScoreStateKey, state)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Score returns the sum of the weights of the terms that match the Node.
|
||||
// Terms came from the Pod .spec.affinity.nodeAffinity and from the plugin's
|
||||
// default affinity.
|
||||
func (pl *NodeAffinity) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
|
||||
nodeInfo, err := pl.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
|
||||
if err != nil {
|
||||
return 0, framework.AsStatus(fmt.Errorf("getting node %q from Snapshot: %w", nodeName, err))
|
||||
}
|
||||
|
||||
node := nodeInfo.Node()
|
||||
|
||||
var count int64
|
||||
if pl.addedPrefSchedTerms != nil {
|
||||
count += pl.addedPrefSchedTerms.Score(node)
|
||||
}
|
||||
|
||||
s, err := getPreScoreState(state)
|
||||
if err != nil {
|
||||
// Fallback to calculate preferredNodeAffinity here when PreScore is disabled.
|
||||
preferredNodeAffinity, err := getPodPreferredNodeAffinity(pod)
|
||||
if err != nil {
|
||||
return 0, framework.AsStatus(err)
|
||||
}
|
||||
s = &preScoreState{
|
||||
preferredNodeAffinity: preferredNodeAffinity,
|
||||
}
|
||||
}
|
||||
|
||||
if s.preferredNodeAffinity != nil {
|
||||
count += s.preferredNodeAffinity.Score(node)
|
||||
}
|
||||
|
||||
return count, nil
|
||||
}
|
||||
|
||||
// NormalizeScore invoked after scoring all nodes.
|
||||
func (pl *NodeAffinity) NormalizeScore(ctx context.Context, state *framework.CycleState, pod *v1.Pod, scores framework.NodeScoreList) *framework.Status {
|
||||
return helper.DefaultNormalizeScore(framework.MaxNodeScore, false, scores)
|
||||
}
|
||||
|
||||
// ScoreExtensions of the Score plugin.
|
||||
func (pl *NodeAffinity) ScoreExtensions() framework.ScoreExtensions {
|
||||
return pl
|
||||
}
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(_ context.Context, plArgs runtime.Object, h framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
args, err := getArgs(plArgs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pl := &NodeAffinity{
|
||||
handle: h,
|
||||
enableSchedulingQueueHint: fts.EnableSchedulingQueueHint,
|
||||
}
|
||||
if args.AddedAffinity != nil {
|
||||
if ns := args.AddedAffinity.RequiredDuringSchedulingIgnoredDuringExecution; ns != nil {
|
||||
pl.addedNodeSelector, err = nodeaffinity.NewNodeSelector(ns)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing addedAffinity.requiredDuringSchedulingIgnoredDuringExecution: %w", err)
|
||||
}
|
||||
}
|
||||
// TODO: parse requiredDuringSchedulingRequiredDuringExecution when it gets added to the API.
|
||||
if terms := args.AddedAffinity.PreferredDuringSchedulingIgnoredDuringExecution; len(terms) != 0 {
|
||||
pl.addedPrefSchedTerms, err = nodeaffinity.NewPreferredSchedulingTerms(terms)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing addedAffinity.preferredDuringSchedulingIgnoredDuringExecution: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return pl, nil
|
||||
}
|
||||
|
||||
func getArgs(obj runtime.Object) (config.NodeAffinityArgs, error) {
|
||||
ptr, ok := obj.(*config.NodeAffinityArgs)
|
||||
if !ok {
|
||||
return config.NodeAffinityArgs{}, fmt.Errorf("args are not of type NodeAffinityArgs, got %T", obj)
|
||||
}
|
||||
return *ptr, validation.ValidateNodeAffinityArgs(nil, ptr)
|
||||
}
|
||||
|
||||
func getPodPreferredNodeAffinity(pod *v1.Pod) (*nodeaffinity.PreferredSchedulingTerms, error) {
|
||||
affinity := pod.Spec.Affinity
|
||||
if affinity != nil && affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil {
|
||||
return nodeaffinity.NewPreferredSchedulingTerms(affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func getPreScoreState(cycleState *framework.CycleState) (*preScoreState, error) {
|
||||
c, err := cycleState.Read(preScoreStateKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading %q from cycleState: %w", preScoreStateKey, err)
|
||||
}
|
||||
|
||||
s, ok := c.(*preScoreState)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid PreScore state, got type %T", c)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func getPreFilterState(cycleState *framework.CycleState) (*preFilterState, error) {
|
||||
c, err := cycleState.Read(preFilterStateKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading %q from cycleState: %v", preFilterStateKey, err)
|
||||
}
|
||||
|
||||
s, ok := c.(*preFilterState)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid PreFilter state, got type %T", c)
|
||||
}
|
||||
return s, nil
|
||||
}
|
89
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodename/node_name.go
generated
vendored
Normal file
@ -0,0 +1,89 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package nodename
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
)
|
||||
|
||||
// NodeName is a plugin that checks if a pod spec node name matches the current node.
|
||||
type NodeName struct {
|
||||
enableSchedulingQueueHint bool
|
||||
}
|
||||
|
||||
var _ framework.FilterPlugin = &NodeName{}
|
||||
var _ framework.EnqueueExtensions = &NodeName{}
|
||||
|
||||
const (
|
||||
// Name is the name of the plugin used in the plugin registry and configurations.
|
||||
Name = names.NodeName
|
||||
|
||||
// ErrReason returned when node name doesn't match.
|
||||
ErrReason = "node(s) didn't match the requested node name"
|
||||
)
|
||||
|
||||
// EventsToRegister returns the possible events that may make a Pod
|
||||
// failed by this plugin schedulable.
|
||||
func (pl *NodeName) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
// A note about UpdateNodeTaint/UpdateNodeLabel event:
|
||||
// Ideally, it's supposed to register only Add because any Node update event will never change the result from this plugin.
|
||||
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
|
||||
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
||||
nodeActionType := framework.Add | framework.UpdateNodeTaint | framework.UpdateNodeLabel
|
||||
if pl.enableSchedulingQueueHint {
|
||||
// preCheck is not used when QHint is enabled, and hence Update event isn't necessary.
|
||||
nodeActionType = framework.Add
|
||||
}
|
||||
|
||||
return []framework.ClusterEventWithHint{
|
||||
// We don't need the QueueingHintFn here because the scheduling of Pods will be always retried with backoff when this Event happens.
|
||||
// (the same as Queue)
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: nodeActionType}},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (pl *NodeName) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
// Filter invoked at the filter extension point.
|
||||
func (pl *NodeName) Filter(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
|
||||
if !Fits(pod, nodeInfo) {
|
||||
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Fits actually checks if the pod fits the node.
|
||||
func Fits(pod *v1.Pod, nodeInfo *framework.NodeInfo) bool {
|
||||
return len(pod.Spec.NodeName) == 0 || pod.Spec.NodeName == nodeInfo.Node().Name
|
||||
}
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(_ context.Context, _ runtime.Object, _ framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
return &NodeName{
|
||||
enableSchedulingQueueHint: fts.EnableSchedulingQueueHint,
|
||||
}, nil
|
||||
}
|
215
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeports/node_ports.go
generated
vendored
Normal file
@ -0,0 +1,215 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package nodeports
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
// NodePorts is a plugin that checks if a node has free ports for the requested pod ports.
|
||||
type NodePorts struct {
|
||||
enableSchedulingQueueHint bool
|
||||
}
|
||||
|
||||
var _ framework.PreFilterPlugin = &NodePorts{}
|
||||
var _ framework.FilterPlugin = &NodePorts{}
|
||||
var _ framework.EnqueueExtensions = &NodePorts{}
|
||||
|
||||
const (
|
||||
// Name is the name of the plugin used in the plugin registry and configurations.
|
||||
Name = names.NodePorts
|
||||
|
||||
// preFilterStateKey is the key in CycleState to NodePorts pre-computed data.
|
||||
// Using the name of the plugin will likely help us avoid collisions with other plugins.
|
||||
preFilterStateKey = "PreFilter" + Name
|
||||
|
||||
// ErrReason when node ports aren't available.
|
||||
ErrReason = "node(s) didn't have free ports for the requested pod ports"
|
||||
)
|
||||
|
||||
type preFilterState []*v1.ContainerPort
|
||||
|
||||
// Clone the prefilter state.
|
||||
func (s preFilterState) Clone() framework.StateData {
|
||||
// The state is not impacted by adding/removing existing pods, hence we don't need to make a deep copy.
|
||||
return s
|
||||
}
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (pl *NodePorts) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
// getContainerPorts returns the used host ports of Pods: if 'port' was used, a 'port:true' pair
|
||||
// will be in the result; but it does not resolve port conflicts.
|
||||
func getContainerPorts(pods ...*v1.Pod) []*v1.ContainerPort {
|
||||
ports := []*v1.ContainerPort{}
|
||||
for _, pod := range pods {
|
||||
for j := range pod.Spec.Containers {
|
||||
container := &pod.Spec.Containers[j]
|
||||
for k := range container.Ports {
|
||||
// Only return ports with a host port specified.
|
||||
if container.Ports[k].HostPort <= 0 {
|
||||
continue
|
||||
}
|
||||
ports = append(ports, &container.Ports[k])
|
||||
}
|
||||
}
|
||||
}
|
||||
return ports
|
||||
}
|
||||
|
||||
// PreFilter invoked at the prefilter extension point.
|
||||
func (pl *NodePorts) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
|
||||
s := getContainerPorts(pod)
|
||||
// Skip if a pod has no ports.
|
||||
if len(s) == 0 {
|
||||
return nil, framework.NewStatus(framework.Skip)
|
||||
}
|
||||
cycleState.Write(preFilterStateKey, preFilterState(s))
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// PreFilterExtensions do not exist for this plugin.
|
||||
func (pl *NodePorts) PreFilterExtensions() framework.PreFilterExtensions {
|
||||
return nil
|
||||
}
|
||||
|
||||
func getPreFilterState(cycleState *framework.CycleState) (preFilterState, error) {
|
||||
c, err := cycleState.Read(preFilterStateKey)
|
||||
if err != nil {
|
||||
// preFilterState doesn't exist, likely PreFilter wasn't invoked.
|
||||
return nil, fmt.Errorf("reading %q from cycleState: %w", preFilterStateKey, err)
|
||||
}
|
||||
|
||||
s, ok := c.(preFilterState)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("%+v convert to nodeports.preFilterState error", c)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// EventsToRegister returns the possible events that may make a Pod
|
||||
// failed by this plugin schedulable.
|
||||
func (pl *NodePorts) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
// A note about UpdateNodeTaint/UpdateNodeLabel event:
|
||||
// Ideally, it's supposed to register only Add because a Node update event never frees up ports for the Pod.
|
||||
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
|
||||
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
||||
nodeActionType := framework.Add | framework.UpdateNodeTaint | framework.UpdateNodeLabel
|
||||
if pl.enableSchedulingQueueHint {
|
||||
// preCheck is not used when QHint is enabled, and hence Update event isn't necessary.
|
||||
nodeActionType = framework.Add
|
||||
}
|
||||
|
||||
return []framework.ClusterEventWithHint{
|
||||
// Due to immutable fields `spec.containers[*].ports`, pod update events are ignored.
|
||||
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Delete}, QueueingHintFn: pl.isSchedulableAfterPodDeleted},
|
||||
// We don't need the QueueingHintFn here because the scheduling of Pods will be always retried with backoff when this Event happens.
|
||||
// (the same as Queue)
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: nodeActionType}},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterPodDeleted is invoked whenever a pod is deleted. It checks whether
|
||||
// that change made a previously unschedulable pod schedulable.
|
||||
func (pl *NodePorts) isSchedulableAfterPodDeleted(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
deletedPod, _, err := util.As[*v1.Pod](oldObj, nil)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
// If the deleted pod is unscheduled, it doesn't make the target pod schedulable.
|
||||
if deletedPod.Spec.NodeName == "" {
|
||||
logger.V(4).Info("the deleted pod is unscheduled and it doesn't make the target pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(deletedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// Get the used host ports of the deleted pod.
|
||||
usedPorts := make(framework.HostPortInfo)
|
||||
for _, container := range deletedPod.Spec.Containers {
|
||||
for _, podPort := range container.Ports {
|
||||
if podPort.HostPort > 0 {
|
||||
usedPorts.Add(podPort.HostIP, string(podPort.Protocol), podPort.HostPort)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the deleted pod doesn't use any host ports, it doesn't make the target pod schedulable.
|
||||
if len(usedPorts) == 0 {
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// Construct a fake NodeInfo that only has the deleted Pod.
|
||||
// If we can schedule `pod` to this fake node, it means that `pod` and the deleted pod don't have any common port(s).
|
||||
// So, deleting that pod couldn't make `pod` schedulable.
|
||||
nodeInfo := framework.NodeInfo{UsedPorts: usedPorts}
|
||||
if Fits(pod, &nodeInfo) {
|
||||
logger.V(4).Info("the deleted pod and the target pod don't have any common port(s), returning QueueSkip as deleting this Pod won't make the Pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(deletedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
logger.V(4).Info("the deleted pod and the target pod have any common port(s), returning Queue as deleting this Pod may make the Pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(deletedPod))
|
||||
return framework.Queue, nil
|
||||
}
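A simplified, self-contained sketch of the host-port conflict idea behind Fits/fitsPorts: ports are keyed by (hostIP, protocol, hostPort), and the wildcard 0.0.0.0 conflicts with any address on the same protocol and port. This reimplements the concept with a local type rather than the framework's HostPortInfo.

package main

import "fmt"

type portKey struct {
	ip       string
	protocol string
	port     int32
}

type hostPorts map[portKey]struct{}

func (h hostPorts) add(ip, proto string, port int32) {
	if ip == "" {
		ip = "0.0.0.0"
	}
	h[portKey{ip, proto, port}] = struct{}{}
}

// conflicts reports whether the requested (ip, proto, port) clashes with a
// port already in use, treating 0.0.0.0 as matching every address.
func (h hostPorts) conflicts(ip, proto string, port int32) bool {
	if ip == "" {
		ip = "0.0.0.0"
	}
	for used := range h {
		if used.protocol != proto || used.port != port {
			continue
		}
		if used.ip == ip || used.ip == "0.0.0.0" || ip == "0.0.0.0" {
			return true
		}
	}
	return false
}

func main() {
	used := hostPorts{}
	used.add("", "TCP", 8080) // an existing pod already bound 0.0.0.0:8080/TCP

	fmt.Println(used.conflicts("10.0.0.5", "TCP", 8080)) // true
	fmt.Println(used.conflicts("", "UDP", 8080))         // false
}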
|
||||
|
||||
// Filter invoked at the filter extension point.
|
||||
func (pl *NodePorts) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
wantPorts, err := getPreFilterState(cycleState)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
|
||||
fits := fitsPorts(wantPorts, nodeInfo)
|
||||
if !fits {
|
||||
return framework.NewStatus(framework.Unschedulable, ErrReason)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Fits checks if the pod fits the node.
|
||||
func Fits(pod *v1.Pod, nodeInfo *framework.NodeInfo) bool {
|
||||
return fitsPorts(getContainerPorts(pod), nodeInfo)
|
||||
}
|
||||
|
||||
func fitsPorts(wantPorts []*v1.ContainerPort, nodeInfo *framework.NodeInfo) bool {
|
||||
// try to see whether existingPorts and wantPorts will conflict or not
|
||||
existingPorts := nodeInfo.UsedPorts
|
||||
for _, cp := range wantPorts {
|
||||
if existingPorts.CheckConflict(cp.HostIP, string(cp.Protocol), cp.HostPort) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(_ context.Context, _ runtime.Object, _ framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
return &NodePorts{
|
||||
enableSchedulingQueueHint: fts.EnableSchedulingQueueHint,
|
||||
}, nil
|
||||
}
|
173
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources/balanced_allocation.go
generated
vendored
Normal file
@ -0,0 +1,173 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package noderesources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
)
|
||||
|
||||
// BalancedAllocation is a score plugin that calculates the difference between the cpu and memory fraction
|
||||
// of capacity, and prioritizes the host based on how close the two metrics are to each other.
|
||||
type BalancedAllocation struct {
|
||||
handle framework.Handle
|
||||
resourceAllocationScorer
|
||||
}
|
||||
|
||||
var _ framework.PreScorePlugin = &BalancedAllocation{}
|
||||
var _ framework.ScorePlugin = &BalancedAllocation{}
|
||||
|
||||
// BalancedAllocationName is the name of the plugin used in the plugin registry and configurations.
|
||||
const (
|
||||
BalancedAllocationName = names.NodeResourcesBalancedAllocation
|
||||
|
||||
// balancedAllocationPreScoreStateKey is the key in CycleState to NodeResourcesBalancedAllocation pre-computed data for Scoring.
|
||||
balancedAllocationPreScoreStateKey = "PreScore" + BalancedAllocationName
|
||||
)
|
||||
|
||||
// balancedAllocationPreScoreState computed at PreScore and used at Score.
|
||||
type balancedAllocationPreScoreState struct {
|
||||
// podRequests has the same order as the resources defined in NodeResourcesFitArgs.Resources;
// the same applies to other places where we store a list like this.
|
||||
podRequests []int64
|
||||
}
|
||||
|
||||
// Clone implements the mandatory Clone interface. We don't really copy the data since
|
||||
// there is no need for that.
|
||||
func (s *balancedAllocationPreScoreState) Clone() framework.StateData {
|
||||
return s
|
||||
}
|
||||
|
||||
// PreScore calculates the incoming pod's resource requests and writes them to the cycle state.
|
||||
func (ba *BalancedAllocation) PreScore(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodes []*framework.NodeInfo) *framework.Status {
|
||||
state := &balancedAllocationPreScoreState{
|
||||
podRequests: ba.calculatePodResourceRequestList(pod, ba.resources),
|
||||
}
|
||||
cycleState.Write(balancedAllocationPreScoreStateKey, state)
|
||||
return nil
|
||||
}
|
||||
|
||||
func getBalancedAllocationPreScoreState(cycleState *framework.CycleState) (*balancedAllocationPreScoreState, error) {
|
||||
c, err := cycleState.Read(balancedAllocationPreScoreStateKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading %q from cycleState: %w", balancedAllocationPreScoreStateKey, err)
|
||||
}
|
||||
|
||||
s, ok := c.(*balancedAllocationPreScoreState)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid PreScore state, got type %T", c)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (ba *BalancedAllocation) Name() string {
|
||||
return BalancedAllocationName
|
||||
}
|
||||
|
||||
// Score invoked at the score extension point.
|
||||
func (ba *BalancedAllocation) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
|
||||
nodeInfo, err := ba.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
|
||||
if err != nil {
|
||||
return 0, framework.AsStatus(fmt.Errorf("getting node %q from Snapshot: %w", nodeName, err))
|
||||
}
|
||||
|
||||
s, err := getBalancedAllocationPreScoreState(state)
|
||||
if err != nil {
|
||||
s = &balancedAllocationPreScoreState{podRequests: ba.calculatePodResourceRequestList(pod, ba.resources)}
|
||||
}
|
||||
|
||||
// ba.score favors nodes with balanced resource usage rate.
|
||||
// It calculates the standard deviation for those resources and prioritizes the node based on how close the usage of those resources is to each other.
|
||||
// Detail: score = (1 - std) * MaxNodeScore, where std is the square root of Σ((fraction(i)-mean)^2)/len(resources)
|
||||
// The algorithm is partly inspired by:
|
||||
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
|
||||
return ba.score(ctx, pod, nodeInfo, s.podRequests)
|
||||
}
|
||||
|
||||
// ScoreExtensions of the Score plugin.
|
||||
func (ba *BalancedAllocation) ScoreExtensions() framework.ScoreExtensions {
|
||||
return nil
|
||||
}
|
||||
|
||||
// NewBalancedAllocation initializes a new plugin and returns it.
|
||||
func NewBalancedAllocation(_ context.Context, baArgs runtime.Object, h framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
args, ok := baArgs.(*config.NodeResourcesBalancedAllocationArgs)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("want args to be of type NodeResourcesBalancedAllocationArgs, got %T", baArgs)
|
||||
}
|
||||
|
||||
if err := validation.ValidateNodeResourcesBalancedAllocationArgs(nil, args); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &BalancedAllocation{
|
||||
handle: h,
|
||||
resourceAllocationScorer: resourceAllocationScorer{
|
||||
Name: BalancedAllocationName,
|
||||
scorer: balancedResourceScorer,
|
||||
useRequested: true,
|
||||
resources: args.Resources,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func balancedResourceScorer(requested, allocable []int64) int64 {
|
||||
var resourceToFractions []float64
|
||||
var totalFraction float64
|
||||
for i := range requested {
|
||||
if allocable[i] == 0 {
|
||||
continue
|
||||
}
|
||||
fraction := float64(requested[i]) / float64(allocable[i])
|
||||
if fraction > 1 {
|
||||
fraction = 1
|
||||
}
|
||||
totalFraction += fraction
|
||||
resourceToFractions = append(resourceToFractions, fraction)
|
||||
}
|
||||
|
||||
std := 0.0
|
||||
|
||||
// In most cases resources are limited to cpu and memory, so the std can be simplified to std := |fraction1-fraction2|/2
|
||||
// len(fractions) > 2: calculate std with the well-known formula - the square root of Σ((fraction(i)-mean)^2)/len(fractions)
|
||||
// Otherwise, leaving the std at zero is enough.
|
||||
if len(resourceToFractions) == 2 {
|
||||
std = math.Abs((resourceToFractions[0] - resourceToFractions[1]) / 2)
|
||||
|
||||
} else if len(resourceToFractions) > 2 {
|
||||
mean := totalFraction / float64(len(resourceToFractions))
|
||||
var sum float64
|
||||
for _, fraction := range resourceToFractions {
|
||||
sum = sum + (fraction-mean)*(fraction-mean)
|
||||
}
|
||||
std = math.Sqrt(sum / float64(len(resourceToFractions)))
|
||||
}
|
||||
|
||||
// STD (standard deviation) is always a non-negative value. 1-std gives a higher score to the node with the least deviation, and
|
||||
// multiplying it by `MaxNodeScore` provides the needed scaling factor.
|
||||
return int64((1 - std) * float64(framework.MaxNodeScore))
|
||||
}
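A minimal, hedged sketch (editorial illustration, not part of the vendored file; names such as balancedScoreSketch are invented, and 100 stands in for framework.MaxNodeScore) of the arithmetic above for the common two-resource case:

package main

import (
	"fmt"
	"math"
)

// balancedScoreSketch mirrors balancedResourceScorer for two resources:
// score = (1 - |cpuFraction-memFraction|/2) * 100.
func balancedScoreSketch(requested, allocatable []int64) int64 {
	cpu := math.Min(float64(requested[0])/float64(allocatable[0]), 1)
	mem := math.Min(float64(requested[1])/float64(allocatable[1]), 1)
	std := math.Abs(cpu-mem) / 2
	return int64((1 - std) * 100)
}

func main() {
	// CPU at 50% (2000m of 4000m) and memory at 70% (7Gi of 10Gi) => std = 0.1, score = 90.
	fmt.Println(balancedScoreSketch([]int64{2000, 7 << 30}, []int64{4000, 10 << 30}))
}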
|
596
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources/fit.go
generated
vendored
Normal file
@ -0,0 +1,596 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package noderesources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/component-helpers/resource"
|
||||
"k8s.io/klog/v2"
|
||||
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
var _ framework.PreFilterPlugin = &Fit{}
|
||||
var _ framework.FilterPlugin = &Fit{}
|
||||
var _ framework.EnqueueExtensions = &Fit{}
|
||||
var _ framework.PreScorePlugin = &Fit{}
|
||||
var _ framework.ScorePlugin = &Fit{}
|
||||
|
||||
const (
|
||||
// Name is the name of the plugin used in the plugin registry and configurations.
|
||||
Name = names.NodeResourcesFit
|
||||
|
||||
// preFilterStateKey is the key in CycleState to NodeResourcesFit pre-computed data.
|
||||
// Using the name of the plugin will likely help us avoid collisions with other plugins.
|
||||
preFilterStateKey = "PreFilter" + Name
|
||||
|
||||
// preScoreStateKey is the key in CycleState to NodeResourcesFit pre-computed data for Scoring.
|
||||
preScoreStateKey = "PreScore" + Name
|
||||
)
|
||||
|
||||
// nodeResourceStrategyTypeMap maps strategy to scorer implementation
|
||||
var nodeResourceStrategyTypeMap = map[config.ScoringStrategyType]scorer{
|
||||
config.LeastAllocated: func(args *config.NodeResourcesFitArgs) *resourceAllocationScorer {
|
||||
resources := args.ScoringStrategy.Resources
|
||||
return &resourceAllocationScorer{
|
||||
Name: string(config.LeastAllocated),
|
||||
scorer: leastResourceScorer(resources),
|
||||
resources: resources,
|
||||
}
|
||||
},
|
||||
config.MostAllocated: func(args *config.NodeResourcesFitArgs) *resourceAllocationScorer {
|
||||
resources := args.ScoringStrategy.Resources
|
||||
return &resourceAllocationScorer{
|
||||
Name: string(config.MostAllocated),
|
||||
scorer: mostResourceScorer(resources),
|
||||
resources: resources,
|
||||
}
|
||||
},
|
||||
config.RequestedToCapacityRatio: func(args *config.NodeResourcesFitArgs) *resourceAllocationScorer {
|
||||
resources := args.ScoringStrategy.Resources
|
||||
return &resourceAllocationScorer{
|
||||
Name: string(config.RequestedToCapacityRatio),
|
||||
scorer: requestedToCapacityRatioScorer(resources, args.ScoringStrategy.RequestedToCapacityRatio.Shape),
|
||||
resources: resources,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
// Fit is a plugin that checks if a node has sufficient resources.
|
||||
type Fit struct {
|
||||
ignoredResources sets.Set[string]
|
||||
ignoredResourceGroups sets.Set[string]
|
||||
enableInPlacePodVerticalScaling bool
|
||||
enableSidecarContainers bool
|
||||
enableSchedulingQueueHint bool
|
||||
enablePodLevelResources bool
|
||||
handle framework.Handle
|
||||
resourceAllocationScorer
|
||||
}
|
||||
|
||||
// ScoreExtensions of the Score plugin.
|
||||
func (f *Fit) ScoreExtensions() framework.ScoreExtensions {
|
||||
return nil
|
||||
}
|
||||
|
||||
// preFilterState computed at PreFilter and used at Filter.
|
||||
type preFilterState struct {
|
||||
framework.Resource
|
||||
}
|
||||
|
||||
// Clone the prefilter state.
|
||||
func (s *preFilterState) Clone() framework.StateData {
|
||||
return s
|
||||
}
|
||||
|
||||
// preScoreState computed at PreScore and used at Score.
|
||||
type preScoreState struct {
|
||||
// podRequests have the same order as the resources defined in NodeResourcesFitArgs.ScoringStrategy.Resources;
|
||||
// the same ordering is used everywhere else we store a list like this.
|
||||
podRequests []int64
|
||||
}
|
||||
|
||||
// Clone implements the mandatory Clone interface. We don't really copy the data since
|
||||
// there is no need for that.
|
||||
func (s *preScoreState) Clone() framework.StateData {
|
||||
return s
|
||||
}
|
||||
|
||||
// PreScore calculates the incoming pod's resource requests and writes them to the cycle state for use at Score.
|
||||
func (f *Fit) PreScore(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodes []*framework.NodeInfo) *framework.Status {
|
||||
state := &preScoreState{
|
||||
podRequests: f.calculatePodResourceRequestList(pod, f.resources),
|
||||
}
|
||||
cycleState.Write(preScoreStateKey, state)
|
||||
return nil
|
||||
}
|
||||
|
||||
func getPreScoreState(cycleState *framework.CycleState) (*preScoreState, error) {
|
||||
c, err := cycleState.Read(preScoreStateKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading %q from cycleState: %w", preScoreStateKey, err)
|
||||
}
|
||||
|
||||
s, ok := c.(*preScoreState)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid PreScore state, got type %T", c)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (f *Fit) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
// NewFit initializes a new plugin and returns it.
|
||||
func NewFit(_ context.Context, plArgs runtime.Object, h framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
args, ok := plArgs.(*config.NodeResourcesFitArgs)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("want args to be of type NodeResourcesFitArgs, got %T", plArgs)
|
||||
}
|
||||
if err := validation.ValidateNodeResourcesFitArgs(nil, args); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if args.ScoringStrategy == nil {
|
||||
return nil, fmt.Errorf("scoring strategy not specified")
|
||||
}
|
||||
|
||||
strategy := args.ScoringStrategy.Type
|
||||
scorePlugin, exists := nodeResourceStrategyTypeMap[strategy]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("scoring strategy %s is not supported", strategy)
|
||||
}
|
||||
|
||||
return &Fit{
|
||||
ignoredResources: sets.New(args.IgnoredResources...),
|
||||
ignoredResourceGroups: sets.New(args.IgnoredResourceGroups...),
|
||||
enableInPlacePodVerticalScaling: fts.EnableInPlacePodVerticalScaling,
|
||||
enableSidecarContainers: fts.EnableSidecarContainers,
|
||||
enableSchedulingQueueHint: fts.EnableSchedulingQueueHint,
|
||||
handle: h,
|
||||
enablePodLevelResources: fts.EnablePodLevelResources,
|
||||
resourceAllocationScorer: *scorePlugin(args),
|
||||
}, nil
|
||||
}
|
||||
|
||||
type ResourceRequestsOptions struct {
|
||||
EnablePodLevelResources bool
|
||||
}
|
||||
|
||||
// computePodResourceRequest returns a framework.Resource that covers the largest
|
||||
// width in each resource dimension. Because init-containers run sequentially, we collect
|
||||
// the max in each dimension iteratively. In contrast, we sum the resource vectors for
|
||||
// regular containers since they run simultaneously.
|
||||
//
|
||||
// # The resources defined for Overhead should be added to the calculated Resource request sum
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// Pod:
|
||||
//
|
||||
// InitContainers
|
||||
// IC1:
|
||||
// CPU: 2
|
||||
// Memory: 1G
|
||||
// IC2:
|
||||
// CPU: 2
|
||||
// Memory: 3G
|
||||
// Containers
|
||||
// C1:
|
||||
// CPU: 2
|
||||
// Memory: 1G
|
||||
// C2:
|
||||
// CPU: 1
|
||||
// Memory: 1G
|
||||
//
|
||||
// Result: CPU: 3, Memory: 3G
|
||||
// TODO(ndixita): modify computePodResourceRequest to accept opts of type
|
||||
// ResourceRequestsOptions as the second parameter.
|
||||
func computePodResourceRequest(pod *v1.Pod, opts ResourceRequestsOptions) *preFilterState {
|
||||
// pod hasn't scheduled yet so we don't need to worry about InPlacePodVerticalScalingEnabled
|
||||
reqs := resource.PodRequests(pod, resource.PodResourcesOptions{
|
||||
// SkipPodLevelResources is set to false when PodLevelResources feature is enabled.
|
||||
SkipPodLevelResources: !opts.EnablePodLevelResources,
|
||||
})
|
||||
result := &preFilterState{}
|
||||
result.SetMaxResource(reqs)
|
||||
return result
|
||||
}
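As a hedged illustration of the max(init containers) vs. sum(regular containers) rule described in the comment above (ignoring Overhead and pod-level resources; effectiveRequestSketch is an invented name):

package main

import "fmt"

// effectiveRequestSketch returns max(largest init-container request, sum of regular-container requests),
// the per-dimension rule applied when computing pod requests for computePodResourceRequest.
func effectiveRequestSketch(initRequests, containerRequests []int64) int64 {
	var maxInit, sum int64
	for _, r := range initRequests {
		if r > maxInit {
			maxInit = r
		}
	}
	for _, r := range containerRequests {
		sum += r
	}
	if maxInit > sum {
		return maxInit
	}
	return sum
}

func main() {
	// CPU from the example above: init containers 2 and 2, regular containers 2 and 1 => max(2, 3) = 3.
	fmt.Println(effectiveRequestSketch([]int64{2, 2}, []int64{2, 1}))
	// Memory: init containers 1G and 3G, regular containers 1G and 1G => max(3G, 2G) = 3G.
	fmt.Println(effectiveRequestSketch([]int64{1, 3}, []int64{1, 1}))
}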
|
||||
|
||||
// PreFilter invoked at the prefilter extension point.
|
||||
func (f *Fit) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
|
||||
if !f.enableSidecarContainers && hasRestartableInitContainer(pod) {
|
||||
// The scheduler will calculate the resource usage of a Pod containing
|
||||
// restartable init containers to be equal to or greater than what the
|
||||
// kubelet will require to run the Pod, so there will be no overbooking.
|
||||
// However, to avoid inconsistencies in resource calculation between the
|
||||
// scheduler and older (pre-v1.28) kubelets, make the Pod unschedulable.
|
||||
return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, "Pod has a restartable init container and the SidecarContainers feature is disabled")
|
||||
}
|
||||
cycleState.Write(preFilterStateKey, computePodResourceRequest(pod, ResourceRequestsOptions{EnablePodLevelResources: f.enablePodLevelResources}))
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// PreFilterExtensions returns prefilter extensions, pod add and remove.
|
||||
func (f *Fit) PreFilterExtensions() framework.PreFilterExtensions {
|
||||
return nil
|
||||
}
|
||||
|
||||
func getPreFilterState(cycleState *framework.CycleState) (*preFilterState, error) {
|
||||
c, err := cycleState.Read(preFilterStateKey)
|
||||
if err != nil {
|
||||
// preFilterState doesn't exist, likely PreFilter wasn't invoked.
|
||||
return nil, fmt.Errorf("error reading %q from cycleState: %w", preFilterStateKey, err)
|
||||
}
|
||||
|
||||
s, ok := c.(*preFilterState)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("%+v convert to NodeResourcesFit.preFilterState error", c)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// EventsToRegister returns the possible events that may make a Pod
|
||||
// failed by this plugin schedulable.
|
||||
func (f *Fit) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
podActionType := framework.Delete
|
||||
if f.enableInPlacePodVerticalScaling {
|
||||
// If InPlacePodVerticalScaling (KEP 1287) is enabled, then UpdatePodScaleDown event should be registered
|
||||
// for this plugin since a Pod update may free up resources that make other Pods schedulable.
|
||||
podActionType |= framework.UpdatePodScaleDown
|
||||
}
|
||||
|
||||
// A note about UpdateNodeTaint/UpdateNodeLabel event:
|
||||
// Ideally, it's supposed to register only Add | UpdateNodeAllocatable, because only a node resource update could change this plugin's result.
|
||||
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
|
||||
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
||||
nodeActionType := framework.Add | framework.UpdateNodeAllocatable | framework.UpdateNodeTaint | framework.UpdateNodeLabel
|
||||
if f.enableSchedulingQueueHint {
|
||||
// preCheck is not used when QHint is enabled, and hence Update event isn't necessary.
|
||||
nodeActionType = framework.Add | framework.UpdateNodeAllocatable
|
||||
}
|
||||
|
||||
return []framework.ClusterEventWithHint{
|
||||
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: podActionType}, QueueingHintFn: f.isSchedulableAfterPodEvent},
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: nodeActionType}, QueueingHintFn: f.isSchedulableAfterNodeChange},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterPodEvent is invoked whenever a pod is deleted or scaled down. It checks whether
|
||||
// that change made a previously unschedulable pod schedulable.
|
||||
func (f *Fit) isSchedulableAfterPodEvent(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
originalPod, modifiedPod, err := schedutil.As[*v1.Pod](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
if modifiedPod == nil {
|
||||
if originalPod.Spec.NodeName == "" {
|
||||
logger.V(5).Info("the deleted pod was unscheduled and it wouldn't make the unscheduled pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(originalPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// any deletion event to a scheduled pod could make the unscheduled pod schedulable.
|
||||
logger.V(5).Info("another scheduled pod was deleted, and it may make the unscheduled pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(originalPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
if !f.enableInPlacePodVerticalScaling {
|
||||
// If InPlacePodVerticalScaling (KEP 1287) is disabled, the pod scale down event cannot free up any resources.
|
||||
logger.V(5).Info("another pod was modified, but InPlacePodVerticalScaling is disabled, so it doesn't make the unscheduled pod schedulable", "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
if !f.isSchedulableAfterPodScaleDown(pod, originalPod, modifiedPod) {
|
||||
if loggerV := logger.V(10); loggerV.Enabled() {
|
||||
// Log more information.
|
||||
loggerV.Info("pod got scaled down, but the modification isn't related to the resource requests of the target pod", "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod), "diff", cmp.Diff(originalPod, modifiedPod))
|
||||
} else {
|
||||
logger.V(5).Info("pod got scaled down, but the modification isn't related to the resource requests of the target pod", "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
}
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("another scheduled pod or the target pod itself got scaled down, and it may make the unscheduled pod schedulable", "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterPodScaleDown checks whether the scale down event may make the target pod schedulable. Specifically:
|
||||
// - Returns true when the update event is for the target pod itself.
|
||||
// - Returns true when the update event shows that a scheduled pod reduced a resource request that the target pod also requests.
|
||||
func (f *Fit) isSchedulableAfterPodScaleDown(targetPod, originalPod, modifiedPod *v1.Pod) bool {
|
||||
if modifiedPod.UID == targetPod.UID {
|
||||
// If the scaling down event is for targetPod, it would make targetPod schedulable.
|
||||
return true
|
||||
}
|
||||
|
||||
if modifiedPod.Spec.NodeName == "" {
|
||||
// If the update event is for an unscheduled Pod,
|
||||
// it wouldn't make targetPod schedulable.
|
||||
return false
|
||||
}
|
||||
|
||||
// the other pod was scheduled, so modification or deletion may free up some resources.
|
||||
originalMaxResourceReq, modifiedMaxResourceReq := &framework.Resource{}, &framework.Resource{}
|
||||
originalMaxResourceReq.SetMaxResource(resource.PodRequests(originalPod, resource.PodResourcesOptions{UseStatusResources: f.enableInPlacePodVerticalScaling}))
|
||||
modifiedMaxResourceReq.SetMaxResource(resource.PodRequests(modifiedPod, resource.PodResourcesOptions{UseStatusResources: f.enableInPlacePodVerticalScaling}))
|
||||
|
||||
// check whether the resource request of the modified pod is less than the original pod.
|
||||
podRequests := resource.PodRequests(targetPod, resource.PodResourcesOptions{UseStatusResources: f.enableInPlacePodVerticalScaling})
|
||||
for rName, rValue := range podRequests {
|
||||
if rValue.IsZero() {
|
||||
// We only care about the resources requested by the pod we are trying to schedule.
|
||||
continue
|
||||
}
|
||||
switch rName {
|
||||
case v1.ResourceCPU:
|
||||
if originalMaxResourceReq.MilliCPU > modifiedMaxResourceReq.MilliCPU {
|
||||
return true
|
||||
}
|
||||
case v1.ResourceMemory:
|
||||
if originalMaxResourceReq.Memory > modifiedMaxResourceReq.Memory {
|
||||
return true
|
||||
}
|
||||
case v1.ResourceEphemeralStorage:
|
||||
if originalMaxResourceReq.EphemeralStorage > modifiedMaxResourceReq.EphemeralStorage {
|
||||
return true
|
||||
}
|
||||
default:
|
||||
if schedutil.IsScalarResourceName(rName) && originalMaxResourceReq.ScalarResources[rName] > modifiedMaxResourceReq.ScalarResources[rName] {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// isSchedulableAfterNodeChange is invoked whenever a node is added or changed. It checks whether
|
||||
// that change could make a previously unschedulable pod schedulable.
|
||||
func (f *Fit) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
originalNode, modifiedNode, err := schedutil.As[*v1.Node](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
// Leave the pod in the queue, since it won't fit on the modified node anyway.
|
||||
if !isFit(pod, modifiedNode, ResourceRequestsOptions{EnablePodLevelResources: f.enablePodLevelResources}) {
|
||||
logger.V(5).Info("node was created or updated, but it doesn't have enough resource(s) to accommodate this pod", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
// The pod will fit; since this is a node add event, unblock scheduling.
|
||||
if originalNode == nil {
|
||||
logger.V(5).Info("node was added and it might fit the pod's resource requests", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
// The pod will fit, but since there was no increase in available resources, the change won't make the pod schedulable.
|
||||
if !haveAnyRequestedResourcesIncreased(pod, originalNode, modifiedNode, ResourceRequestsOptions{EnablePodLevelResources: f.enablePodLevelResources}) {
|
||||
logger.V(5).Info("node was updated, but haven't changed the pod's resource requestments fit assessment", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("node was updated, and may now fit the pod's resource requests", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
// haveAnyRequestedResourcesIncreased returns true if the node's allocatable has increased for any resource requested by the pod, or if the allowed pod number increased.
|
||||
func haveAnyRequestedResourcesIncreased(pod *v1.Pod, originalNode, modifiedNode *v1.Node, opts ResourceRequestsOptions) bool {
|
||||
podRequest := computePodResourceRequest(pod, opts)
|
||||
originalNodeInfo := framework.NewNodeInfo()
|
||||
originalNodeInfo.SetNode(originalNode)
|
||||
modifiedNodeInfo := framework.NewNodeInfo()
|
||||
modifiedNodeInfo.SetNode(modifiedNode)
|
||||
|
||||
if modifiedNodeInfo.Allocatable.AllowedPodNumber > originalNodeInfo.Allocatable.AllowedPodNumber {
|
||||
return true
|
||||
}
|
||||
|
||||
if podRequest.MilliCPU == 0 &&
|
||||
podRequest.Memory == 0 &&
|
||||
podRequest.EphemeralStorage == 0 &&
|
||||
len(podRequest.ScalarResources) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
if (podRequest.MilliCPU > 0 && modifiedNodeInfo.Allocatable.MilliCPU > originalNodeInfo.Allocatable.MilliCPU) ||
|
||||
(podRequest.Memory > 0 && modifiedNodeInfo.Allocatable.Memory > originalNodeInfo.Allocatable.Memory) ||
|
||||
(podRequest.EphemeralStorage > 0 && modifiedNodeInfo.Allocatable.EphemeralStorage > originalNodeInfo.Allocatable.EphemeralStorage) {
|
||||
return true
|
||||
}
|
||||
|
||||
for rName, rQuant := range podRequest.ScalarResources {
|
||||
// Skip in case request quantity is zero
|
||||
if rQuant == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if modifiedNodeInfo.Allocatable.ScalarResources[rName] > originalNodeInfo.Allocatable.ScalarResources[rName] {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// isFit checks if the pod fits the node. If the node is nil, it returns false.
|
||||
// It constructs a fake NodeInfo object for the node and checks if the pod fits the node.
|
||||
func isFit(pod *v1.Pod, node *v1.Node, opts ResourceRequestsOptions) bool {
|
||||
if node == nil {
|
||||
return false
|
||||
}
|
||||
nodeInfo := framework.NewNodeInfo()
|
||||
nodeInfo.SetNode(node)
|
||||
return len(Fits(pod, nodeInfo, opts)) == 0
|
||||
}
|
||||
|
||||
// Filter invoked at the filter extension point.
|
||||
// Checks if a node has sufficient resources, such as cpu, memory, GPUs, and other extended resources, to run a pod.
|
||||
// It returns a list of insufficient resources; if the list is empty, the node has all the resources requested by the pod.
|
||||
func (f *Fit) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
s, err := getPreFilterState(cycleState)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
|
||||
insufficientResources := fitsRequest(s, nodeInfo, f.ignoredResources, f.ignoredResourceGroups)
|
||||
|
||||
if len(insufficientResources) != 0 {
|
||||
// We will keep all failure reasons.
|
||||
failureReasons := make([]string, 0, len(insufficientResources))
|
||||
for i := range insufficientResources {
|
||||
failureReasons = append(failureReasons, insufficientResources[i].Reason)
|
||||
}
|
||||
return framework.NewStatus(framework.Unschedulable, failureReasons...)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func hasRestartableInitContainer(pod *v1.Pod) bool {
|
||||
for _, c := range pod.Spec.InitContainers {
|
||||
if c.RestartPolicy != nil && *c.RestartPolicy == v1.ContainerRestartPolicyAlways {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// InsufficientResource describes what kind of resource limit is hit and caused the pod to not fit the node.
|
||||
type InsufficientResource struct {
|
||||
ResourceName v1.ResourceName
|
||||
// We explicitly have a parameter for reason to avoid formatting a message on the fly
|
||||
// for common resources, which is expensive for cluster autoscaler simulations.
|
||||
Reason string
|
||||
Requested int64
|
||||
Used int64
|
||||
Capacity int64
|
||||
}
|
||||
|
||||
// Fits checks if a node has enough resources to host the pod.
|
||||
func Fits(pod *v1.Pod, nodeInfo *framework.NodeInfo, opts ResourceRequestsOptions) []InsufficientResource {
|
||||
return fitsRequest(computePodResourceRequest(pod, opts), nodeInfo, nil, nil)
|
||||
}
|
||||
|
||||
func fitsRequest(podRequest *preFilterState, nodeInfo *framework.NodeInfo, ignoredExtendedResources, ignoredResourceGroups sets.Set[string]) []InsufficientResource {
|
||||
insufficientResources := make([]InsufficientResource, 0, 4)
|
||||
|
||||
allowedPodNumber := nodeInfo.Allocatable.AllowedPodNumber
|
||||
if len(nodeInfo.Pods)+1 > allowedPodNumber {
|
||||
insufficientResources = append(insufficientResources, InsufficientResource{
|
||||
ResourceName: v1.ResourcePods,
|
||||
Reason: "Too many pods",
|
||||
Requested: 1,
|
||||
Used: int64(len(nodeInfo.Pods)),
|
||||
Capacity: int64(allowedPodNumber),
|
||||
})
|
||||
}
|
||||
|
||||
if podRequest.MilliCPU == 0 &&
|
||||
podRequest.Memory == 0 &&
|
||||
podRequest.EphemeralStorage == 0 &&
|
||||
len(podRequest.ScalarResources) == 0 {
|
||||
return insufficientResources
|
||||
}
|
||||
|
||||
if podRequest.MilliCPU > 0 && podRequest.MilliCPU > (nodeInfo.Allocatable.MilliCPU-nodeInfo.Requested.MilliCPU) {
|
||||
insufficientResources = append(insufficientResources, InsufficientResource{
|
||||
ResourceName: v1.ResourceCPU,
|
||||
Reason: "Insufficient cpu",
|
||||
Requested: podRequest.MilliCPU,
|
||||
Used: nodeInfo.Requested.MilliCPU,
|
||||
Capacity: nodeInfo.Allocatable.MilliCPU,
|
||||
})
|
||||
}
|
||||
if podRequest.Memory > 0 && podRequest.Memory > (nodeInfo.Allocatable.Memory-nodeInfo.Requested.Memory) {
|
||||
insufficientResources = append(insufficientResources, InsufficientResource{
|
||||
ResourceName: v1.ResourceMemory,
|
||||
Reason: "Insufficient memory",
|
||||
Requested: podRequest.Memory,
|
||||
Used: nodeInfo.Requested.Memory,
|
||||
Capacity: nodeInfo.Allocatable.Memory,
|
||||
})
|
||||
}
|
||||
if podRequest.EphemeralStorage > 0 &&
|
||||
podRequest.EphemeralStorage > (nodeInfo.Allocatable.EphemeralStorage-nodeInfo.Requested.EphemeralStorage) {
|
||||
insufficientResources = append(insufficientResources, InsufficientResource{
|
||||
ResourceName: v1.ResourceEphemeralStorage,
|
||||
Reason: "Insufficient ephemeral-storage",
|
||||
Requested: podRequest.EphemeralStorage,
|
||||
Used: nodeInfo.Requested.EphemeralStorage,
|
||||
Capacity: nodeInfo.Allocatable.EphemeralStorage,
|
||||
})
|
||||
}
|
||||
|
||||
for rName, rQuant := range podRequest.ScalarResources {
|
||||
// Skip in case request quantity is zero
|
||||
if rQuant == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if v1helper.IsExtendedResourceName(rName) {
|
||||
// If this resource is one of the extended resources that should be ignored, we will skip checking it.
|
||||
// rName is guaranteed to have a slash due to API validation.
|
||||
var rNamePrefix string
|
||||
if ignoredResourceGroups.Len() > 0 {
|
||||
rNamePrefix = strings.Split(string(rName), "/")[0]
|
||||
}
|
||||
if ignoredExtendedResources.Has(string(rName)) || ignoredResourceGroups.Has(rNamePrefix) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if rQuant > (nodeInfo.Allocatable.ScalarResources[rName] - nodeInfo.Requested.ScalarResources[rName]) {
|
||||
insufficientResources = append(insufficientResources, InsufficientResource{
|
||||
ResourceName: rName,
|
||||
Reason: fmt.Sprintf("Insufficient %v", rName),
|
||||
Requested: podRequest.ScalarResources[rName],
|
||||
Used: nodeInfo.Requested.ScalarResources[rName],
|
||||
Capacity: nodeInfo.Allocatable.ScalarResources[rName],
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return insufficientResources
|
||||
}
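A hedged, standalone illustration (invented names, single resource) of the per-resource headroom check fitsRequest performs above:

package main

import "fmt"

// fitsSketch mirrors the per-resource check in fitsRequest: a request fits only if it does not
// exceed the node's allocatable minus what is already requested on the node.
func fitsSketch(request, allocatable, alreadyRequested int64) bool {
	return request <= allocatable-alreadyRequested
}

func main() {
	// 500m CPU requested, 4000m allocatable, 3800m already requested => only 200m free, does not fit.
	fmt.Println(fitsSketch(500, 4000, 3800))
	// The same request with 3000m already requested => 1000m free, fits.
	fmt.Println(fitsSketch(500, 4000, 3000))
}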
|
||||
|
||||
// Score invoked at the Score extension point.
|
||||
func (f *Fit) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
|
||||
nodeInfo, err := f.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
|
||||
if err != nil {
|
||||
return 0, framework.AsStatus(fmt.Errorf("getting node %q from Snapshot: %w", nodeName, err))
|
||||
}
|
||||
|
||||
s, err := getPreScoreState(state)
|
||||
if err != nil {
|
||||
s = &preScoreState{
|
||||
podRequests: f.calculatePodResourceRequestList(pod, f.resources),
|
||||
}
|
||||
}
|
||||
|
||||
return f.score(ctx, pod, nodeInfo, s.podRequests)
|
||||
}
|
61
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources/least_allocated.go
generated
vendored
Normal file
@ -0,0 +1,61 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package noderesources
|
||||
|
||||
import (
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
// leastResourceScorer favors nodes with fewer requested resources.
|
||||
// It calculates the percentage of memory, CPU and other resources requested by pods scheduled on the node, and
|
||||
// prioritizes based on the minimum of the average of the fraction of requested to capacity.
|
||||
//
|
||||
// Details:
|
||||
// (cpu((capacity-requested)*MaxNodeScore*cpuWeight/capacity) + memory((capacity-requested)*MaxNodeScore*memoryWeight/capacity) + ...)/weightSum
|
||||
func leastResourceScorer(resources []config.ResourceSpec) func([]int64, []int64) int64 {
|
||||
return func(requested, allocable []int64) int64 {
|
||||
var nodeScore, weightSum int64
|
||||
for i := range requested {
|
||||
if allocable[i] == 0 {
|
||||
continue
|
||||
}
|
||||
weight := resources[i].Weight
|
||||
resourceScore := leastRequestedScore(requested[i], allocable[i])
|
||||
nodeScore += resourceScore * weight
|
||||
weightSum += weight
|
||||
}
|
||||
if weightSum == 0 {
|
||||
return 0
|
||||
}
|
||||
return nodeScore / weightSum
|
||||
}
|
||||
}
|
||||
|
||||
// The unused capacity is calculated on a scale of 0-MaxNodeScore
|
||||
// 0 being the lowest priority and `MaxNodeScore` being the highest.
|
||||
// The more unused resources, the higher the score.
|
||||
func leastRequestedScore(requested, capacity int64) int64 {
|
||||
if capacity == 0 {
|
||||
return 0
|
||||
}
|
||||
if requested > capacity {
|
||||
return 0
|
||||
}
|
||||
|
||||
return ((capacity - requested) * framework.MaxNodeScore) / capacity
|
||||
}
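A worked example (editorial sketch, MaxNodeScore assumed to be 100) of leastRequestedScore and how leastResourceScorer combines the per-resource scores with equal weights:

package main

import "fmt"

// leastRequestedSketch mirrors leastRequestedScore: unused capacity scaled to 0-100.
func leastRequestedSketch(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return (capacity - requested) * 100 / capacity
}

func main() {
	cpu := leastRequestedSketch(1000, 4000)   // 1000m of 4000m requested => 75
	mem := leastRequestedSketch(2<<30, 8<<30) // 2Gi of 8Gi requested => 75
	// With both weights set to 1, the node score is the plain average of the per-resource scores.
	fmt.Println(cpu, mem, (cpu*1+mem*1)/2)
}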
|
65
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources/most_allocated.go
generated
vendored
Normal file
@ -0,0 +1,65 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package noderesources
|
||||
|
||||
import (
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
// mostResourceScorer favors nodes with most requested resources.
|
||||
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
|
||||
// based on the maximum of the average of the fraction of requested to capacity.
|
||||
//
|
||||
// Details:
|
||||
// (cpu(MaxNodeScore * requested * cpuWeight / capacity) + memory(MaxNodeScore * requested * memoryWeight / capacity) + ...) / weightSum
|
||||
func mostResourceScorer(resources []config.ResourceSpec) func(requested, allocable []int64) int64 {
|
||||
return func(requested, allocable []int64) int64 {
|
||||
var nodeScore, weightSum int64
|
||||
for i := range requested {
|
||||
if allocable[i] == 0 {
|
||||
continue
|
||||
}
|
||||
weight := resources[i].Weight
|
||||
resourceScore := mostRequestedScore(requested[i], allocable[i])
|
||||
nodeScore += resourceScore * weight
|
||||
weightSum += weight
|
||||
}
|
||||
if weightSum == 0 {
|
||||
return 0
|
||||
}
|
||||
return nodeScore / weightSum
|
||||
}
|
||||
}
|
||||
|
||||
// The used capacity is calculated on a scale of 0-MaxNodeScore (MaxNodeScore is
|
||||
// constant with value set to 100).
|
||||
// 0 being the lowest priority and 100 being the highest.
|
||||
// The more resources are used, the higher the score. This function
|
||||
// is almost a reversed version of noderesources.leastRequestedScore.
|
||||
func mostRequestedScore(requested, capacity int64) int64 {
|
||||
if capacity == 0 {
|
||||
return 0
|
||||
}
|
||||
if requested > capacity {
|
||||
// `requested` might be greater than `capacity` because pods with no
|
||||
// requests get minimum values.
|
||||
requested = capacity
|
||||
}
|
||||
|
||||
return (requested * framework.MaxNodeScore) / capacity
|
||||
}
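For contrast with the LeastAllocated strategy, a hedged sketch of mostRequestedScore with the same sample numbers (MaxNodeScore assumed to be 100; names are invented):

package main

import "fmt"

// mostRequestedSketch mirrors mostRequestedScore: used capacity scaled to 0-100,
// with the request clamped to capacity.
func mostRequestedSketch(requested, capacity int64) int64 {
	if capacity == 0 {
		return 0
	}
	if requested > capacity {
		requested = capacity
	}
	return requested * 100 / capacity
}

func main() {
	fmt.Println(mostRequestedSketch(1000, 4000)) // 25: a lightly used node scores low under MostAllocated
	fmt.Println(mostRequestedSketch(3500, 4000)) // 87: a heavily used node scores high, favoring bin packing
}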
|
73
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources/requested_to_capacity_ratio.go
generated
vendored
Normal file
@ -0,0 +1,73 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package noderesources
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
|
||||
)
|
||||
|
||||
const maxUtilization = 100
|
||||
|
||||
// buildRequestedToCapacityRatioScorerFunction allows users to apply bin packing
|
||||
// on core resources like CPU, Memory as well as extended resources like accelerators.
|
||||
func buildRequestedToCapacityRatioScorerFunction(scoringFunctionShape helper.FunctionShape, resources []config.ResourceSpec) func([]int64, []int64) int64 {
|
||||
rawScoringFunction := helper.BuildBrokenLinearFunction(scoringFunctionShape)
|
||||
resourceScoringFunction := func(requested, capacity int64) int64 {
|
||||
if capacity == 0 || requested > capacity {
|
||||
return rawScoringFunction(maxUtilization)
|
||||
}
|
||||
|
||||
return rawScoringFunction(requested * maxUtilization / capacity)
|
||||
}
|
||||
return func(requested, allocable []int64) int64 {
|
||||
var nodeScore, weightSum int64
|
||||
for i := range requested {
|
||||
if allocable[i] == 0 {
|
||||
continue
|
||||
}
|
||||
weight := resources[i].Weight
|
||||
resourceScore := resourceScoringFunction(requested[i], allocable[i])
|
||||
if resourceScore > 0 {
|
||||
nodeScore += resourceScore * weight
|
||||
weightSum += weight
|
||||
}
|
||||
}
|
||||
if weightSum == 0 {
|
||||
return 0
|
||||
}
|
||||
return int64(math.Round(float64(nodeScore) / float64(weightSum)))
|
||||
}
|
||||
}
|
||||
|
||||
func requestedToCapacityRatioScorer(resources []config.ResourceSpec, shape []config.UtilizationShapePoint) func([]int64, []int64) int64 {
|
||||
shapes := make([]helper.FunctionShapePoint, 0, len(shape))
|
||||
for _, point := range shape {
|
||||
shapes = append(shapes, helper.FunctionShapePoint{
|
||||
Utilization: int64(point.Utilization),
|
||||
// MaxCustomPriorityScore may diverge from the max score used in the scheduler and defined by MaxNodeScore,
|
||||
// therefore we need to scale the score returned by requested to capacity ratio to the score range
|
||||
// used by the scheduler.
|
||||
Score: int64(point.Score) * (framework.MaxNodeScore / config.MaxCustomPriorityScore),
|
||||
})
|
||||
}
|
||||
|
||||
return buildRequestedToCapacityRatioScorerFunction(shapes, resources)
|
||||
}
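To illustrate the shape-based scoring, a simplified, hedged stand-in for helper.BuildBrokenLinearFunction (the real helper may differ; shapePoint and brokenLinearSketch are invented names) that linearly interpolates a score from utilization:

package main

import "fmt"

type shapePoint struct{ utilization, score int64 }

// brokenLinearSketch interpolates the score linearly between consecutive shape points
// and clamps to the first/last score outside the covered utilization range.
func brokenLinearSketch(shape []shapePoint) func(int64) int64 {
	return func(u int64) int64 {
		if u <= shape[0].utilization {
			return shape[0].score
		}
		for i := 1; i < len(shape); i++ {
			if u <= shape[i].utilization {
				p, q := shape[i-1], shape[i]
				return p.score + (q.score-p.score)*(u-p.utilization)/(q.utilization-p.utilization)
			}
		}
		return shape[len(shape)-1].score
	}
}

func main() {
	// A bin-packing shape: score 0 at 0% utilization, score 100 at 100% utilization.
	score := brokenLinearSketch([]shapePoint{{0, 0}, {100, 100}})
	// 3000m requested of 4000m capacity => 75% utilization => score 75.
	fmt.Println(score(3000 * 100 / 4000))
}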
|
148
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources/resource_allocation.go
generated
vendored
Normal file
@ -0,0 +1,148 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package noderesources
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
resourcehelper "k8s.io/component-helpers/resource"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
// scorer is a decorator for resourceAllocationScorer.
|
||||
type scorer func(args *config.NodeResourcesFitArgs) *resourceAllocationScorer
|
||||
|
||||
// resourceAllocationScorer contains information to calculate resource allocation score.
|
||||
type resourceAllocationScorer struct {
|
||||
Name string
|
||||
// used to decide whether to use Requested or NonZeroRequested for
|
||||
// cpu and memory.
|
||||
useRequested bool
|
||||
scorer func(requested, allocable []int64) int64
|
||||
resources []config.ResourceSpec
|
||||
}
|
||||
|
||||
// score will use `scorer` function to calculate the score.
|
||||
func (r *resourceAllocationScorer) score(
|
||||
ctx context.Context,
|
||||
pod *v1.Pod,
|
||||
nodeInfo *framework.NodeInfo,
|
||||
podRequests []int64) (int64, *framework.Status) {
|
||||
logger := klog.FromContext(ctx)
|
||||
node := nodeInfo.Node()
|
||||
|
||||
// resources not set, nothing scheduled,
|
||||
if len(r.resources) == 0 {
|
||||
return 0, framework.NewStatus(framework.Error, "resources not found")
|
||||
}
|
||||
|
||||
requested := make([]int64, len(r.resources))
|
||||
allocatable := make([]int64, len(r.resources))
|
||||
for i := range r.resources {
|
||||
alloc, req := r.calculateResourceAllocatableRequest(logger, nodeInfo, v1.ResourceName(r.resources[i].Name), podRequests[i])
|
||||
// Only fill the extended resource entry when it's non-zero.
|
||||
if alloc == 0 {
|
||||
continue
|
||||
}
|
||||
allocatable[i] = alloc
|
||||
requested[i] = req
|
||||
}
|
||||
|
||||
score := r.scorer(requested, allocatable)
|
||||
|
||||
if loggerV := logger.V(10); loggerV.Enabled() { // Serializing these maps is costly.
|
||||
loggerV.Info("Listed internal info for allocatable resources, requested resources and score", "pod",
|
||||
klog.KObj(pod), "node", klog.KObj(node), "resourceAllocationScorer", r.Name,
|
||||
"allocatableResource", allocatable, "requestedResource", requested, "resourceScore", score,
|
||||
)
|
||||
}
|
||||
|
||||
return score, nil
|
||||
}
|
||||
|
||||
// calculateResourceAllocatableRequest returns 2 parameters:
|
||||
// - 1st param: quantity of allocatable resource on the node.
|
||||
// - 2nd param: aggregated quantity of requested resource on the node.
|
||||
// Note: if it's an extended resource, and the pod doesn't request it, (0, 0) is returned.
|
||||
func (r *resourceAllocationScorer) calculateResourceAllocatableRequest(logger klog.Logger, nodeInfo *framework.NodeInfo, resource v1.ResourceName, podRequest int64) (int64, int64) {
|
||||
requested := nodeInfo.NonZeroRequested
|
||||
if r.useRequested {
|
||||
requested = nodeInfo.Requested
|
||||
}
|
||||
|
||||
// If it's an extended resource and the pod doesn't request it, we return (0, 0)
|
||||
// as an implication to bypass scoring on this resource.
|
||||
if podRequest == 0 && schedutil.IsScalarResourceName(resource) {
|
||||
return 0, 0
|
||||
}
|
||||
switch resource {
|
||||
case v1.ResourceCPU:
|
||||
return nodeInfo.Allocatable.MilliCPU, (requested.MilliCPU + podRequest)
|
||||
case v1.ResourceMemory:
|
||||
return nodeInfo.Allocatable.Memory, (requested.Memory + podRequest)
|
||||
case v1.ResourceEphemeralStorage:
|
||||
return nodeInfo.Allocatable.EphemeralStorage, (nodeInfo.Requested.EphemeralStorage + podRequest)
|
||||
default:
|
||||
if _, exists := nodeInfo.Allocatable.ScalarResources[resource]; exists {
|
||||
return nodeInfo.Allocatable.ScalarResources[resource], (nodeInfo.Requested.ScalarResources[resource] + podRequest)
|
||||
}
|
||||
}
|
||||
logger.V(10).Info("Requested resource is omitted for node score calculation", "resourceName", resource)
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
// calculatePodResourceRequest returns the total non-zero requests. If Overhead is defined for the pod
|
||||
// the Overhead is added to the result.
|
||||
func (r *resourceAllocationScorer) calculatePodResourceRequest(pod *v1.Pod, resourceName v1.ResourceName) int64 {
|
||||
|
||||
opts := resourcehelper.PodResourcesOptions{
|
||||
UseStatusResources: utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling),
|
||||
// SkipPodLevelResources is set to false when PodLevelResources feature is enabled.
|
||||
SkipPodLevelResources: !utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources),
|
||||
}
|
||||
|
||||
if !r.useRequested {
|
||||
opts.NonMissingContainerRequests = v1.ResourceList{
|
||||
v1.ResourceCPU: *resource.NewMilliQuantity(schedutil.DefaultMilliCPURequest, resource.DecimalSI),
|
||||
v1.ResourceMemory: *resource.NewQuantity(schedutil.DefaultMemoryRequest, resource.DecimalSI),
|
||||
}
|
||||
}
|
||||
|
||||
requests := resourcehelper.PodRequests(pod, opts)
|
||||
|
||||
quantity := requests[resourceName]
|
||||
if resourceName == v1.ResourceCPU {
|
||||
return quantity.MilliValue()
|
||||
}
|
||||
return quantity.Value()
|
||||
}
|
||||
|
||||
func (r *resourceAllocationScorer) calculatePodResourceRequestList(pod *v1.Pod, resources []config.ResourceSpec) []int64 {
|
||||
podRequests := make([]int64, len(resources))
|
||||
for i := range resources {
|
||||
podRequests[i] = r.calculatePodResourceRequest(pod, v1.ResourceName(resources[i].Name))
|
||||
}
|
||||
return podRequests
|
||||
}
|
57
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources/test_util.go
generated
vendored
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package noderesources
|
||||
|
||||
import (
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/validation/field"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
)
|
||||
|
||||
var (
|
||||
ignoreBadValueDetail = cmpopts.IgnoreFields(field.Error{}, "BadValue", "Detail")
|
||||
defaultResources = []config.ResourceSpec{
|
||||
{Name: string(v1.ResourceCPU), Weight: 1},
|
||||
{Name: string(v1.ResourceMemory), Weight: 1},
|
||||
}
|
||||
extendedRes = "abc.com/xyz"
|
||||
extendedResourceSet = []config.ResourceSpec{
|
||||
{Name: string(v1.ResourceCPU), Weight: 1},
|
||||
{Name: string(v1.ResourceMemory), Weight: 1},
|
||||
{Name: extendedRes, Weight: 1},
|
||||
}
|
||||
)
|
||||
|
||||
func makeNode(node string, milliCPU, memory int64, extendedResource map[string]int64) *v1.Node {
|
||||
resourceList := make(map[v1.ResourceName]resource.Quantity)
|
||||
for res, quantity := range extendedResource {
|
||||
resourceList[v1.ResourceName(res)] = *resource.NewQuantity(quantity, resource.DecimalSI)
|
||||
}
|
||||
resourceList[v1.ResourceCPU] = *resource.NewMilliQuantity(milliCPU, resource.DecimalSI)
|
||||
resourceList[v1.ResourceMemory] = *resource.NewQuantity(memory, resource.BinarySI)
|
||||
return &v1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{Name: node},
|
||||
Status: v1.NodeStatus{
|
||||
Capacity: resourceList,
|
||||
Allocatable: resourceList,
|
||||
},
|
||||
}
|
||||
}
|
154
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeunschedulable/node_unschedulable.go
generated
vendored
Normal file
@ -0,0 +1,154 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package nodeunschedulable
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
v1helper "k8s.io/component-helpers/scheduling/corev1"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
// NodeUnschedulable plugin filters nodes that set node.Spec.Unschedulable=true unless
|
||||
// the pod tolerates the {key=node.kubernetes.io/unschedulable, effect:NoSchedule} taint.
|
||||
type NodeUnschedulable struct {
|
||||
enableSchedulingQueueHint bool
|
||||
}
|
||||
|
||||
var _ framework.FilterPlugin = &NodeUnschedulable{}
|
||||
var _ framework.EnqueueExtensions = &NodeUnschedulable{}
|
||||
|
||||
// Name is the name of the plugin used in the plugin registry and configurations.
|
||||
const Name = names.NodeUnschedulable
|
||||
|
||||
const (
|
||||
// ErrReasonUnknownCondition is used for NodeUnknownCondition predicate error.
|
||||
ErrReasonUnknownCondition = "node(s) had unknown conditions"
|
||||
// ErrReasonUnschedulable is used for NodeUnschedulable predicate error.
|
||||
ErrReasonUnschedulable = "node(s) were unschedulable"
|
||||
)
|
||||
|
||||
// EventsToRegister returns the possible events that may make a Pod
|
||||
// failed by this plugin schedulable.
|
||||
func (pl *NodeUnschedulable) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
if !pl.enableSchedulingQueueHint {
|
||||
return []framework.ClusterEventWithHint{
|
||||
// A note about UpdateNodeLabel event:
|
||||
// Ideally, it's supposed to register only Add | UpdateNodeTaint because UpdateNodeLabel will never change the result from this plugin.
|
||||
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
|
||||
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeTaint | framework.UpdateNodeLabel}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
|
||||
}, nil
|
||||
}
|
||||
|
||||
return []framework.ClusterEventWithHint{
|
||||
// When QueueingHint is enabled, we don't use preCheck and we don't need to register UpdateNodeLabel event.
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
|
||||
// When the QueueingHint feature is enabled,
|
||||
// the scheduling queue uses Pod/Update Queueing Hint
|
||||
// to determine whether a Pod's update makes the Pod schedulable or not.
|
||||
// https://github.com/kubernetes/kubernetes/pull/122234
|
||||
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodTolerations}, QueueingHintFn: pl.isSchedulableAfterPodTolerationChange},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterPodTolerationChange is invoked whenever a pod's toleration changed.
|
||||
func (pl *NodeUnschedulable) isSchedulableAfterPodTolerationChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
_, modifiedPod, err := util.As[*v1.Pod](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
if pod.UID == modifiedPod.UID {
|
||||
// Note: we don't need to check whether oldPod tolerated the taint because:
|
||||
// - The taint can be added, but can't be modified or removed.
|
||||
// - If the Pod already had the toleration, it wouldn't have been rejected by this plugin in the first place.
|
||||
// Meaning, this Pod has been rejected by this plugin, so it shouldn't have the toleration yet.
|
||||
if v1helper.TolerationsTolerateTaint(modifiedPod.Spec.Tolerations, &v1.Taint{
|
||||
Key: v1.TaintNodeUnschedulable,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
}) {
|
||||
// This update makes the pod tolerate the unschedulable taint.
|
||||
logger.V(5).Info("a new toleration is added for the unschedulable Pod, and it may make it schedulable", "pod", klog.KObj(modifiedPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
logger.V(5).Info("a new toleration is added for the unschedulable Pod, but it's an unrelated toleration", "pod", klog.KObj(modifiedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("a new toleration is added for a Pod, but it's an unrelated Pod and wouldn't change the TaintToleration plugin's decision", "pod", klog.KObj(modifiedPod))
|
||||
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterNodeChange is invoked for all node events reported by
|
||||
// an informer. It checks whether that change made a previously unschedulable
|
||||
// pod schedulable.
|
||||
func (pl *NodeUnschedulable) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
// We queue this Pod when -
|
||||
// 1. the node is updated from unschedulable to schedulable.
|
||||
// 2. the node is added and is schedulable.
|
||||
if (originalNode != nil && originalNode.Spec.Unschedulable && !modifiedNode.Spec.Unschedulable) ||
|
||||
(originalNode == nil && !modifiedNode.Spec.Unschedulable) {
|
||||
logger.V(5).Info("node was created or updated, pod may be schedulable now", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("node was created or updated, but it doesn't make this pod schedulable", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (pl *NodeUnschedulable) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
// Filter invoked at the filter extension point.
|
||||
func (pl *NodeUnschedulable) Filter(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
node := nodeInfo.Node()
|
||||
|
||||
if !node.Spec.Unschedulable {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If the pod tolerates the unschedulable taint, it also tolerates `node.Spec.Unschedulable`.
|
||||
podToleratesUnschedulable := v1helper.TolerationsTolerateTaint(pod.Spec.Tolerations, &v1.Taint{
|
||||
Key: v1.TaintNodeUnschedulable,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
})
|
||||
if !podToleratesUnschedulable {
|
||||
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonUnschedulable)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
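Illustrative only: a sketch of the toleration that lets a pod pass this Filter on a cordoned node. The v1 types and constants are from k8s.io/api/core/v1; the surrounding program is an invented example.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

func main() {
	// A pod carrying this toleration is not rejected by the NodeUnschedulable plugin
	// even when node.Spec.Unschedulable is true (for example, after `kubectl cordon`).
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Tolerations: []v1.Toleration{{
				Key:      v1.TaintNodeUnschedulable,
				Operator: v1.TolerationOpExists,
				Effect:   v1.TaintEffectNoSchedule,
			}},
		},
	}
	fmt.Println(len(pod.Spec.Tolerations))
}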
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(_ context.Context, _ runtime.Object, _ framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
return &NodeUnschedulable{enableSchedulingQueueHint: fts.EnableSchedulingQueueHint}, nil
|
||||
}
|
10
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodevolumelimits/OWNERS
generated
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
# See the OWNERS docs at https://go.k8s.io/owners
|
||||
|
||||
approvers:
|
||||
- sig-storage-approvers
|
||||
- cofyc
|
||||
reviewers:
|
||||
- sig-storage-reviewers
|
||||
- cofyc
|
||||
labels:
|
||||
- sig/storage
|
539
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodevolumelimits/csi.go
generated
vendored
Normal file
@ -0,0 +1,539 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package nodevolumelimits
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
storagev1 "k8s.io/api/storage/v1"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/util/rand"
|
||||
corelisters "k8s.io/client-go/listers/core/v1"
|
||||
storagelisters "k8s.io/client-go/listers/storage/v1"
|
||||
ephemeral "k8s.io/component-helpers/storage/ephemeral"
|
||||
storagehelpers "k8s.io/component-helpers/storage/volume"
|
||||
csitrans "k8s.io/csi-translation-lib"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
const (
|
||||
// ErrReasonMaxVolumeCountExceeded is used for MaxVolumeCount predicate error.
|
||||
ErrReasonMaxVolumeCountExceeded = "node(s) exceed max volume count"
|
||||
)
|
||||
|
||||
// InTreeToCSITranslator contains methods required to check migratable status
|
||||
// and perform translations from InTree PV's to CSI
|
||||
type InTreeToCSITranslator interface {
|
||||
IsPVMigratable(pv *v1.PersistentVolume) bool
|
||||
IsInlineMigratable(vol *v1.Volume) bool
|
||||
IsMigratableIntreePluginByName(inTreePluginName string) bool
|
||||
GetInTreePluginNameFromSpec(pv *v1.PersistentVolume, vol *v1.Volume) (string, error)
|
||||
GetCSINameFromInTreeName(pluginName string) (string, error)
|
||||
TranslateInTreePVToCSI(logger klog.Logger, pv *v1.PersistentVolume) (*v1.PersistentVolume, error)
|
||||
TranslateInTreeInlineVolumeToCSI(logger klog.Logger, volume *v1.Volume, podNamespace string) (*v1.PersistentVolume, error)
|
||||
}
|
||||
|
||||
// CSILimits is a plugin that checks node volume limits.
|
||||
type CSILimits struct {
|
||||
csiNodeLister storagelisters.CSINodeLister
|
||||
pvLister corelisters.PersistentVolumeLister
|
||||
pvcLister corelisters.PersistentVolumeClaimLister
|
||||
scLister storagelisters.StorageClassLister
|
||||
vaLister storagelisters.VolumeAttachmentLister
|
||||
|
||||
randomVolumeIDPrefix string
|
||||
|
||||
translator InTreeToCSITranslator
|
||||
}
|
||||
|
||||
var _ framework.PreFilterPlugin = &CSILimits{}
|
||||
var _ framework.FilterPlugin = &CSILimits{}
|
||||
var _ framework.EnqueueExtensions = &CSILimits{}
|
||||
|
||||
// CSIName is the name of the plugin used in the plugin registry and configurations.
|
||||
const CSIName = names.NodeVolumeLimits
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (pl *CSILimits) Name() string {
|
||||
return CSIName
|
||||
}
|
||||
|
||||
// EventsToRegister returns the possible events that may make a Pod
|
||||
// failed by this plugin schedulable.
|
||||
func (pl *CSILimits) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
return []framework.ClusterEventWithHint{
|
||||
// We don't register any `QueueingHintFn` intentionally
|
||||
// because any new CSINode could make pods that were rejected by CSI volumes schedulable.
|
||||
{Event: framework.ClusterEvent{Resource: framework.CSINode, ActionType: framework.Add}},
|
||||
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Delete}, QueueingHintFn: pl.isSchedulableAfterPodDeleted},
|
||||
{Event: framework.ClusterEvent{Resource: framework.PersistentVolumeClaim, ActionType: framework.Add}, QueueingHintFn: pl.isSchedulableAfterPVCAdded},
|
||||
{Event: framework.ClusterEvent{Resource: framework.VolumeAttachment, ActionType: framework.Delete}},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (pl *CSILimits) isSchedulableAfterPodDeleted(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
deletedPod, _, err := util.As[*v1.Pod](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, fmt.Errorf("unexpected objects in isSchedulableAfterPodDeleted: %w", err)
|
||||
}
|
||||
|
||||
if len(deletedPod.Spec.Volumes) == 0 {
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
if deletedPod.Spec.NodeName == "" {
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
for _, vol := range deletedPod.Spec.Volumes {
|
||||
if vol.PersistentVolumeClaim != nil || vol.Ephemeral != nil || pl.translator.IsInlineMigratable(&vol) {
|
||||
return framework.Queue, nil
|
||||
}
|
||||
}
|
||||
|
||||
logger.V(5).Info("The deleted pod does not impact the scheduling of the unscheduled pod", "deletedPod", klog.KObj(pod), "pod", klog.KObj(deletedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
func (pl *CSILimits) isSchedulableAfterPVCAdded(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
_, addedPvc, err := util.As[*v1.PersistentVolumeClaim](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, fmt.Errorf("unexpected objects in isSchedulableAfterPVCAdded: %w", err)
|
||||
}
|
||||
|
||||
if addedPvc.Namespace != pod.Namespace {
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
for _, volumes := range pod.Spec.Volumes {
|
||||
var pvcName string
|
||||
switch {
|
||||
case volumes.PersistentVolumeClaim != nil:
|
||||
pvcName = volumes.PersistentVolumeClaim.ClaimName
|
||||
case volumes.Ephemeral != nil:
|
||||
pvcName = ephemeral.VolumeClaimName(pod, &volumes)
|
||||
default:
|
||||
// Volume is not using a PVC, ignore
|
||||
continue
|
||||
}
|
||||
|
||||
if pvcName == addedPvc.Name {
|
||||
logger.V(5).Info("PVC that is referred from the pod was created, which might make this pod schedulable", "pod", klog.KObj(pod), "PVC", klog.KObj(addedPvc))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
}
|
||||
|
||||
logger.V(5).Info("PVC irrelevant to the Pod was created, which doesn't make this pod schedulable", "pod", klog.KObj(pod), "PVC", klog.KObj(addedPvc))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
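A hedged illustration of how the hint above resolves a generic ephemeral volume to the PVC name it waits for; the pod and volume names below are made up.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/component-helpers/storage/ephemeral"
)

func main() {
	// Generic ephemeral volumes are backed by a PVC whose name is derived from
	// the pod name and the volume name.
	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "app-0", Namespace: "default"}}
	vol := v1.Volume{
		Name:         "scratch",
		VolumeSource: v1.VolumeSource{Ephemeral: &v1.EphemeralVolumeSource{}},
	}

	// This is the same helper isSchedulableAfterPVCAdded calls above.
	fmt.Println(ephemeral.VolumeClaimName(pod, &vol)) // "app-0-scratch"
}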
|
||||
// PreFilter invoked at the prefilter extension point
|
||||
//
|
||||
// If the pod doesn't have any of those volume types, we'll skip the Filter phase
|
||||
func (pl *CSILimits) PreFilter(ctx context.Context, _ *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
|
||||
volumes := pod.Spec.Volumes
|
||||
for i := range volumes {
|
||||
vol := &volumes[i]
|
||||
if vol.PersistentVolumeClaim != nil || vol.Ephemeral != nil || pl.translator.IsInlineMigratable(vol) {
|
||||
return nil, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, framework.NewStatus(framework.Skip)
|
||||
}
|
||||
|
||||
// PreFilterExtensions returns prefilter extensions, pod add and remove.
|
||||
func (pl *CSILimits) PreFilterExtensions() framework.PreFilterExtensions {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Filter invoked at the filter extension point.
|
||||
func (pl *CSILimits) Filter(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
// If the new pod doesn't have any volume attached to it, the predicate will always be true
|
||||
if len(pod.Spec.Volumes) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
node := nodeInfo.Node()
|
||||
|
||||
logger := klog.FromContext(ctx)
|
||||
|
||||
csiNode, err := pl.csiNodeLister.Get(node.Name)
|
||||
if err != nil {
|
||||
// TODO: return the error once CSINode is created by default (2 releases)
|
||||
logger.V(5).Info("Could not get a CSINode object for the node", "node", klog.KObj(node), "err", err)
|
||||
}
|
||||
|
||||
// Count CSI volumes from the new pod
|
||||
newVolumes := make(map[string]string)
|
||||
if err := pl.filterAttachableVolumes(logger, pod, csiNode, true /* new pod */, newVolumes); err != nil {
|
||||
if apierrors.IsNotFound(err) {
|
||||
// PVC is not found. This Pod will never be schedulable until PVC is created.
|
||||
return framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error())
|
||||
}
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
|
||||
// If the pod doesn't have any new CSI volumes, the predicate will always be true
|
||||
if len(newVolumes) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If the node doesn't have volume limits, the predicate will always be true
|
||||
nodeVolumeLimits := getVolumeLimits(csiNode)
|
||||
if len(nodeVolumeLimits) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Count CSI volumes from existing pods
|
||||
attachedVolumes := make(map[string]string)
|
||||
for _, existingPod := range nodeInfo.Pods {
|
||||
if err := pl.filterAttachableVolumes(logger, existingPod.Pod, csiNode, false /* existing pod */, attachedVolumes); err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
}
|
||||
|
||||
attachedVolumeCount := map[string]int{}
|
||||
for volumeUniqueName, driverName := range attachedVolumes {
|
||||
// Don't count single volume used in multiple pods more than once
|
||||
delete(newVolumes, volumeUniqueName)
|
||||
attachedVolumeCount[driverName]++
|
||||
}
|
||||
|
||||
// Count CSI volumes from VolumeAttachments
|
||||
volumeAttachments, err := pl.getNodeVolumeAttachmentInfo(logger, node.Name)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
|
||||
for volumeUniqueName, driverName := range volumeAttachments {
|
||||
// Avoid double-counting volumes already used by existing pods
|
||||
if _, exists := attachedVolumes[volumeUniqueName]; !exists {
|
||||
attachedVolumeCount[driverName]++
|
||||
}
|
||||
}
|
||||
|
||||
// Count the new volumes count per driver
|
||||
newVolumeCount := map[string]int{}
|
||||
for _, driverName := range newVolumes {
|
||||
newVolumeCount[driverName]++
|
||||
}
|
||||
|
||||
for driverName, count := range newVolumeCount {
|
||||
maxVolumeLimit, ok := nodeVolumeLimits[driverName]
|
||||
if ok {
|
||||
currentVolumeCount := attachedVolumeCount[driverName]
|
||||
logger.V(5).Info("Found plugin volume limits", "node", node.Name, "driverName", driverName,
|
||||
"maxLimits", maxVolumeLimit, "currentVolumeCount", currentVolumeCount, "newVolumeCount", count,
|
||||
"pod", klog.KObj(pod))
|
||||
if currentVolumeCount+count > int(maxVolumeLimit) {
|
||||
return framework.NewStatus(framework.Unschedulable, ErrReasonMaxVolumeCountExceeded)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
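A worked example of the per-driver limit comparison at the end of Filter above; the counts are illustrative, not taken from a real cluster.

package main

import "fmt"

func main() {
	// For one CSI driver on one node:
	maxVolumeLimit := int64(8) // from the CSINode allocatable count (see getVolumeLimits below)
	currentVolumeCount := 6    // unique volumes from existing pods plus VolumeAttachments
	newVolumeCount := 3        // new unique volumes the incoming pod would add

	if currentVolumeCount+newVolumeCount > int(maxVolumeLimit) {
		// Same reason string as ErrReasonMaxVolumeCountExceeded.
		fmt.Println("node(s) exceed max volume count")
	}
}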
|
||||
// filterAttachableVolumes filters the attachable volumes from the pod and adds them to the result map.
|
||||
// The result map is a map of volumeUniqueName to driver name. The volumeUniqueName is a unique name for
|
||||
// the volume in the format of "driverName/volumeHandle". And driver name is the CSI driver name.
|
||||
func (pl *CSILimits) filterAttachableVolumes(
|
||||
logger klog.Logger, pod *v1.Pod, csiNode *storagev1.CSINode, newPod bool, result map[string]string) error {
|
||||
for _, vol := range pod.Spec.Volumes {
|
||||
pvcName := ""
|
||||
isEphemeral := false
|
||||
switch {
|
||||
case vol.PersistentVolumeClaim != nil:
|
||||
// Normal CSI volume can only be used through PVC
|
||||
pvcName = vol.PersistentVolumeClaim.ClaimName
|
||||
case vol.Ephemeral != nil:
|
||||
// Generic ephemeral inline volumes also use a PVC,
|
||||
// just with a computed name and certain ownership.
|
||||
// That is checked below once the pvc object is
|
||||
// retrieved.
|
||||
pvcName = ephemeral.VolumeClaimName(pod, &vol)
|
||||
isEphemeral = true
|
||||
default:
|
||||
// Inline Volume does not have PVC.
|
||||
// Need to check if CSI migration is enabled for this inline volume.
|
||||
// - If the volume is migratable and CSI migration is enabled, need to count it
|
||||
// as well.
|
||||
// - If the volume is not migratable, it will be counted by the non-CSI filter.
|
||||
if err := pl.checkAttachableInlineVolume(logger, &vol, csiNode, pod, result); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
if pvcName == "" {
|
||||
return fmt.Errorf("PersistentVolumeClaim had no name")
|
||||
}
|
||||
|
||||
pvc, err := pl.pvcLister.PersistentVolumeClaims(pod.Namespace).Get(pvcName)
|
||||
|
||||
if err != nil {
|
||||
if newPod {
|
||||
// The PVC is required to proceed with
|
||||
// scheduling of a new pod because it cannot
|
||||
// run without it. Bail out immediately.
|
||||
return fmt.Errorf("looking up PVC %s/%s: %w", pod.Namespace, pvcName, err)
|
||||
}
|
||||
// If the PVC is invalid, we don't count the volume because
|
||||
// there's no guarantee that it belongs to the running predicate.
|
||||
logger.V(5).Info("Unable to look up PVC info", "pod", klog.KObj(pod), "PVC", klog.KRef(pod.Namespace, pvcName))
|
||||
continue
|
||||
}
|
||||
|
||||
// The PVC for an ephemeral volume must be owned by the pod.
|
||||
if isEphemeral {
|
||||
if err := ephemeral.VolumeIsForPod(pod, pvc); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
driverName, volumeHandle := pl.getCSIDriverInfo(logger, csiNode, pvc)
|
||||
if driverName == "" || volumeHandle == "" {
|
||||
logger.V(5).Info("Could not find a CSI driver name or volume handle, not counting volume")
|
||||
continue
|
||||
}
|
||||
|
||||
volumeUniqueName := getVolumeUniqueName(driverName, volumeHandle)
|
||||
result[volumeUniqueName] = driverName
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkAttachableInlineVolume takes an inline volume and adds it to the result map if the
|
||||
// volume is migratable and CSI migration for this plugin has been enabled.
|
||||
func (pl *CSILimits) checkAttachableInlineVolume(logger klog.Logger, vol *v1.Volume, csiNode *storagev1.CSINode,
|
||||
pod *v1.Pod, result map[string]string) error {
|
||||
if !pl.translator.IsInlineMigratable(vol) {
|
||||
return nil
|
||||
}
|
||||
// Check if the intree provisioner CSI migration has been enabled.
|
||||
inTreeProvisionerName, err := pl.translator.GetInTreePluginNameFromSpec(nil, vol)
|
||||
if err != nil {
|
||||
return fmt.Errorf("looking up provisioner name for volume %s: %w", vol.Name, err)
|
||||
}
|
||||
if !isCSIMigrationOn(csiNode, inTreeProvisionerName) {
|
||||
csiNodeName := ""
|
||||
if csiNode != nil {
|
||||
csiNodeName = csiNode.Name
|
||||
}
|
||||
logger.V(5).Info("CSI Migration is not enabled for provisioner", "provisioner", inTreeProvisionerName,
|
||||
"pod", klog.KObj(pod), "csiNode", csiNodeName)
|
||||
return nil
|
||||
}
|
||||
// Do translation for the in-tree volume.
|
||||
translatedPV, err := pl.translator.TranslateInTreeInlineVolumeToCSI(logger, vol, pod.Namespace)
|
||||
if err != nil || translatedPV == nil {
|
||||
return fmt.Errorf("converting volume(%s) from inline to csi: %w", vol.Name, err)
|
||||
}
|
||||
driverName, err := pl.translator.GetCSINameFromInTreeName(inTreeProvisionerName)
|
||||
if err != nil {
|
||||
return fmt.Errorf("looking up CSI driver name for provisioner %s: %w", inTreeProvisionerName, err)
|
||||
}
|
||||
// TranslateInTreeInlineVolumeToCSI should translate inline volume to CSI. If it is not set,
|
||||
// the volume does not support inline. Skip the count.
|
||||
if translatedPV.Spec.PersistentVolumeSource.CSI == nil {
|
||||
return nil
|
||||
}
|
||||
volumeUniqueName := getVolumeUniqueName(driverName, translatedPV.Spec.PersistentVolumeSource.CSI.VolumeHandle)
|
||||
result[volumeUniqueName] = driverName
|
||||
return nil
|
||||
}
|
||||
|
||||
// getCSIDriverInfo returns the CSI driver name and volume ID of a given PVC.
|
||||
// If the PVC is from a migrated in-tree plugin, this function will return
|
||||
// the information of the CSI driver that the plugin has been migrated to.
|
||||
func (pl *CSILimits) getCSIDriverInfo(logger klog.Logger, csiNode *storagev1.CSINode, pvc *v1.PersistentVolumeClaim) (string, string) {
|
||||
pvName := pvc.Spec.VolumeName
|
||||
|
||||
if pvName == "" {
|
||||
logger.V(5).Info("Persistent volume had no name for claim", "PVC", klog.KObj(pvc))
|
||||
return pl.getCSIDriverInfoFromSC(logger, csiNode, pvc)
|
||||
}
|
||||
|
||||
pv, err := pl.pvLister.Get(pvName)
|
||||
if err != nil {
|
||||
logger.V(5).Info("Unable to look up PV info for PVC and PV", "PVC", klog.KObj(pvc), "PV", klog.KRef("", pvName))
|
||||
// If we can't fetch the PV associated with the PVC, maybe it got deleted
|
||||
// or the PVC was prebound to a PV that hasn't been created yet.
|
||||
// Fall back to using the StorageClass for volume counting.
|
||||
return pl.getCSIDriverInfoFromSC(logger, csiNode, pvc)
|
||||
}
|
||||
|
||||
csiSource := pv.Spec.PersistentVolumeSource.CSI
|
||||
if csiSource == nil {
|
||||
// We make a fast path for non-CSI volumes that aren't migratable
|
||||
if !pl.translator.IsPVMigratable(pv) {
|
||||
return "", ""
|
||||
}
|
||||
|
||||
pluginName, err := pl.translator.GetInTreePluginNameFromSpec(pv, nil)
|
||||
if err != nil {
|
||||
logger.V(5).Info("Unable to look up plugin name from PV spec", "err", err)
|
||||
return "", ""
|
||||
}
|
||||
|
||||
if !isCSIMigrationOn(csiNode, pluginName) {
|
||||
logger.V(5).Info("CSI Migration of plugin is not enabled", "plugin", pluginName)
|
||||
return "", ""
|
||||
}
|
||||
|
||||
csiPV, err := pl.translator.TranslateInTreePVToCSI(logger, pv)
|
||||
if err != nil {
|
||||
logger.V(5).Info("Unable to translate in-tree volume to CSI", "err", err)
|
||||
return "", ""
|
||||
}
|
||||
|
||||
if csiPV.Spec.PersistentVolumeSource.CSI == nil {
|
||||
logger.V(5).Info("Unable to get a valid volume source for translated PV", "PV", pvName)
|
||||
return "", ""
|
||||
}
|
||||
|
||||
csiSource = csiPV.Spec.PersistentVolumeSource.CSI
|
||||
}
|
||||
|
||||
return csiSource.Driver, csiSource.VolumeHandle
|
||||
}
|
||||
|
||||
// getCSIDriverInfoFromSC returns the CSI driver name and a random volume ID of a given PVC's StorageClass.
|
||||
func (pl *CSILimits) getCSIDriverInfoFromSC(logger klog.Logger, csiNode *storagev1.CSINode, pvc *v1.PersistentVolumeClaim) (string, string) {
|
||||
namespace := pvc.Namespace
|
||||
pvcName := pvc.Name
|
||||
scName := storagehelpers.GetPersistentVolumeClaimClass(pvc)
|
||||
|
||||
// If StorageClass is not set or not found, then PVC must be using immediate binding mode
|
||||
// and hence it must be bound before scheduling. So it is safe to not count it.
|
||||
if scName == "" {
|
||||
logger.V(5).Info("PVC has no StorageClass", "PVC", klog.KObj(pvc))
|
||||
return "", ""
|
||||
}
|
||||
|
||||
storageClass, err := pl.scLister.Get(scName)
|
||||
if err != nil {
|
||||
logger.V(5).Info("Could not get StorageClass for PVC", "PVC", klog.KObj(pvc), "err", err)
|
||||
return "", ""
|
||||
}
|
||||
|
||||
// We use a random prefix to avoid conflicts with volume IDs. If the PVC is bound during the execution of the
|
||||
// predicate and there is another pod on the same node that uses the same volume, then we will overcount
|
||||
// the volume and consider both volumes as different.
|
||||
volumeHandle := fmt.Sprintf("%s-%s/%s", pl.randomVolumeIDPrefix, namespace, pvcName)
|
||||
|
||||
provisioner := storageClass.Provisioner
|
||||
if pl.translator.IsMigratableIntreePluginByName(provisioner) {
|
||||
if !isCSIMigrationOn(csiNode, provisioner) {
|
||||
logger.V(5).Info("CSI Migration of provisioner is not enabled", "provisioner", provisioner)
|
||||
return "", ""
|
||||
}
|
||||
|
||||
driverName, err := pl.translator.GetCSINameFromInTreeName(provisioner)
|
||||
if err != nil {
|
||||
logger.V(5).Info("Unable to look up driver name from provisioner name", "provisioner", provisioner, "err", err)
|
||||
return "", ""
|
||||
}
|
||||
return driverName, volumeHandle
|
||||
}
|
||||
|
||||
return provisioner, volumeHandle
|
||||
}
|
||||
|
||||
// NewCSI initializes a new plugin and returns it.
|
||||
func NewCSI(_ context.Context, _ runtime.Object, handle framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
informerFactory := handle.SharedInformerFactory()
|
||||
pvLister := informerFactory.Core().V1().PersistentVolumes().Lister()
|
||||
pvcLister := informerFactory.Core().V1().PersistentVolumeClaims().Lister()
|
||||
csiNodesLister := informerFactory.Storage().V1().CSINodes().Lister()
|
||||
scLister := informerFactory.Storage().V1().StorageClasses().Lister()
|
||||
vaLister := informerFactory.Storage().V1().VolumeAttachments().Lister()
|
||||
csiTranslator := csitrans.New()
|
||||
|
||||
return &CSILimits{
|
||||
csiNodeLister: csiNodesLister,
|
||||
pvLister: pvLister,
|
||||
pvcLister: pvcLister,
|
||||
scLister: scLister,
|
||||
vaLister: vaLister,
|
||||
randomVolumeIDPrefix: rand.String(32),
|
||||
translator: csiTranslator,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// getVolumeLimits reads the volume limits from CSINode object and returns a map of volume limits.
|
||||
// The key is the driver name and the value is the maximum number of volumes that can be attached to the node.
|
||||
// If a key is not found in the map, it means there is no limit for the driver on the node.
|
||||
func getVolumeLimits(csiNode *storagev1.CSINode) map[string]int64 {
|
||||
nodeVolumeLimits := make(map[string]int64)
|
||||
if csiNode == nil {
|
||||
return nodeVolumeLimits
|
||||
}
|
||||
for _, d := range csiNode.Spec.Drivers {
|
||||
if d.Allocatable != nil && d.Allocatable.Count != nil {
|
||||
nodeVolumeLimits[d.Name] = int64(*d.Allocatable.Count)
|
||||
}
|
||||
}
|
||||
return nodeVolumeLimits
|
||||
}
|
||||
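A minimal sketch of the CSINode shape this helper reads, mirroring the same aggregation loop; the driver name and limit are illustrative.

package main

import (
	"fmt"

	storagev1 "k8s.io/api/storage/v1"
	"k8s.io/utils/ptr"
)

func main() {
	// A CSINode reporting that at most 8 volumes of this driver can be attached to the node.
	csiNode := &storagev1.CSINode{
		Spec: storagev1.CSINodeSpec{
			Drivers: []storagev1.CSINodeDriver{
				{Name: "rbd.csi.ceph.com", Allocatable: &storagev1.VolumeNodeResources{Count: ptr.To[int32](8)}},
			},
		},
	}

	// Same aggregation as getVolumeLimits above: driver name -> max attachable volumes.
	limits := map[string]int64{}
	for _, d := range csiNode.Spec.Drivers {
		if d.Allocatable != nil && d.Allocatable.Count != nil {
			limits[d.Name] = int64(*d.Allocatable.Count)
		}
	}
	fmt.Println(limits) // map[rbd.csi.ceph.com:8]
}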
|
||||
// getNodeVolumeAttachmentInfo returns a map of volumeID to driver name for the given node.
|
||||
func (pl *CSILimits) getNodeVolumeAttachmentInfo(logger klog.Logger, nodeName string) (map[string]string, error) {
|
||||
volumeAttachments := make(map[string]string)
|
||||
vas, err := pl.vaLister.List(labels.Everything())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, va := range vas {
|
||||
if va.Spec.NodeName == nodeName {
|
||||
if va.Spec.Attacher == "" {
|
||||
logger.V(5).Info("VolumeAttachment has no attacher", "VolumeAttachment", klog.KObj(va))
|
||||
continue
|
||||
}
|
||||
if va.Spec.Source.PersistentVolumeName == nil {
|
||||
logger.V(5).Info("VolumeAttachment has no PV name", "VolumeAttachment", klog.KObj(va))
|
||||
continue
|
||||
}
|
||||
pv, err := pl.pvLister.Get(*va.Spec.Source.PersistentVolumeName)
|
||||
if err != nil {
|
||||
logger.V(5).Info("Unable to get PV for VolumeAttachment", "VolumeAttachment", klog.KObj(va), "err", err)
|
||||
continue
|
||||
}
|
||||
if pv.Spec.CSI == nil {
|
||||
logger.V(5).Info("PV is not a CSI volume", "PV", klog.KObj(pv))
|
||||
continue
|
||||
}
|
||||
volumeID := getVolumeUniqueName(va.Spec.Attacher, pv.Spec.CSI.VolumeHandle)
|
||||
volumeAttachments[volumeID] = va.Spec.Attacher
|
||||
}
|
||||
}
|
||||
return volumeAttachments, nil
|
||||
}
|
||||
|
||||
func getVolumeUniqueName(driverName, volumeHandle string) string {
|
||||
return fmt.Sprintf("%s/%s", driverName, volumeHandle)
|
||||
}
|
73
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodevolumelimits/utils.go
generated
vendored
Normal file
@ -0,0 +1,73 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package nodevolumelimits
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
storagev1 "k8s.io/api/storage/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
csilibplugins "k8s.io/csi-translation-lib/plugins"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
)
|
||||
|
||||
// isCSIMigrationOn returns a boolean value indicating whether
|
||||
// the CSI migration has been enabled for a particular storage plugin.
|
||||
func isCSIMigrationOn(csiNode *storagev1.CSINode, pluginName string) bool {
|
||||
if csiNode == nil || len(pluginName) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
// In-tree storage to CSI driver migration feature should be enabled,
|
||||
// along with the plugin-specific one
|
||||
switch pluginName {
|
||||
case csilibplugins.AWSEBSInTreePluginName:
|
||||
return true
|
||||
case csilibplugins.PortworxVolumePluginName:
|
||||
if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationPortworx) {
|
||||
return false
|
||||
}
|
||||
case csilibplugins.GCEPDInTreePluginName:
|
||||
return true
|
||||
case csilibplugins.AzureDiskInTreePluginName:
|
||||
return true
|
||||
case csilibplugins.CinderInTreePluginName:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
|
||||
// The plugin name should be listed in the CSINode object annotation.
|
||||
// This indicates that the plugin has been migrated to a CSI driver in the node.
|
||||
csiNodeAnn := csiNode.GetAnnotations()
|
||||
if csiNodeAnn == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
var mpaSet sets.Set[string]
|
||||
mpa := csiNodeAnn[v1.MigratedPluginsAnnotationKey]
|
||||
if len(mpa) == 0 {
|
||||
mpaSet = sets.New[string]()
|
||||
} else {
|
||||
tok := strings.Split(mpa, ",")
|
||||
mpaSet = sets.New(tok...)
|
||||
}
|
||||
|
||||
return mpaSet.Has(pluginName)
|
||||
}
|
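A small sketch of the annotation lookup at the end of isCSIMigrationOn; the annotation value is illustrative, and the plugin names are the in-tree names used by csi-translation-lib.

package main

import (
	"fmt"
	"strings"

	v1 "k8s.io/api/core/v1"
	storagev1 "k8s.io/api/storage/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/sets"
)

func main() {
	// A CSINode whose annotation lists the in-tree plugins already migrated on this node.
	csiNode := &storagev1.CSINode{
		ObjectMeta: metav1.ObjectMeta{
			Annotations: map[string]string{
				v1.MigratedPluginsAnnotationKey: "kubernetes.io/gce-pd,kubernetes.io/aws-ebs",
			},
		},
	}

	// Same membership test as the tail of isCSIMigrationOn above.
	mpa := csiNode.GetAnnotations()[v1.MigratedPluginsAnnotationKey]
	mpaSet := sets.New(strings.Split(mpa, ",")...)
	fmt.Println(mpaSet.Has("kubernetes.io/gce-pd")) // true
	fmt.Println(mpaSet.Has("kubernetes.io/cinder")) // false
}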
174
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/podtopologyspread/common.go
generated
vendored
Normal file
@ -0,0 +1,174 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package podtopologyspread
|
||||
|
||||
import (
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
v1helper "k8s.io/component-helpers/scheduling/corev1"
|
||||
"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
|
||||
"k8s.io/utils/ptr"
|
||||
)
|
||||
|
||||
type topologyPair struct {
|
||||
key string
|
||||
value string
|
||||
}
|
||||
|
||||
// topologySpreadConstraint is an internal version for v1.TopologySpreadConstraint
|
||||
// and where the selector is parsed.
|
||||
// Fields are exported for comparison during testing.
|
||||
type topologySpreadConstraint struct {
|
||||
MaxSkew int32
|
||||
TopologyKey string
|
||||
Selector labels.Selector
|
||||
MinDomains int32
|
||||
NodeAffinityPolicy v1.NodeInclusionPolicy
|
||||
NodeTaintsPolicy v1.NodeInclusionPolicy
|
||||
}
|
||||
|
||||
func (tsc *topologySpreadConstraint) matchNodeInclusionPolicies(pod *v1.Pod, node *v1.Node, require nodeaffinity.RequiredNodeAffinity) bool {
|
||||
if tsc.NodeAffinityPolicy == v1.NodeInclusionPolicyHonor {
|
||||
// We ignore parsing errors here for backwards compatibility.
|
||||
if match, _ := require.Match(node); !match {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if tsc.NodeTaintsPolicy == v1.NodeInclusionPolicyHonor {
|
||||
if _, untolerated := v1helper.FindMatchingUntoleratedTaint(node.Spec.Taints, pod.Spec.Tolerations, helper.DoNotScheduleTaintsFilterFunc()); untolerated {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// buildDefaultConstraints builds the constraints for a pod using
|
||||
// .DefaultConstraints and the selectors from the services, replication
|
||||
// controllers, replica sets and stateful sets that match the pod.
|
||||
func (pl *PodTopologySpread) buildDefaultConstraints(p *v1.Pod, action v1.UnsatisfiableConstraintAction) ([]topologySpreadConstraint, error) {
|
||||
constraints, err := pl.filterTopologySpreadConstraints(pl.defaultConstraints, p.Labels, action)
|
||||
if err != nil || len(constraints) == 0 {
|
||||
return nil, err
|
||||
}
|
||||
selector := helper.DefaultSelector(p, pl.services, pl.replicationCtrls, pl.replicaSets, pl.statefulSets)
|
||||
if selector.Empty() {
|
||||
return nil, nil
|
||||
}
|
||||
for i := range constraints {
|
||||
constraints[i].Selector = selector
|
||||
}
|
||||
return constraints, nil
|
||||
}
|
||||
|
||||
// nodeLabelsMatchSpreadConstraints checks if ALL topology keys in spread Constraints are present in node labels.
|
||||
func nodeLabelsMatchSpreadConstraints(nodeLabels map[string]string, constraints []topologySpreadConstraint) bool {
|
||||
for _, c := range constraints {
|
||||
if _, ok := nodeLabels[c.TopologyKey]; !ok {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (pl *PodTopologySpread) filterTopologySpreadConstraints(constraints []v1.TopologySpreadConstraint, podLabels map[string]string, action v1.UnsatisfiableConstraintAction) ([]topologySpreadConstraint, error) {
|
||||
var result []topologySpreadConstraint
|
||||
for _, c := range constraints {
|
||||
if c.WhenUnsatisfiable == action {
|
||||
selector, err := metav1.LabelSelectorAsSelector(c.LabelSelector)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if pl.enableMatchLabelKeysInPodTopologySpread && len(c.MatchLabelKeys) > 0 {
|
||||
matchLabels := make(labels.Set)
|
||||
for _, labelKey := range c.MatchLabelKeys {
|
||||
if value, ok := podLabels[labelKey]; ok {
|
||||
matchLabels[labelKey] = value
|
||||
}
|
||||
}
|
||||
if len(matchLabels) > 0 {
|
||||
selector = mergeLabelSetWithSelector(matchLabels, selector)
|
||||
}
|
||||
}
|
||||
|
||||
tsc := topologySpreadConstraint{
|
||||
MaxSkew: c.MaxSkew,
|
||||
TopologyKey: c.TopologyKey,
|
||||
Selector: selector,
|
||||
MinDomains: ptr.Deref(c.MinDomains, 1), // If MinDomains is nil, we treat MinDomains as 1.
|
||||
NodeAffinityPolicy: v1.NodeInclusionPolicyHonor, // If NodeAffinityPolicy is nil, we treat NodeAffinityPolicy as "Honor".
|
||||
NodeTaintsPolicy: v1.NodeInclusionPolicyIgnore, // If NodeTaintsPolicy is nil, we treat NodeTaintsPolicy as "Ignore".
|
||||
}
|
||||
if pl.enableNodeInclusionPolicyInPodTopologySpread {
|
||||
if c.NodeAffinityPolicy != nil {
|
||||
tsc.NodeAffinityPolicy = *c.NodeAffinityPolicy
|
||||
}
|
||||
if c.NodeTaintsPolicy != nil {
|
||||
tsc.NodeTaintsPolicy = *c.NodeTaintsPolicy
|
||||
}
|
||||
}
|
||||
result = append(result, tsc)
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func mergeLabelSetWithSelector(matchLabels labels.Set, s labels.Selector) labels.Selector {
|
||||
mergedSelector := labels.SelectorFromSet(matchLabels)
|
||||
|
||||
requirements, ok := s.Requirements()
|
||||
if !ok {
|
||||
return s
|
||||
}
|
||||
|
||||
for _, r := range requirements {
|
||||
mergedSelector = mergedSelector.Add(r)
|
||||
}
|
||||
|
||||
return mergedSelector
|
||||
}
|
||||
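A stand-alone sketch of the selector merge performed above for matchLabelKeys; the label keys and values are made up.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
)

func main() {
	// The constraint's own selector, e.g. parsed from its LabelSelector.
	base, err := labels.Parse("app=web")
	if err != nil {
		panic(err)
	}
	// The value for a matchLabelKeys key, copied from the incoming pod's labels.
	matchLabels := labels.Set{"pod-template-hash": "abc123"}

	// Same merge as mergeLabelSetWithSelector above: AND the pod-derived labels
	// into the constraint selector.
	merged := labels.SelectorFromSet(matchLabels)
	if reqs, ok := base.Requirements(); ok {
		for _, r := range reqs {
			merged = merged.Add(r)
		}
	}

	fmt.Println(merged.Matches(labels.Set{"app": "web", "pod-template-hash": "abc123"})) // true
	fmt.Println(merged.Matches(labels.Set{"app": "web", "pod-template-hash": "zzz999"})) // false
}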
|
||||
func countPodsMatchSelector(podInfos []*framework.PodInfo, selector labels.Selector, ns string) int {
|
||||
if selector.Empty() {
|
||||
return 0
|
||||
}
|
||||
count := 0
|
||||
for _, p := range podInfos {
|
||||
// Bypass terminating Pod (see #87621).
|
||||
if p.Pod.DeletionTimestamp != nil || p.Pod.Namespace != ns {
|
||||
continue
|
||||
}
|
||||
if selector.Matches(labels.Set(p.Pod.Labels)) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
// podLabelsMatchSpreadConstraints returns whether the labels match the selector in any of the topologySpreadConstraints.
|
||||
func podLabelsMatchSpreadConstraints(constraints []topologySpreadConstraint, labels labels.Set) bool {
|
||||
for _, c := range constraints {
|
||||
if c.Selector.Matches(labels) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
371
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/podtopologyspread/filtering.go
generated
vendored
Normal file
@ -0,0 +1,371 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package podtopologyspread
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
const preFilterStateKey = "PreFilter" + Name
|
||||
|
||||
// preFilterState computed at PreFilter and used at Filter.
|
||||
// It combines TpKeyToCriticalPaths and TpPairToMatchNum to represent:
|
||||
// (1) critical paths where the least pods are matched on each spread constraint.
|
||||
// (2) number of pods matched on each spread constraint.
|
||||
// A nil preFilterState denotes it's not set at all (in PreFilter phase);
|
||||
// An empty preFilterState object denotes it's a legit state and is set in PreFilter phase.
|
||||
// Fields are exported for comparison during testing.
|
||||
type preFilterState struct {
|
||||
Constraints []topologySpreadConstraint
|
||||
// We record 2 critical paths instead of all critical paths here.
|
||||
// criticalPaths[0].MatchNum always holds the minimum matching number.
|
||||
// criticalPaths[1].MatchNum is always greater or equal to criticalPaths[0].MatchNum, but
|
||||
// it's not guaranteed to be the 2nd minimum match number.
|
||||
TpKeyToCriticalPaths map[string]*criticalPaths
|
||||
// TpKeyToDomainsNum is keyed with topologyKey, and valued with the number of domains.
|
||||
TpKeyToDomainsNum map[string]int
|
||||
// TpPairToMatchNum is keyed with topologyPair, and valued with the number of matching pods.
|
||||
TpPairToMatchNum map[topologyPair]int
|
||||
}
|
||||
|
||||
// minMatchNum returns the global minimum for the calculation of skew while taking MinDomains into account.
|
||||
func (s *preFilterState) minMatchNum(tpKey string, minDomains int32) (int, error) {
|
||||
paths, ok := s.TpKeyToCriticalPaths[tpKey]
|
||||
if !ok {
|
||||
return 0, fmt.Errorf("failed to retrieve path by topology key")
|
||||
}
|
||||
|
||||
minMatchNum := paths[0].MatchNum
|
||||
domainsNum, ok := s.TpKeyToDomainsNum[tpKey]
|
||||
if !ok {
|
||||
return 0, fmt.Errorf("failed to retrieve the number of domains by topology key")
|
||||
}
|
||||
|
||||
if domainsNum < int(minDomains) {
|
||||
// When the number of eligible domains with matching topology keys is less than `minDomains`,
|
||||
// it treats "global minimum" as 0.
|
||||
minMatchNum = 0
|
||||
}
|
||||
|
||||
return minMatchNum, nil
|
||||
}
|
||||
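A worked example of the MinDomains adjustment performed by minMatchNum above; the numbers are illustrative.

package main

import "fmt"

func main() {
	minDomains := int32(3)
	domainsNum := 2      // only two domains currently carry the topology key
	observedMinimum := 4 // smallest match count among those domains (criticalPaths[0].MatchNum)

	// When fewer than MinDomains eligible domains exist, the "global minimum" is
	// treated as 0, which makes the skew check in Filter stricter.
	minMatchNum := observedMinimum
	if domainsNum < int(minDomains) {
		minMatchNum = 0
	}
	fmt.Println(minMatchNum) // 0
}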
|
||||
// Clone makes a copy of the given state.
|
||||
func (s *preFilterState) Clone() framework.StateData {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
copy := preFilterState{
|
||||
// Constraints are shared because they don't change.
|
||||
Constraints: s.Constraints,
|
||||
TpKeyToCriticalPaths: make(map[string]*criticalPaths, len(s.TpKeyToCriticalPaths)),
|
||||
// The number of domains does not change as a result of AddPod/RemovePod methods on PreFilter Extensions
|
||||
TpKeyToDomainsNum: s.TpKeyToDomainsNum,
|
||||
TpPairToMatchNum: make(map[topologyPair]int, len(s.TpPairToMatchNum)),
|
||||
}
|
||||
for tpKey, paths := range s.TpKeyToCriticalPaths {
|
||||
copy.TpKeyToCriticalPaths[tpKey] = &criticalPaths{paths[0], paths[1]}
|
||||
}
|
||||
for tpPair, matchNum := range s.TpPairToMatchNum {
|
||||
copy.TpPairToMatchNum[tpPair] = matchNum
|
||||
}
|
||||
return &copy
|
||||
}
|
||||
|
||||
// CAVEAT: the reason that `[2]criticalPath` can work is based on the implementation of current
|
||||
// preemption algorithm, in particular the following 2 facts:
|
||||
// Fact 1: we only preempt pods on the same node, instead of pods on multiple nodes.
|
||||
// Fact 2: each node is evaluated on a separate copy of the preFilterState during its preemption cycle.
|
||||
// If we plan to turn to a more complex algorithm like "arbitrary pods on multiple nodes", this
|
||||
// structure needs to be revisited.
|
||||
// Fields are exported for comparison during testing.
|
||||
type criticalPaths [2]struct {
|
||||
// TopologyValue denotes the topology value mapping to topology key.
|
||||
TopologyValue string
|
||||
// MatchNum denotes the number of matching pods.
|
||||
MatchNum int
|
||||
}
|
||||
|
||||
func newCriticalPaths() *criticalPaths {
|
||||
return &criticalPaths{{MatchNum: math.MaxInt32}, {MatchNum: math.MaxInt32}}
|
||||
}
|
||||
|
||||
func (p *criticalPaths) update(tpVal string, num int) {
|
||||
// first verify if `tpVal` exists or not
|
||||
i := -1
|
||||
if tpVal == p[0].TopologyValue {
|
||||
i = 0
|
||||
} else if tpVal == p[1].TopologyValue {
|
||||
i = 1
|
||||
}
|
||||
|
||||
if i >= 0 {
|
||||
// `tpVal` exists
|
||||
p[i].MatchNum = num
|
||||
if p[0].MatchNum > p[1].MatchNum {
|
||||
// swap paths[0] and paths[1]
|
||||
p[0], p[1] = p[1], p[0]
|
||||
}
|
||||
} else {
|
||||
// `tpVal` doesn't exist
|
||||
if num < p[0].MatchNum {
|
||||
// update paths[1] with paths[0]
|
||||
p[1] = p[0]
|
||||
// update paths[0]
|
||||
p[0].TopologyValue, p[0].MatchNum = tpVal, num
|
||||
} else if num < p[1].MatchNum {
|
||||
// update paths[1]
|
||||
p[1].TopologyValue, p[1].MatchNum = tpVal, num
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// PreFilter invoked at the prefilter extension point.
|
||||
func (pl *PodTopologySpread) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
|
||||
s, err := pl.calPreFilterState(ctx, pod)
|
||||
if err != nil {
|
||||
return nil, framework.AsStatus(err)
|
||||
} else if s != nil && len(s.Constraints) == 0 {
|
||||
return nil, framework.NewStatus(framework.Skip)
|
||||
}
|
||||
|
||||
cycleState.Write(preFilterStateKey, s)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// PreFilterExtensions returns prefilter extensions, pod add and remove.
|
||||
func (pl *PodTopologySpread) PreFilterExtensions() framework.PreFilterExtensions {
|
||||
return pl
|
||||
}
|
||||
|
||||
// AddPod from pre-computed data in cycleState.
|
||||
func (pl *PodTopologySpread) AddPod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *v1.Pod, podInfoToAdd *framework.PodInfo, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
s, err := getPreFilterState(cycleState)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
|
||||
pl.updateWithPod(s, podInfoToAdd.Pod, podToSchedule, nodeInfo.Node(), 1)
|
||||
return nil
|
||||
}
|
||||
|
||||
// RemovePod from pre-computed data in cycleState.
|
||||
func (pl *PodTopologySpread) RemovePod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *v1.Pod, podInfoToRemove *framework.PodInfo, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
s, err := getPreFilterState(cycleState)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
|
||||
pl.updateWithPod(s, podInfoToRemove.Pod, podToSchedule, nodeInfo.Node(), -1)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (pl *PodTopologySpread) updateWithPod(s *preFilterState, updatedPod, preemptorPod *v1.Pod, node *v1.Node, delta int) {
|
||||
if s == nil || updatedPod.Namespace != preemptorPod.Namespace || node == nil {
|
||||
return
|
||||
}
|
||||
if !nodeLabelsMatchSpreadConstraints(node.Labels, s.Constraints) {
|
||||
return
|
||||
}
|
||||
|
||||
requiredSchedulingTerm := nodeaffinity.GetRequiredNodeAffinity(preemptorPod)
|
||||
if !pl.enableNodeInclusionPolicyInPodTopologySpread {
|
||||
// spreading is applied to nodes that pass those filters.
|
||||
// Ignore parsing errors for backwards compatibility.
|
||||
if match, _ := requiredSchedulingTerm.Match(node); !match {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
podLabelSet := labels.Set(updatedPod.Labels)
|
||||
for _, constraint := range s.Constraints {
|
||||
if !constraint.Selector.Matches(podLabelSet) {
|
||||
continue
|
||||
}
|
||||
|
||||
if pl.enableNodeInclusionPolicyInPodTopologySpread &&
|
||||
!constraint.matchNodeInclusionPolicies(preemptorPod, node, requiredSchedulingTerm) {
|
||||
continue
|
||||
}
|
||||
|
||||
k, v := constraint.TopologyKey, node.Labels[constraint.TopologyKey]
|
||||
pair := topologyPair{key: k, value: v}
|
||||
s.TpPairToMatchNum[pair] += delta
|
||||
s.TpKeyToCriticalPaths[k].update(v, s.TpPairToMatchNum[pair])
|
||||
}
|
||||
}
|
||||
|
||||
// getPreFilterState fetches a pre-computed preFilterState.
|
||||
func getPreFilterState(cycleState *framework.CycleState) (*preFilterState, error) {
|
||||
c, err := cycleState.Read(preFilterStateKey)
|
||||
if err != nil {
|
||||
// preFilterState doesn't exist, likely PreFilter wasn't invoked.
|
||||
return nil, fmt.Errorf("reading %q from cycleState: %w", preFilterStateKey, err)
|
||||
}
|
||||
|
||||
s, ok := c.(*preFilterState)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("%+v convert to podtopologyspread.preFilterState error", c)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// calPreFilterState computes preFilterState describing how pods are spread on topologies.
|
||||
func (pl *PodTopologySpread) calPreFilterState(ctx context.Context, pod *v1.Pod) (*preFilterState, error) {
|
||||
constraints, err := pl.getConstraints(pod)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get constraints from pod: %w", err)
|
||||
}
|
||||
if len(constraints) == 0 {
|
||||
return &preFilterState{}, nil
|
||||
}
|
||||
|
||||
allNodes, err := pl.sharedLister.NodeInfos().List()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("listing NodeInfos: %w", err)
|
||||
}
|
||||
|
||||
s := preFilterState{
|
||||
Constraints: constraints,
|
||||
TpKeyToCriticalPaths: make(map[string]*criticalPaths, len(constraints)),
|
||||
TpPairToMatchNum: make(map[topologyPair]int, sizeHeuristic(len(allNodes), constraints)),
|
||||
}
|
||||
|
||||
tpCountsByNode := make([]map[topologyPair]int, len(allNodes))
|
||||
requiredNodeAffinity := nodeaffinity.GetRequiredNodeAffinity(pod)
|
||||
processNode := func(i int) {
|
||||
nodeInfo := allNodes[i]
|
||||
node := nodeInfo.Node()
|
||||
|
||||
if !pl.enableNodeInclusionPolicyInPodTopologySpread {
|
||||
// spreading is applied to nodes that pass those filters.
|
||||
// Ignore parsing errors for backwards compatibility.
|
||||
if match, _ := requiredNodeAffinity.Match(node); !match {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure current node's labels contains all topologyKeys in 'Constraints'.
|
||||
if !nodeLabelsMatchSpreadConstraints(node.Labels, constraints) {
|
||||
return
|
||||
}
|
||||
|
||||
tpCounts := make(map[topologyPair]int, len(constraints))
|
||||
for _, c := range constraints {
|
||||
if pl.enableNodeInclusionPolicyInPodTopologySpread &&
|
||||
!c.matchNodeInclusionPolicies(pod, node, requiredNodeAffinity) {
|
||||
continue
|
||||
}
|
||||
|
||||
pair := topologyPair{key: c.TopologyKey, value: node.Labels[c.TopologyKey]}
|
||||
count := countPodsMatchSelector(nodeInfo.Pods, c.Selector, pod.Namespace)
|
||||
tpCounts[pair] = count
|
||||
}
|
||||
tpCountsByNode[i] = tpCounts
|
||||
}
|
||||
pl.parallelizer.Until(ctx, len(allNodes), processNode, pl.Name())
|
||||
|
||||
for _, tpCounts := range tpCountsByNode {
|
||||
for tp, count := range tpCounts {
|
||||
s.TpPairToMatchNum[tp] += count
|
||||
}
|
||||
}
|
||||
s.TpKeyToDomainsNum = make(map[string]int, len(constraints))
|
||||
for tp := range s.TpPairToMatchNum {
|
||||
s.TpKeyToDomainsNum[tp.key]++
|
||||
}
|
||||
|
||||
// calculate min match for each topology pair
|
||||
for i := 0; i < len(constraints); i++ {
|
||||
key := constraints[i].TopologyKey
|
||||
s.TpKeyToCriticalPaths[key] = newCriticalPaths()
|
||||
}
|
||||
for pair, num := range s.TpPairToMatchNum {
|
||||
s.TpKeyToCriticalPaths[pair.key].update(pair.value, num)
|
||||
}
|
||||
|
||||
return &s, nil
|
||||
}
|
||||
|
||||
// Filter invoked at the filter extension point.
|
||||
func (pl *PodTopologySpread) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
node := nodeInfo.Node()
|
||||
|
||||
s, err := getPreFilterState(cycleState)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
|
||||
// However, "empty" preFilterState is legit which tolerates every toSchedule Pod.
|
||||
if len(s.Constraints) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
logger := klog.FromContext(ctx)
|
||||
podLabelSet := labels.Set(pod.Labels)
|
||||
for _, c := range s.Constraints {
|
||||
tpKey := c.TopologyKey
|
||||
tpVal, ok := node.Labels[c.TopologyKey]
|
||||
if !ok {
|
||||
logger.V(5).Info("Node doesn't have required label", "node", klog.KObj(node), "label", tpKey)
|
||||
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonNodeLabelNotMatch)
|
||||
}
|
||||
|
||||
// judging criteria:
|
||||
// 'existing matching num' + 'if self-match (1 or 0)' - 'global minimum' <= 'maxSkew'
|
||||
minMatchNum, err := s.minMatchNum(tpKey, c.MinDomains)
|
||||
if err != nil {
|
||||
logger.Error(err, "Internal error occurred while retrieving value precalculated in PreFilter", "topologyKey", tpKey, "paths", s.TpKeyToCriticalPaths)
|
||||
continue
|
||||
}
|
||||
|
||||
selfMatchNum := 0
|
||||
if c.Selector.Matches(podLabelSet) {
|
||||
selfMatchNum = 1
|
||||
}
|
||||
|
||||
pair := topologyPair{key: tpKey, value: tpVal}
|
||||
matchNum := 0
|
||||
if tpCount, ok := s.TpPairToMatchNum[pair]; ok {
|
||||
matchNum = tpCount
|
||||
}
|
||||
skew := matchNum + selfMatchNum - minMatchNum
|
||||
if skew > int(c.MaxSkew) {
|
||||
logger.V(5).Info("Node failed spreadConstraint: matchNum + selfMatchNum - minMatchNum > maxSkew", "node", klog.KObj(node), "topologyKey", tpKey, "matchNum", matchNum, "selfMatchNum", selfMatchNum, "minMatchNum", minMatchNum, "maxSkew", c.MaxSkew)
|
||||
return framework.NewStatus(framework.Unschedulable, ErrReasonConstraintsNotMatch)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
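A worked example of the skew check in Filter above, with illustrative counts for a single zone-spread constraint.

package main

import "fmt"

func main() {
	maxSkew := 1
	matchNum := 3     // pods matching the selector in the candidate node's topology domain
	selfMatchNum := 1 // the incoming pod matches its own constraint selector
	minMatchNum := 2  // global minimum across domains, after the MinDomains adjustment

	// 'existing matching num' + 'if self-match (1 or 0)' - 'global minimum' <= 'maxSkew'
	skew := matchNum + selfMatchNum - minMatchNum
	fmt.Println("skew:", skew, "fits:", skew <= maxSkew) // skew: 2 fits: false
}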
|
||||
func sizeHeuristic(nodes int, constraints []topologySpreadConstraint) int {
|
||||
for _, c := range constraints {
|
||||
if c.TopologyKey == v1.LabelHostname {
|
||||
return nodes
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
351
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go
generated
vendored
Normal file
@ -0,0 +1,351 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package podtopologyspread
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/equality"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/informers"
|
||||
appslisters "k8s.io/client-go/listers/apps/v1"
|
||||
corelisters "k8s.io/client-go/listers/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
const (
|
||||
// ErrReasonConstraintsNotMatch is used for PodTopologySpread filter error.
|
||||
ErrReasonConstraintsNotMatch = "node(s) didn't match pod topology spread constraints"
|
||||
// ErrReasonNodeLabelNotMatch is used when the node doesn't hold the required label.
|
||||
ErrReasonNodeLabelNotMatch = ErrReasonConstraintsNotMatch + " (missing required label)"
|
||||
)
|
||||
|
||||
var systemDefaultConstraints = []v1.TopologySpreadConstraint{
|
||||
{
|
||||
TopologyKey: v1.LabelHostname,
|
||||
WhenUnsatisfiable: v1.ScheduleAnyway,
|
||||
MaxSkew: 3,
|
||||
},
|
||||
{
|
||||
TopologyKey: v1.LabelTopologyZone,
|
||||
WhenUnsatisfiable: v1.ScheduleAnyway,
|
||||
MaxSkew: 5,
|
||||
},
|
||||
}
|
||||
|
||||
// PodTopologySpread is a plugin that ensures pod's topologySpreadConstraints is satisfied.
|
||||
type PodTopologySpread struct {
|
||||
systemDefaulted bool
|
||||
parallelizer parallelize.Parallelizer
|
||||
defaultConstraints []v1.TopologySpreadConstraint
|
||||
sharedLister framework.SharedLister
|
||||
services corelisters.ServiceLister
|
||||
replicationCtrls corelisters.ReplicationControllerLister
|
||||
replicaSets appslisters.ReplicaSetLister
|
||||
statefulSets appslisters.StatefulSetLister
|
||||
enableNodeInclusionPolicyInPodTopologySpread bool
|
||||
enableMatchLabelKeysInPodTopologySpread bool
|
||||
enableSchedulingQueueHint bool
|
||||
}
|
||||
|
||||
var _ framework.PreFilterPlugin = &PodTopologySpread{}
|
||||
var _ framework.FilterPlugin = &PodTopologySpread{}
|
||||
var _ framework.PreScorePlugin = &PodTopologySpread{}
|
||||
var _ framework.ScorePlugin = &PodTopologySpread{}
|
||||
var _ framework.EnqueueExtensions = &PodTopologySpread{}
|
||||
|
||||
// Name is the name of the plugin used in the plugin registry and configurations.
|
||||
const Name = names.PodTopologySpread
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (pl *PodTopologySpread) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(_ context.Context, plArgs runtime.Object, h framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
if h.SnapshotSharedLister() == nil {
|
||||
return nil, fmt.Errorf("SnapshotSharedlister is nil")
|
||||
}
|
||||
args, err := getArgs(plArgs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := validation.ValidatePodTopologySpreadArgs(nil, &args); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pl := &PodTopologySpread{
|
||||
parallelizer: h.Parallelizer(),
|
||||
sharedLister: h.SnapshotSharedLister(),
|
||||
defaultConstraints: args.DefaultConstraints,
|
||||
enableNodeInclusionPolicyInPodTopologySpread: fts.EnableNodeInclusionPolicyInPodTopologySpread,
|
||||
enableMatchLabelKeysInPodTopologySpread: fts.EnableMatchLabelKeysInPodTopologySpread,
|
||||
enableSchedulingQueueHint: fts.EnableSchedulingQueueHint,
|
||||
}
|
||||
if args.DefaultingType == config.SystemDefaulting {
|
||||
pl.defaultConstraints = systemDefaultConstraints
|
||||
pl.systemDefaulted = true
|
||||
}
|
||||
if len(pl.defaultConstraints) != 0 {
|
||||
if h.SharedInformerFactory() == nil {
|
||||
return nil, fmt.Errorf("SharedInformerFactory is nil")
|
||||
}
|
||||
pl.setListers(h.SharedInformerFactory())
|
||||
}
|
||||
return pl, nil
|
||||
}
|
||||
|
||||
func getArgs(obj runtime.Object) (config.PodTopologySpreadArgs, error) {
|
||||
ptr, ok := obj.(*config.PodTopologySpreadArgs)
|
||||
if !ok {
|
||||
return config.PodTopologySpreadArgs{}, fmt.Errorf("want args to be of type PodTopologySpreadArgs, got %T", obj)
|
||||
}
|
||||
return *ptr, nil
|
||||
}
|
||||
|
||||
func (pl *PodTopologySpread) setListers(factory informers.SharedInformerFactory) {
|
||||
pl.services = factory.Core().V1().Services().Lister()
|
||||
pl.replicationCtrls = factory.Core().V1().ReplicationControllers().Lister()
|
||||
pl.replicaSets = factory.Apps().V1().ReplicaSets().Lister()
|
||||
pl.statefulSets = factory.Apps().V1().StatefulSets().Lister()
|
||||
}
|
||||
|
||||
// EventsToRegister returns the possible events that may make a Pod
|
||||
// failed by this plugin schedulable.
|
||||
func (pl *PodTopologySpread) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
podActionType := framework.Add | framework.UpdatePodLabel | framework.Delete
|
||||
if pl.enableSchedulingQueueHint {
|
||||
// When the QueueingHint feature is enabled, the scheduling queue uses Pod/Update Queueing Hint
|
||||
// to determine whether a Pod's update makes the Pod schedulable or not.
|
||||
// https://github.com/kubernetes/kubernetes/pull/122234
|
||||
// (If not, the scheduling queue always retries the unschedulable Pods when they're updated.)
|
||||
//
|
||||
// The Pod rejected by this plugin can be schedulable when the Pod has a spread constraint with NodeTaintsPolicy:Honor
|
||||
// and has got a new toleration.
|
||||
// So, we add UpdatePodTolerations here only when QHint is enabled.
|
||||
podActionType = framework.Add | framework.UpdatePodLabel | framework.UpdatePodTolerations | framework.Delete
|
||||
}
|
||||
|
||||
return []framework.ClusterEventWithHint{
|
||||
// All ActionType includes the following events:
|
||||
// - Add. An unschedulable Pod may fail due to violating topology spread constraints,
|
||||
// adding an assigned Pod may make it schedulable.
|
||||
// - UpdatePodLabel. Updating on an existing Pod's labels (e.g., removal) may make
|
||||
// an unschedulable Pod schedulable.
|
||||
// - Delete. An unschedulable Pod may fail due to violating an existing Pod's topology spread constraints,
|
||||
// deleting an existing Pod may make it schedulable.
|
||||
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: podActionType}, QueueingHintFn: pl.isSchedulableAfterPodChange},
|
||||
// Node add|delete|update may lead to a topology key change,
|
||||
// and make the pods currently being scheduled schedulable or unschedulable.
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.Delete | framework.UpdateNodeLabel | framework.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// involvedInTopologySpreading returns true if the incomingPod is involved in the topology spreading of podWithSpreading.
|
||||
func involvedInTopologySpreading(incomingPod, podWithSpreading *v1.Pod) bool {
|
||||
return incomingPod.UID == podWithSpreading.UID ||
|
||||
(incomingPod.Spec.NodeName != "" && incomingPod.Namespace == podWithSpreading.Namespace)
|
||||
}
|
||||
|
||||
// hasConstraintWithNodeTaintsPolicyHonor returns true if any constraint has `NodeTaintsPolicy: Honor`.
|
||||
func hasConstraintWithNodeTaintsPolicyHonor(constraints []topologySpreadConstraint) bool {
|
||||
for _, c := range constraints {
|
||||
if c.NodeTaintsPolicy == v1.NodeInclusionPolicyHonor {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (pl *PodTopologySpread) isSchedulableAfterPodChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
originalPod, modifiedPod, err := util.As[*v1.Pod](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
if (modifiedPod != nil && !involvedInTopologySpreading(modifiedPod, pod)) || (originalPod != nil && !involvedInTopologySpreading(originalPod, pod)) {
|
||||
logger.V(5).Info("the added/updated/deleted pod is unscheduled or has different namespace with target pod, so it doesn't make the target pod schedulable",
|
||||
"pod", klog.KObj(pod), "originalPod", klog.KObj(originalPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
constraints, err := pl.getConstraints(pod)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
// Pod is modified. Return Queue when the label(s) matching topologySpread's selector is added, changed, or deleted.
|
||||
if modifiedPod != nil && originalPod != nil {
|
||||
if pod.UID == modifiedPod.UID && !equality.Semantic.DeepEqual(modifiedPod.Spec.Tolerations, originalPod.Spec.Tolerations) && hasConstraintWithNodeTaintsPolicyHonor(constraints) {
|
||||
// If any constraint has `NodeTaintsPolicy: Honor`, we can return Queue when the target Pod has got a new toleration.
|
||||
logger.V(5).Info("the unschedulable pod has got a new toleration, which could make it schedulable",
|
||||
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
if equality.Semantic.DeepEqual(modifiedPod.Labels, originalPod.Labels) {
|
||||
logger.V(5).Info("the pod's update doesn't include the label update, which doesn't make the target pod schedulable",
|
||||
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
for _, c := range constraints {
|
||||
if c.Selector.Matches(labels.Set(originalPod.Labels)) != c.Selector.Matches(labels.Set(modifiedPod.Labels)) {
|
||||
// This modification makes this Pod match(or not match) with this constraint.
|
||||
// Maybe now the scheduling result of topology spread gets changed by this change.
|
||||
logger.V(5).Info("a scheduled pod's label was updated and it makes the updated pod match or unmatch the pod's topology spread constraints",
|
||||
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
}
|
||||
// This modification of labels doesn't change whether this Pod would match selector or not in any constraints.
|
||||
logger.V(5).Info("a scheduled pod's label was updated, but it's a change unrelated to the pod's topology spread constraints",
|
||||
"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// Pod is added. Return Queue when the added Pod has a label that matches with topologySpread's selector.
|
||||
if modifiedPod != nil {
|
||||
if podLabelsMatchSpreadConstraints(constraints, modifiedPod.Labels) {
|
||||
logger.V(5).Info("a scheduled pod was created and it matches with the pod's topology spread constraints",
|
||||
"pod", klog.KObj(pod), "createdPod", klog.KObj(modifiedPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
logger.V(5).Info("a scheduled pod was created, but it doesn't matches with the pod's topology spread constraints",
|
||||
"pod", klog.KObj(pod), "createdPod", klog.KObj(modifiedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// Pod is deleted. Return Queue when the deleted Pod has a label that matches with topologySpread's selector.
|
||||
if podLabelsMatchSpreadConstraints(constraints, originalPod.Labels) {
|
||||
logger.V(5).Info("a scheduled pod which matches with the pod's topology spread constraints was deleted, and the pod may be schedulable now",
|
||||
"pod", klog.KObj(pod), "deletedPod", klog.KObj(originalPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
logger.V(5).Info("a scheduled pod was deleted, but it's unrelated to the pod's topology spread constraints",
|
||||
"pod", klog.KObj(pod), "deletedPod", klog.KObj(originalPod))
|
||||
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// getConstraints extracts topologySpreadConstraint(s) from the Pod spec.
|
||||
// If the Pod doesn't have any topologySpreadConstraint, it returns default constraints.
|
||||
func (pl *PodTopologySpread) getConstraints(pod *v1.Pod) ([]topologySpreadConstraint, error) {
|
||||
var constraints []topologySpreadConstraint
|
||||
var err error
|
||||
if len(pod.Spec.TopologySpreadConstraints) > 0 {
|
||||
// We have feature gating in APIServer to strip the spec
|
||||
// so don't need to re-check feature gate, just check length of Constraints.
|
||||
constraints, err = pl.filterTopologySpreadConstraints(
|
||||
pod.Spec.TopologySpreadConstraints,
|
||||
pod.Labels,
|
||||
v1.DoNotSchedule,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("obtaining pod's hard topology spread constraints: %w", err)
|
||||
}
|
||||
} else {
|
||||
constraints, err = pl.buildDefaultConstraints(pod, v1.DoNotSchedule)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("setting default hard topology spread constraints: %w", err)
|
||||
}
|
||||
}
|
||||
return constraints, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterNodeChange returns Queue when node has topologyKey in its labels, else return QueueSkip.
|
||||
func (pl *PodTopologySpread) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
constraints, err := pl.getConstraints(pod)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
var originalNodeMatching, modifiedNodeMatching bool
|
||||
if originalNode != nil {
|
||||
originalNodeMatching = nodeLabelsMatchSpreadConstraints(originalNode.Labels, constraints)
|
||||
}
|
||||
if modifiedNode != nil {
|
||||
modifiedNodeMatching = nodeLabelsMatchSpreadConstraints(modifiedNode.Labels, constraints)
|
||||
}
|
||||
|
||||
// We return Queue in the following cases:
|
||||
// 1. Node/UpdateNodeLabel:
|
||||
// - The original node matched the pod's topology spread constraints, but the modified node does not.
|
||||
// - The modified node matches the pod's topology spread constraints, but the original node does not.
|
||||
// - The modified node matches the pod's topology spread constraints, and the original node and the modified node have different label values for any topologyKey.
|
||||
// 2. Node/UpdateNodeTaint:
|
||||
// - The modified node matches the pod's topology spread constraints, and the original node and the modified node have different taints.
|
||||
// 3. Node/Add: The created node matches the pod's topology spread constraints.
|
||||
// 4. Node/Delete: The original node matched the pod's topology spread constraints.
|
||||
if originalNode != nil && modifiedNode != nil {
|
||||
if originalNodeMatching != modifiedNodeMatching {
|
||||
logger.V(5).Info("the node is updated and now pod topology spread constraints has changed, and the pod may be schedulable now",
|
||||
"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode), "originalMatching", originalNodeMatching, "newMatching", modifiedNodeMatching)
|
||||
return framework.Queue, nil
|
||||
}
|
||||
if modifiedNodeMatching && (checkTopologyKeyLabelsChanged(originalNode.Labels, modifiedNode.Labels, constraints) || !equality.Semantic.DeepEqual(originalNode.Spec.Taints, modifiedNode.Spec.Taints)) {
|
||||
logger.V(5).Info("the node is updated and now has different taints or labels, and the pod may be schedulable now",
|
||||
"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
if modifiedNode != nil {
|
||||
if !modifiedNodeMatching {
|
||||
logger.V(5).Info("the created node doesn't match pod topology spread constraints",
|
||||
"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
logger.V(5).Info("the created node matches topology spread constraints, and the pod may be schedulable now",
|
||||
"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
if !originalNodeMatching {
|
||||
logger.V(5).Info("the deleted node doesn't match pod topology spread constraints", "pod", klog.KObj(pod), "node", klog.KObj(originalNode))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
logger.V(5).Info("the deleted node matches topology spread constraints, and the pod may be schedulable now",
|
||||
"pod", klog.KObj(pod), "node", klog.KObj(originalNode))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
// checkTopologyKeyLabelsChanged checks if any of the labels specified as topologyKey in the constraints have changed.
|
||||
func checkTopologyKeyLabelsChanged(originalLabels, modifiedLabels map[string]string, constraints []topologySpreadConstraint) bool {
|
||||
for _, constraint := range constraints {
|
||||
topologyKey := constraint.TopologyKey
|
||||
if originalLabels[topologyKey] != modifiedLabels[topologyKey] {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
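// Illustrative sketch (editor's addition, not part of the vendored file): how
// checkTopologyKeyLabelsChanged reacts to node label updates, assuming a single
// constraint whose topologyKey is the well-known zone label. The test below would
// live in the podtopologyspread package, e.g. in a hypothetical _test.go file.

package podtopologyspread

import "testing"

func TestCheckTopologyKeyLabelsChangedSketch(t *testing.T) {
    constraints := []topologySpreadConstraint{{TopologyKey: "topology.kubernetes.io/zone"}}

    original := map[string]string{"topology.kubernetes.io/zone": "zone-a", "env": "prod"}
    zoneChanged := map[string]string{"topology.kubernetes.io/zone": "zone-b", "env": "prod"}
    unrelatedChanged := map[string]string{"topology.kubernetes.io/zone": "zone-a", "env": "dev"}

    // A change to the constraint's topologyKey label must be detected ...
    if !checkTopologyKeyLabelsChanged(original, zoneChanged, constraints) {
        t.Error("expected the zone label change to be detected")
    }
    // ... while a change to an unrelated label is ignored.
    if checkTopologyKeyLabelsChanged(original, unrelatedChanged, constraints) {
        t.Error("expected the unrelated label change to be ignored")
    }
}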
|
305
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/podtopologyspread/scoring.go
generated
vendored
Normal file
@ -0,0 +1,305 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package podtopologyspread
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"sync/atomic"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
const preScoreStateKey = "PreScore" + Name
|
||||
const invalidScore = -1
|
||||
|
||||
// preScoreState computed at PreScore and used at Score.
|
||||
// Fields are exported for comparison during testing.
|
||||
type preScoreState struct {
|
||||
Constraints []topologySpreadConstraint
|
||||
// IgnoredNodes is a set of node names which miss some Constraints[*].topologyKey.
|
||||
IgnoredNodes sets.Set[string]
|
||||
// TopologyPairToPodCounts is keyed with topologyPair, and valued with the number of matching pods.
|
||||
TopologyPairToPodCounts map[topologyPair]*int64
|
||||
// TopologyNormalizingWeight is the weight we give to the counts per topology.
|
||||
// This allows the pod counts of smaller topologies to not be watered down by
|
||||
// bigger ones.
|
||||
TopologyNormalizingWeight []float64
|
||||
}
|
||||
|
||||
// Clone implements the mandatory Clone interface. We don't really copy the data since
|
||||
// there is no need for that.
|
||||
func (s *preScoreState) Clone() framework.StateData {
|
||||
return s
|
||||
}
|
||||
|
||||
// initPreScoreState iterates "filteredNodes" to filter out the nodes which
|
||||
// don't have required topologyKey(s), and initialize:
|
||||
// 1) s.TopologyPairToPodCounts: keyed with both eligible topology pair and node names.
|
||||
// 2) s.IgnoredNodes: the set of nodes that shouldn't be scored.
|
||||
// 3) s.TopologyNormalizingWeight: The weight to be given to each constraint based on the number of values in a topology.
|
||||
func (pl *PodTopologySpread) initPreScoreState(s *preScoreState, pod *v1.Pod, filteredNodes []*framework.NodeInfo, requireAllTopologies bool) error {
|
||||
var err error
|
||||
if len(pod.Spec.TopologySpreadConstraints) > 0 {
|
||||
s.Constraints, err = pl.filterTopologySpreadConstraints(
|
||||
pod.Spec.TopologySpreadConstraints,
|
||||
pod.Labels,
|
||||
v1.ScheduleAnyway,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("obtaining pod's soft topology spread constraints: %w", err)
|
||||
}
|
||||
} else {
|
||||
s.Constraints, err = pl.buildDefaultConstraints(pod, v1.ScheduleAnyway)
|
||||
if err != nil {
|
||||
return fmt.Errorf("setting default soft topology spread constraints: %w", err)
|
||||
}
|
||||
}
|
||||
if len(s.Constraints) == 0 {
|
||||
return nil
|
||||
}
|
||||
topoSize := make([]int, len(s.Constraints))
|
||||
for _, node := range filteredNodes {
|
||||
if requireAllTopologies && !nodeLabelsMatchSpreadConstraints(node.Node().Labels, s.Constraints) {
|
||||
// Nodes which don't have all required topologyKeys present are ignored
|
||||
// when scoring later.
|
||||
s.IgnoredNodes.Insert(node.Node().Name)
|
||||
continue
|
||||
}
|
||||
for i, constraint := range s.Constraints {
|
||||
// per-node counts are calculated during Score.
|
||||
if constraint.TopologyKey == v1.LabelHostname {
|
||||
continue
|
||||
}
|
||||
pair := topologyPair{key: constraint.TopologyKey, value: node.Node().Labels[constraint.TopologyKey]}
|
||||
if s.TopologyPairToPodCounts[pair] == nil {
|
||||
s.TopologyPairToPodCounts[pair] = new(int64)
|
||||
topoSize[i]++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.TopologyNormalizingWeight = make([]float64, len(s.Constraints))
|
||||
for i, c := range s.Constraints {
|
||||
sz := topoSize[i]
|
||||
if c.TopologyKey == v1.LabelHostname {
|
||||
sz = len(filteredNodes) - len(s.IgnoredNodes)
|
||||
}
|
||||
s.TopologyNormalizingWeight[i] = topologyNormalizingWeight(sz)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// PreScore builds and writes cycle state used by Score and NormalizeScore.
|
||||
func (pl *PodTopologySpread) PreScore(
|
||||
ctx context.Context,
|
||||
cycleState *framework.CycleState,
|
||||
pod *v1.Pod,
|
||||
filteredNodes []*framework.NodeInfo,
|
||||
) *framework.Status {
|
||||
allNodes, err := pl.sharedLister.NodeInfos().List()
|
||||
if err != nil {
|
||||
return framework.AsStatus(fmt.Errorf("getting all nodes: %w", err))
|
||||
}
|
||||
|
||||
if len(allNodes) == 0 {
|
||||
// No need to score.
|
||||
return framework.NewStatus(framework.Skip)
|
||||
}
|
||||
|
||||
state := &preScoreState{
|
||||
IgnoredNodes: sets.New[string](),
|
||||
TopologyPairToPodCounts: make(map[topologyPair]*int64),
|
||||
}
|
||||
// Only require that nodes have all the topology labels if using
|
||||
// non-system-default spreading rules. This allows nodes that don't have a
|
||||
// zone label to still have hostname spreading.
|
||||
requireAllTopologies := len(pod.Spec.TopologySpreadConstraints) > 0 || !pl.systemDefaulted
|
||||
err = pl.initPreScoreState(state, pod, filteredNodes, requireAllTopologies)
|
||||
if err != nil {
|
||||
return framework.AsStatus(fmt.Errorf("calculating preScoreState: %w", err))
|
||||
}
|
||||
|
||||
// return Skip if incoming pod doesn't have soft topology spread Constraints.
|
||||
if len(state.Constraints) == 0 {
|
||||
return framework.NewStatus(framework.Skip)
|
||||
}
|
||||
|
||||
// Ignore parsing errors for backwards compatibility.
|
||||
requiredNodeAffinity := nodeaffinity.GetRequiredNodeAffinity(pod)
|
||||
processAllNode := func(i int) {
|
||||
nodeInfo := allNodes[i]
|
||||
node := nodeInfo.Node()
|
||||
|
||||
if !pl.enableNodeInclusionPolicyInPodTopologySpread {
|
||||
// `node` should satisfy incoming pod's NodeSelector/NodeAffinity
|
||||
if match, _ := requiredNodeAffinity.Match(node); !match {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// All topologyKeys need to be present in `node`
|
||||
if requireAllTopologies && !nodeLabelsMatchSpreadConstraints(node.Labels, state.Constraints) {
|
||||
return
|
||||
}
|
||||
|
||||
for _, c := range state.Constraints {
|
||||
if pl.enableNodeInclusionPolicyInPodTopologySpread &&
|
||||
!c.matchNodeInclusionPolicies(pod, node, requiredNodeAffinity) {
|
||||
continue
|
||||
}
|
||||
|
||||
pair := topologyPair{key: c.TopologyKey, value: node.Labels[c.TopologyKey]}
|
||||
// If current topology pair is not associated with any candidate node,
|
||||
// continue to avoid unnecessary calculation.
|
||||
// Per-node counts are also skipped, as they are done during Score.
|
||||
tpCount := state.TopologyPairToPodCounts[pair]
|
||||
if tpCount == nil {
|
||||
continue
|
||||
}
|
||||
count := countPodsMatchSelector(nodeInfo.Pods, c.Selector, pod.Namespace)
|
||||
atomic.AddInt64(tpCount, int64(count))
|
||||
}
|
||||
}
|
||||
pl.parallelizer.Until(ctx, len(allNodes), processAllNode, pl.Name())
|
||||
|
||||
cycleState.Write(preScoreStateKey, state)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Score invoked at the Score extension point.
|
||||
// The "score" returned in this function is the matching number of pods on the `nodeName`,
|
||||
// it is normalized later.
|
||||
func (pl *PodTopologySpread) Score(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
|
||||
nodeInfo, err := pl.sharedLister.NodeInfos().Get(nodeName)
|
||||
if err != nil {
|
||||
return 0, framework.AsStatus(fmt.Errorf("getting node %q from Snapshot: %w", nodeName, err))
|
||||
}
|
||||
|
||||
node := nodeInfo.Node()
|
||||
s, err := getPreScoreState(cycleState)
|
||||
if err != nil {
|
||||
return 0, framework.AsStatus(err)
|
||||
}
|
||||
|
||||
// Return if the node is not qualified.
|
||||
if s.IgnoredNodes.Has(node.Name) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// For each present <pair>, current node gets a credit of <matchSum>.
|
||||
// And we sum up <matchSum> and return it as this node's score.
|
||||
var score float64
|
||||
for i, c := range s.Constraints {
|
||||
if tpVal, ok := node.Labels[c.TopologyKey]; ok {
|
||||
var cnt int64
|
||||
if c.TopologyKey == v1.LabelHostname {
|
||||
cnt = int64(countPodsMatchSelector(nodeInfo.Pods, c.Selector, pod.Namespace))
|
||||
} else {
|
||||
pair := topologyPair{key: c.TopologyKey, value: tpVal}
|
||||
cnt = *s.TopologyPairToPodCounts[pair]
|
||||
}
|
||||
score += scoreForCount(cnt, c.MaxSkew, s.TopologyNormalizingWeight[i])
|
||||
}
|
||||
}
|
||||
return int64(math.Round(score)), nil
|
||||
}
|
||||
|
||||
// NormalizeScore invoked after scoring all nodes.
|
||||
func (pl *PodTopologySpread) NormalizeScore(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, scores framework.NodeScoreList) *framework.Status {
|
||||
s, err := getPreScoreState(cycleState)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Calculate <minScore> and <maxScore>
|
||||
var minScore int64 = math.MaxInt64
|
||||
var maxScore int64
|
||||
for i, score := range scores {
|
||||
// it's mandatory to check if <score.Name> is present in s.IgnoredNodes
|
||||
if s.IgnoredNodes.Has(score.Name) {
|
||||
scores[i].Score = invalidScore
|
||||
continue
|
||||
}
|
||||
if score.Score < minScore {
|
||||
minScore = score.Score
|
||||
}
|
||||
if score.Score > maxScore {
|
||||
maxScore = score.Score
|
||||
}
|
||||
}
|
||||
|
||||
for i := range scores {
|
||||
if scores[i].Score == invalidScore {
|
||||
scores[i].Score = 0
|
||||
continue
|
||||
}
|
||||
if maxScore == 0 {
|
||||
scores[i].Score = framework.MaxNodeScore
|
||||
continue
|
||||
}
|
||||
s := scores[i].Score
|
||||
scores[i].Score = framework.MaxNodeScore * (maxScore + minScore - s) / maxScore
|
||||
}
|
||||
return nil
|
||||
}
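// Worked example (editor's illustration, not part of the vendored file) of the
// normalization above, assuming framework.MaxNodeScore = 100 and two scored nodes
// with raw scores nodeA = 2 and nodeB = 6 (so minScore = 2, maxScore = 6):
//
//    nodeA: 100 * (6 + 2 - 2) / 6 = 100
//    nodeB: 100 * (6 + 2 - 6) / 6 = 33
//
// The node with fewer matching pods ends up with the higher normalized score,
// which is exactly the spreading behaviour this plugin is after.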
|
||||
|
||||
// ScoreExtensions of the Score plugin.
|
||||
func (pl *PodTopologySpread) ScoreExtensions() framework.ScoreExtensions {
|
||||
return pl
|
||||
}
|
||||
|
||||
func getPreScoreState(cycleState *framework.CycleState) (*preScoreState, error) {
|
||||
c, err := cycleState.Read(preScoreStateKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading %q from cycleState: %w", preScoreStateKey, err)
|
||||
}
|
||||
|
||||
s, ok := c.(*preScoreState)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("%+v convert to podtopologyspread.preScoreState error", c)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// topologyNormalizingWeight calculates the weight for the topology, based on
|
||||
// the number of values that exist for a topology.
|
||||
// Since <size> is at least 1 (all nodes that passed the Filters are in the
|
||||
// same topology), and k8s supports 5k nodes, the result is in the interval
|
||||
// <1.09, 8.52>.
|
||||
//
|
||||
// Note: <size> could also be zero when no nodes have the required topologies,
|
||||
// however we don't care about topology weight in this case as we return a 0
|
||||
// score for all nodes.
|
||||
func topologyNormalizingWeight(size int) float64 {
|
||||
return math.Log(float64(size + 2))
|
||||
}
|
||||
|
||||
// scoreForCount calculates the score based on number of matching pods in a
|
||||
// topology domain, the constraint's maxSkew and the topology weight.
|
||||
// `maxSkew-1` is added to the score so that differences between topology
|
||||
// domains get watered down, controlling the tolerance of the score to skews.
|
||||
func scoreForCount(cnt int64, maxSkew int32, tpWeight float64) float64 {
|
||||
return float64(cnt)*tpWeight + float64(maxSkew-1)
|
||||
}
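// Worked example (editor's illustration, not part of the vendored file) combining the
// two helpers above. Assume a zone constraint spanning 3 zones and a hostname
// constraint over 10 scoreable nodes:
//
//    zone weight     = ln(3 + 2)  ≈ 1.61
//    hostname weight = ln(10 + 2) ≈ 2.48
//
// For the zone constraint with maxSkew = 1 and 4 matching pods in the candidate's zone:
//
//    scoreForCount(4, 1, 1.61) = 4*1.61 + (1-1) ≈ 6.44
//
// Topologies with more values get a larger weight, compensating for the fact that
// their per-domain pod counts are naturally smaller.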
|
53
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort/priority_sort.go
generated
vendored
Normal file
@ -0,0 +1,53 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package queuesort

import (
    "context"

    "k8s.io/apimachinery/pkg/runtime"
    corev1helpers "k8s.io/component-helpers/scheduling/corev1"
    "k8s.io/kubernetes/pkg/scheduler/framework"
    "k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
)

// Name is the name of the plugin used in the plugin registry and configurations.
const Name = names.PrioritySort

// PrioritySort is a plugin that implements Priority based sorting.
type PrioritySort struct{}

var _ framework.QueueSortPlugin = &PrioritySort{}

// Name returns name of the plugin.
func (pl *PrioritySort) Name() string {
    return Name
}

// Less is the function used by the activeQ heap algorithm to sort pods.
// It sorts pods based on their priority. When priorities are equal, it uses
// PodQueueInfo.timestamp.
func (pl *PrioritySort) Less(pInfo1, pInfo2 *framework.QueuedPodInfo) bool {
    p1 := corev1helpers.PodPriority(pInfo1.Pod)
    p2 := corev1helpers.PodPriority(pInfo2.Pod)
    return (p1 > p2) || (p1 == p2 && pInfo1.Timestamp.Before(pInfo2.Timestamp))
}

// New initializes a new plugin and returns it.
func New(_ context.Context, _ runtime.Object, handle framework.Handle) (framework.Plugin, error) {
    return &PrioritySort{}, nil
}
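// Illustrative sketch (editor's addition, not part of the vendored file) of the ordering
// Less produces, assuming QueuedPodInfo's embedded *PodInfo can be populated directly as
// shown; the pod names and priority values are hypothetical.

package queuesort

import (
    "testing"
    "time"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/kubernetes/pkg/scheduler/framework"
)

func TestPrioritySortLessSketch(t *testing.T) {
    highPriority, lowPriority := int32(100), int32(10)
    now := time.Now()

    high := &framework.QueuedPodInfo{
        PodInfo:   &framework.PodInfo{Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "high"}, Spec: v1.PodSpec{Priority: &highPriority}}},
        Timestamp: now,
    }
    low := &framework.QueuedPodInfo{
        PodInfo:   &framework.PodInfo{Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "low"}, Spec: v1.PodSpec{Priority: &lowPriority}}},
        Timestamp: now.Add(-time.Minute), // older, but priority is compared first
    }

    pl := &PrioritySort{}
    if !pl.Less(high, low) {
        t.Error("expected the higher-priority pod to sort first")
    }
}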
84
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/registry.go
generated
vendored
Normal file
@ -0,0 +1,84 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package plugins
|
||||
|
||||
import (
|
||||
"k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/dynamicresources"
|
||||
plfeature "k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/imagelocality"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodename"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeports"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeunschedulable"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodevolumelimits"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/podtopologyspread"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/schedulinggates"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/tainttoleration"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumerestrictions"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumezone"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/runtime"
|
||||
)
|
||||
|
||||
// NewInTreeRegistry builds the registry with all the in-tree plugins.
|
||||
// A scheduler that runs out of tree plugins can register additional plugins
|
||||
// through the WithFrameworkOutOfTreeRegistry option.
|
||||
func NewInTreeRegistry() runtime.Registry {
|
||||
fts := plfeature.Features{
|
||||
EnableDRAAdminAccess: feature.DefaultFeatureGate.Enabled(features.DRAAdminAccess),
|
||||
EnableDynamicResourceAllocation: feature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation),
|
||||
EnableVolumeCapacityPriority: feature.DefaultFeatureGate.Enabled(features.VolumeCapacityPriority),
|
||||
EnableNodeInclusionPolicyInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.NodeInclusionPolicyInPodTopologySpread),
|
||||
EnableMatchLabelKeysInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.MatchLabelKeysInPodTopologySpread),
|
||||
EnableInPlacePodVerticalScaling: feature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling),
|
||||
EnableSidecarContainers: feature.DefaultFeatureGate.Enabled(features.SidecarContainers),
|
||||
EnableSchedulingQueueHint: feature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints),
|
||||
EnableAsyncPreemption: feature.DefaultFeatureGate.Enabled(features.SchedulerAsyncPreemption),
|
||||
EnablePodLevelResources: feature.DefaultFeatureGate.Enabled(features.PodLevelResources),
|
||||
}
|
||||
|
||||
registry := runtime.Registry{
|
||||
dynamicresources.Name: runtime.FactoryAdapter(fts, dynamicresources.New),
|
||||
imagelocality.Name: imagelocality.New,
|
||||
tainttoleration.Name: runtime.FactoryAdapter(fts, tainttoleration.New),
|
||||
nodename.Name: runtime.FactoryAdapter(fts, nodename.New),
|
||||
nodeports.Name: runtime.FactoryAdapter(fts, nodeports.New),
|
||||
nodeaffinity.Name: runtime.FactoryAdapter(fts, nodeaffinity.New),
|
||||
podtopologyspread.Name: runtime.FactoryAdapter(fts, podtopologyspread.New),
|
||||
nodeunschedulable.Name: runtime.FactoryAdapter(fts, nodeunschedulable.New),
|
||||
noderesources.Name: runtime.FactoryAdapter(fts, noderesources.NewFit),
|
||||
noderesources.BalancedAllocationName: runtime.FactoryAdapter(fts, noderesources.NewBalancedAllocation),
|
||||
volumebinding.Name: runtime.FactoryAdapter(fts, volumebinding.New),
|
||||
volumerestrictions.Name: runtime.FactoryAdapter(fts, volumerestrictions.New),
|
||||
volumezone.Name: runtime.FactoryAdapter(fts, volumezone.New),
|
||||
nodevolumelimits.CSIName: runtime.FactoryAdapter(fts, nodevolumelimits.NewCSI),
|
||||
interpodaffinity.Name: runtime.FactoryAdapter(fts, interpodaffinity.New),
|
||||
queuesort.Name: queuesort.New,
|
||||
defaultbinder.Name: defaultbinder.New,
|
||||
defaultpreemption.Name: runtime.FactoryAdapter(fts, defaultpreemption.New),
|
||||
schedulinggates.Name: runtime.FactoryAdapter(fts, schedulinggates.New),
|
||||
}
|
||||
|
||||
return registry
|
||||
}
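// Illustrative sketch (editor's addition, not part of the vendored file): because
// runtime.Registry is a map from plugin name to factory, an out-of-tree plugin can be
// added next to the in-tree set before the registry is handed to the scheduler
// (normally via the scheduler app's out-of-tree registry option). "ExamplePlugin" and
// its factory below are hypothetical placeholders.

package example

import (
    "context"

    "k8s.io/apimachinery/pkg/runtime"
    "k8s.io/kubernetes/pkg/scheduler/framework"
    "k8s.io/kubernetes/pkg/scheduler/framework/plugins"
    frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
)

type examplePlugin struct{}

func (p *examplePlugin) Name() string { return "ExamplePlugin" }

func newExamplePlugin(_ context.Context, _ runtime.Object, _ framework.Handle) (framework.Plugin, error) {
    return &examplePlugin{}, nil
}

// buildRegistry returns the in-tree registry extended with the hypothetical plugin.
func buildRegistry() frameworkruntime.Registry {
    registry := plugins.NewInTreeRegistry()
    registry["ExamplePlugin"] = newExamplePlugin
    return registry
}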
|
94
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/schedulinggates/scheduling_gates.go
generated
vendored
Normal file
@ -0,0 +1,94 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package schedulinggates
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
// Name of the plugin used in the plugin registry and configurations.
|
||||
const Name = names.SchedulingGates
|
||||
|
||||
// SchedulingGates checks if a Pod carries .spec.schedulingGates.
|
||||
type SchedulingGates struct {
|
||||
enableSchedulingQueueHint bool
|
||||
}
|
||||
|
||||
var _ framework.PreEnqueuePlugin = &SchedulingGates{}
|
||||
var _ framework.EnqueueExtensions = &SchedulingGates{}
|
||||
|
||||
func (pl *SchedulingGates) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
func (pl *SchedulingGates) PreEnqueue(ctx context.Context, p *v1.Pod) *framework.Status {
|
||||
if len(p.Spec.SchedulingGates) == 0 {
|
||||
return nil
|
||||
}
|
||||
gates := make([]string, 0, len(p.Spec.SchedulingGates))
|
||||
for _, gate := range p.Spec.SchedulingGates {
|
||||
gates = append(gates, gate.Name)
|
||||
}
|
||||
return framework.NewStatus(framework.UnschedulableAndUnresolvable, fmt.Sprintf("waiting for scheduling gates: %v", gates))
|
||||
}
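// Illustrative sketch (editor's addition, not part of the vendored file) of PreEnqueue:
// a gated Pod is held back with UnschedulableAndUnresolvable, while an ungated Pod
// passes. The gate name is a hypothetical placeholder.

package schedulinggates

import (
    "context"
    "testing"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func TestPreEnqueueGatedPodSketch(t *testing.T) {
    pl := &SchedulingGates{}

    gated := &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{Name: "gated"},
        Spec: v1.PodSpec{
            SchedulingGates: []v1.PodSchedulingGate{{Name: "example.com/wait-for-quota"}},
        },
    }
    if status := pl.PreEnqueue(context.Background(), gated); status.IsSuccess() {
        t.Error("expected a gated pod to be held back at PreEnqueue")
    }

    ungated := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "ungated"}}
    if status := pl.PreEnqueue(context.Background(), ungated); !status.IsSuccess() {
        t.Error("expected an ungated pod to pass PreEnqueue")
    }
}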
|
||||
|
||||
// EventsToRegister returns the possible events that may make a Pod
// that was previously rejected by this plugin schedulable again.
|
||||
func (pl *SchedulingGates) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
if !pl.enableSchedulingQueueHint {
|
||||
return nil, nil
|
||||
}
|
||||
// When the QueueingHint feature is enabled,
|
||||
// the scheduling queue uses Pod/Update Queueing Hint
|
||||
// to determine whether a Pod's update makes the Pod schedulable or not.
|
||||
// https://github.com/kubernetes/kubernetes/pull/122234
|
||||
return []framework.ClusterEventWithHint{
|
||||
// Pods can become schedulable once their scheduling gates are removed
|
||||
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodSchedulingGatesEliminated}, QueueingHintFn: pl.isSchedulableAfterUpdatePodSchedulingGatesEliminated},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(_ context.Context, _ runtime.Object, _ framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
return &SchedulingGates{
|
||||
enableSchedulingQueueHint: fts.EnableSchedulingQueueHint,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (pl *SchedulingGates) isSchedulableAfterUpdatePodSchedulingGatesEliminated(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
_, modifiedPod, err := util.As[*v1.Pod](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
if modifiedPod.UID != pod.UID {
|
||||
// If the update event is not for targetPod, it wouldn't make targetPod schedulable.
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
return framework.Queue, nil
|
||||
}
|
236
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/tainttoleration/taint_toleration.go
generated
vendored
Normal file
@ -0,0 +1,236 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package tainttoleration
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
v1helper "k8s.io/component-helpers/scheduling/corev1"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
// TaintToleration is a plugin that checks if a pod tolerates a node's taints.
|
||||
type TaintToleration struct {
|
||||
handle framework.Handle
|
||||
enableSchedulingQueueHint bool
|
||||
}
|
||||
|
||||
var _ framework.FilterPlugin = &TaintToleration{}
|
||||
var _ framework.PreScorePlugin = &TaintToleration{}
|
||||
var _ framework.ScorePlugin = &TaintToleration{}
|
||||
var _ framework.EnqueueExtensions = &TaintToleration{}
|
||||
|
||||
const (
|
||||
// Name is the name of the plugin used in the plugin registry and configurations.
|
||||
Name = names.TaintToleration
|
||||
// preScoreStateKey is the key in CycleState to TaintToleration pre-computed data for Scoring.
|
||||
preScoreStateKey = "PreScore" + Name
|
||||
// ErrReasonNotMatch is the Filter reason status when not matching.
|
||||
ErrReasonNotMatch = "node(s) had taints that the pod didn't tolerate"
|
||||
)
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (pl *TaintToleration) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
// EventsToRegister returns the possible events that may make a Pod
// that was previously rejected by this plugin schedulable again.
|
||||
func (pl *TaintToleration) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
if pl.enableSchedulingQueueHint {
|
||||
return []framework.ClusterEventWithHint{
|
||||
// When the QueueingHint feature is enabled, preCheck is eliminated and we don't need additional UpdateNodeLabel.
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
|
||||
// When the QueueingHint feature is enabled,
|
||||
// the scheduling queue uses Pod/Update Queueing Hint
|
||||
// to determine whether a Pod's update makes the Pod schedulable or not.
|
||||
// https://github.com/kubernetes/kubernetes/pull/122234
|
||||
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodTolerations}, QueueingHintFn: pl.isSchedulableAfterPodTolerationChange},
|
||||
}, nil
|
||||
}
|
||||
|
||||
return []framework.ClusterEventWithHint{
|
||||
// A note about UpdateNodeLabel event:
|
||||
// Ideally, it's supposed to register only Add | UpdateNodeTaint because UpdateNodeLabel will never change the result from this plugin.
|
||||
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
|
||||
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeTaint | framework.UpdateNodeLabel}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
|
||||
// No need to register the Pod event; the update to the unschedulable Pods already triggers the scheduling retry when QHint is disabled.
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterNodeChange is invoked for all node events reported by
|
||||
// an informer. It checks whether that change made a previously unschedulable
|
||||
// pod schedulable.
|
||||
func (pl *TaintToleration) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
wasUntolerated := true
|
||||
if originalNode != nil {
|
||||
_, wasUntolerated = v1helper.FindMatchingUntoleratedTaint(originalNode.Spec.Taints, pod.Spec.Tolerations, helper.DoNotScheduleTaintsFilterFunc())
|
||||
}
|
||||
|
||||
_, isUntolerated := v1helper.FindMatchingUntoleratedTaint(modifiedNode.Spec.Taints, pod.Spec.Tolerations, helper.DoNotScheduleTaintsFilterFunc())
|
||||
|
||||
if wasUntolerated && !isUntolerated {
|
||||
logger.V(5).Info("node was created or updated, and this may make the Pod rejected by TaintToleration plugin in the previous scheduling cycle schedulable", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("node was created or updated, but it doesn't change the TaintToleration plugin's decision", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// Filter invoked at the filter extension point.
|
||||
func (pl *TaintToleration) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
node := nodeInfo.Node()
|
||||
|
||||
taint, isUntolerated := v1helper.FindMatchingUntoleratedTaint(node.Spec.Taints, pod.Spec.Tolerations, helper.DoNotScheduleTaintsFilterFunc())
|
||||
if !isUntolerated {
|
||||
return nil
|
||||
}
|
||||
|
||||
errReason := fmt.Sprintf("node(s) had untolerated taint {%s: %s}", taint.Key, taint.Value)
|
||||
return framework.NewStatus(framework.UnschedulableAndUnresolvable, errReason)
|
||||
}
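// Illustrative sketch (editor's addition, not part of the vendored file) of Filter:
// a node with a NoSchedule taint rejects a pod that carries no matching toleration.
// The taint key and value are hypothetical placeholders.

package tainttoleration

import (
    "context"
    "testing"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/kubernetes/pkg/scheduler/framework"
)

func TestFilterUntoleratedTaintSketch(t *testing.T) {
    node := &v1.Node{
        ObjectMeta: metav1.ObjectMeta{Name: "tainted-node"},
        Spec: v1.NodeSpec{
            Taints: []v1.Taint{{Key: "dedicated", Value: "gpu", Effect: v1.TaintEffectNoSchedule}},
        },
    }
    nodeInfo := framework.NewNodeInfo()
    nodeInfo.SetNode(node)

    pl := &TaintToleration{}
    pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "plain-pod"}}

    if status := pl.Filter(context.Background(), framework.NewCycleState(), pod, nodeInfo); status.IsSuccess() {
        t.Error("expected the pod without a matching toleration to be rejected")
    }
}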
|
||||
|
||||
// preScoreState computed at PreScore and used at Score.
|
||||
type preScoreState struct {
|
||||
tolerationsPreferNoSchedule []v1.Toleration
|
||||
}
|
||||
|
||||
// Clone implements the mandatory Clone interface. We don't really copy the data since
|
||||
// there is no need for that.
|
||||
func (s *preScoreState) Clone() framework.StateData {
|
||||
return s
|
||||
}
|
||||
|
||||
// getAllTolerationPreferNoSchedule gets the list of all Tolerations with Effect PreferNoSchedule or with no effect.
|
||||
func getAllTolerationPreferNoSchedule(tolerations []v1.Toleration) (tolerationList []v1.Toleration) {
|
||||
for _, toleration := range tolerations {
|
||||
// Empty effect means all effects which includes PreferNoSchedule, so we need to collect it as well.
|
||||
if len(toleration.Effect) == 0 || toleration.Effect == v1.TaintEffectPreferNoSchedule {
|
||||
tolerationList = append(tolerationList, toleration)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// PreScore builds and writes cycle state used by Score and NormalizeScore.
|
||||
func (pl *TaintToleration) PreScore(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodes []*framework.NodeInfo) *framework.Status {
|
||||
if len(nodes) == 0 {
|
||||
return nil
|
||||
}
|
||||
tolerationsPreferNoSchedule := getAllTolerationPreferNoSchedule(pod.Spec.Tolerations)
|
||||
state := &preScoreState{
|
||||
tolerationsPreferNoSchedule: tolerationsPreferNoSchedule,
|
||||
}
|
||||
cycleState.Write(preScoreStateKey, state)
|
||||
return nil
|
||||
}
|
||||
|
||||
func getPreScoreState(cycleState *framework.CycleState) (*preScoreState, error) {
|
||||
c, err := cycleState.Read(preScoreStateKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read %q from cycleState: %w", preScoreStateKey, err)
|
||||
}
|
||||
|
||||
s, ok := c.(*preScoreState)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("%+v convert to tainttoleration.preScoreState error", c)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// CountIntolerableTaintsPreferNoSchedule gives the count of intolerable taints of a pod with effect PreferNoSchedule
|
||||
func countIntolerableTaintsPreferNoSchedule(taints []v1.Taint, tolerations []v1.Toleration) (intolerableTaints int) {
|
||||
for _, taint := range taints {
|
||||
// check only on taints that have effect PreferNoSchedule
|
||||
if taint.Effect != v1.TaintEffectPreferNoSchedule {
|
||||
continue
|
||||
}
|
||||
|
||||
if !v1helper.TolerationsTolerateTaint(tolerations, &taint) {
|
||||
intolerableTaints++
|
||||
}
|
||||
}
|
||||
return
|
||||
}
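// Worked example (editor's illustration, not part of the vendored file): with two
// PreferNoSchedule taints of which one is tolerated, and one NoSchedule taint that this
// helper ignores, the count is 1. The taint keys are hypothetical placeholders.

package tainttoleration

import (
    "testing"

    v1 "k8s.io/api/core/v1"
)

func TestCountIntolerableTaintsSketch(t *testing.T) {
    taints := []v1.Taint{
        {Key: "team-a", Effect: v1.TaintEffectPreferNoSchedule}, // tolerated below
        {Key: "team-b", Effect: v1.TaintEffectPreferNoSchedule}, // not tolerated -> counted
        {Key: "team-c", Effect: v1.TaintEffectNoSchedule},       // wrong effect -> ignored here
    }
    tolerations := []v1.Toleration{
        {Key: "team-a", Operator: v1.TolerationOpExists},
    }

    if got := countIntolerableTaintsPreferNoSchedule(taints, tolerations); got != 1 {
        t.Errorf("expected 1 intolerable PreferNoSchedule taint, got %d", got)
    }
}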
|
||||
|
||||
// Score invoked at the Score extension point.
|
||||
func (pl *TaintToleration) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
|
||||
nodeInfo, err := pl.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
|
||||
if err != nil {
|
||||
return 0, framework.AsStatus(fmt.Errorf("getting node %q from Snapshot: %w", nodeName, err))
|
||||
}
|
||||
node := nodeInfo.Node()
|
||||
|
||||
s, err := getPreScoreState(state)
|
||||
if err != nil {
|
||||
return 0, framework.AsStatus(err)
|
||||
}
|
||||
|
||||
score := int64(countIntolerableTaintsPreferNoSchedule(node.Spec.Taints, s.tolerationsPreferNoSchedule))
|
||||
return score, nil
|
||||
}
|
||||
|
||||
// NormalizeScore invoked after scoring all nodes.
|
||||
func (pl *TaintToleration) NormalizeScore(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, scores framework.NodeScoreList) *framework.Status {
|
||||
return helper.DefaultNormalizeScore(framework.MaxNodeScore, true, scores)
|
||||
}
|
||||
|
||||
// ScoreExtensions of the Score plugin.
|
||||
func (pl *TaintToleration) ScoreExtensions() framework.ScoreExtensions {
|
||||
return pl
|
||||
}
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(_ context.Context, _ runtime.Object, h framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
return &TaintToleration{
|
||||
handle: h,
|
||||
enableSchedulingQueueHint: fts.EnableSchedulingQueueHint,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterPodTolerationChange is invoked whenever a pod's toleration changed.
|
||||
func (pl *TaintToleration) isSchedulableAfterPodTolerationChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
_, modifiedPod, err := util.As[*v1.Pod](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
if pod.UID == modifiedPod.UID {
|
||||
// The updated Pod is the unschedulable Pod.
|
||||
logger.V(5).Info("a new toleration is added for the unschedulable Pod, and it may make it schedulable", "pod", klog.KObj(modifiedPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("a new toleration is added for a Pod, but it's an unrelated Pod and wouldn't change the TaintToleration plugin's decision", "pod", klog.KObj(modifiedPod))
|
||||
|
||||
return framework.QueueSkip, nil
|
||||
}
|
10
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding/OWNERS
generated
vendored
Normal file
@ -0,0 +1,10 @@
# See the OWNERS docs at https://go.k8s.io/owners

approvers:
- sig-storage-approvers
- cofyc
reviewers:
- sig-storage-reviewers
- cofyc
labels:
- sig/storage
131
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding/assume_cache.go
generated
vendored
Normal file
@ -0,0 +1,131 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package volumebinding
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
storagehelpers "k8s.io/component-helpers/storage/volume"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util/assumecache"
|
||||
)
|
||||
|
||||
// PVAssumeCache is a AssumeCache for PersistentVolume objects
|
||||
type PVAssumeCache struct {
|
||||
*assumecache.AssumeCache
|
||||
logger klog.Logger
|
||||
}
|
||||
|
||||
func pvStorageClassIndexFunc(obj interface{}) ([]string, error) {
|
||||
if pv, ok := obj.(*v1.PersistentVolume); ok {
|
||||
return []string{storagehelpers.GetPersistentVolumeClass(pv)}, nil
|
||||
}
|
||||
return []string{""}, fmt.Errorf("object is not a v1.PersistentVolume: %v", obj)
|
||||
}
|
||||
|
||||
// NewPVAssumeCache creates a PV assume cache.
|
||||
func NewPVAssumeCache(logger klog.Logger, informer assumecache.Informer) *PVAssumeCache {
|
||||
logger = klog.LoggerWithName(logger, "PV Cache")
|
||||
return &PVAssumeCache{
|
||||
AssumeCache: assumecache.NewAssumeCache(logger, informer, "v1.PersistentVolume", "storageclass", pvStorageClassIndexFunc),
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *PVAssumeCache) GetPV(pvName string) (*v1.PersistentVolume, error) {
|
||||
obj, err := c.Get(pvName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
pv, ok := obj.(*v1.PersistentVolume)
|
||||
if !ok {
|
||||
return nil, &assumecache.WrongTypeError{TypeName: "v1.PersistentVolume", Object: obj}
|
||||
}
|
||||
return pv, nil
|
||||
}
|
||||
|
||||
func (c *PVAssumeCache) GetAPIPV(pvName string) (*v1.PersistentVolume, error) {
|
||||
obj, err := c.GetAPIObj(pvName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pv, ok := obj.(*v1.PersistentVolume)
|
||||
if !ok {
|
||||
return nil, &assumecache.WrongTypeError{TypeName: "v1.PersistentVolume", Object: obj}
|
||||
}
|
||||
return pv, nil
|
||||
}
|
||||
|
||||
func (c *PVAssumeCache) ListPVs(storageClassName string) []*v1.PersistentVolume {
|
||||
objs := c.List(&v1.PersistentVolume{
|
||||
Spec: v1.PersistentVolumeSpec{
|
||||
StorageClassName: storageClassName,
|
||||
},
|
||||
})
|
||||
pvs := []*v1.PersistentVolume{}
|
||||
for _, obj := range objs {
|
||||
pv, ok := obj.(*v1.PersistentVolume)
|
||||
if !ok {
|
||||
c.logger.Error(&assumecache.WrongTypeError{TypeName: "v1.PersistentVolume", Object: obj}, "ListPVs")
|
||||
continue
|
||||
}
|
||||
pvs = append(pvs, pv)
|
||||
}
|
||||
return pvs
|
||||
}
|
||||
|
||||
// PVCAssumeCache is a AssumeCache for PersistentVolumeClaim objects
|
||||
type PVCAssumeCache struct {
|
||||
*assumecache.AssumeCache
|
||||
logger klog.Logger
|
||||
}
|
||||
|
||||
// NewPVCAssumeCache creates a PVC assume cache.
|
||||
func NewPVCAssumeCache(logger klog.Logger, informer assumecache.Informer) *PVCAssumeCache {
|
||||
logger = klog.LoggerWithName(logger, "PVC Cache")
|
||||
return &PVCAssumeCache{
|
||||
AssumeCache: assumecache.NewAssumeCache(logger, informer, "v1.PersistentVolumeClaim", "", nil),
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *PVCAssumeCache) GetPVC(pvcKey string) (*v1.PersistentVolumeClaim, error) {
|
||||
obj, err := c.Get(pvcKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
pvc, ok := obj.(*v1.PersistentVolumeClaim)
|
||||
if !ok {
|
||||
return nil, &assumecache.WrongTypeError{TypeName: "v1.PersistentVolumeClaim", Object: obj}
|
||||
}
|
||||
return pvc, nil
|
||||
}
|
||||
|
||||
func (c *PVCAssumeCache) GetAPIPVC(pvcKey string) (*v1.PersistentVolumeClaim, error) {
|
||||
obj, err := c.GetAPIObj(pvcKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pvc, ok := obj.(*v1.PersistentVolumeClaim)
|
||||
if !ok {
|
||||
return nil, &assumecache.WrongTypeError{TypeName: "v1.PersistentVolumeClaim", Object: obj}
|
||||
}
|
||||
return pvc, nil
|
||||
}
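// Illustrative sketch (editor's addition, not part of the vendored file) of wiring a
// PVAssumeCache from a shared informer factory, assuming a fake clientset and a test
// logger are acceptable for illustration; "example-sc" is a hypothetical StorageClass name.

package volumebinding

import (
    "testing"
    "time"

    "k8s.io/client-go/informers"
    "k8s.io/client-go/kubernetes/fake"
    "k8s.io/klog/v2/ktesting"
)

func TestPVAssumeCacheSketch(t *testing.T) {
    logger, _ := ktesting.NewTestContext(t)
    client := fake.NewSimpleClientset()
    factory := informers.NewSharedInformerFactory(client, 10*time.Minute)

    pvCache := NewPVAssumeCache(logger, factory.Core().V1().PersistentVolumes().Informer())

    // Nothing has been added to the informer store yet, so listing any class is empty.
    if pvs := pvCache.ListPVs("example-sc"); len(pvs) != 0 {
        t.Errorf("expected no PVs for an empty cache, got %d", len(pvs))
    }
}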
|
1100
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding/binder.go
generated
vendored
Normal file
File diff suppressed because it is too large
75
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding/fake_binder.go
generated
vendored
Normal file
@ -0,0 +1,75 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package volumebinding
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
)
|
||||
|
||||
// FakeVolumeBinderConfig holds configurations for fake volume binder.
|
||||
type FakeVolumeBinderConfig struct {
|
||||
AllBound bool
|
||||
FindReasons ConflictReasons
|
||||
FindErr error
|
||||
AssumeErr error
|
||||
BindErr error
|
||||
}
|
||||
|
||||
// NewFakeVolumeBinder sets up all the caches needed for the scheduler to make
|
||||
// topology-aware volume binding decisions.
|
||||
func NewFakeVolumeBinder(config *FakeVolumeBinderConfig) *FakeVolumeBinder {
|
||||
return &FakeVolumeBinder{
|
||||
config: config,
|
||||
}
|
||||
}
|
||||
|
||||
// FakeVolumeBinder represents a fake volume binder for testing.
|
||||
type FakeVolumeBinder struct {
|
||||
config *FakeVolumeBinderConfig
|
||||
AssumeCalled bool
|
||||
BindCalled bool
|
||||
}
|
||||
|
||||
var _ SchedulerVolumeBinder = &FakeVolumeBinder{}
|
||||
|
||||
// GetPodVolumeClaims implements SchedulerVolumeBinder.GetPodVolumes.
|
||||
func (b *FakeVolumeBinder) GetPodVolumeClaims(_ klog.Logger, pod *v1.Pod) (podVolumeClaims *PodVolumeClaims, err error) {
|
||||
return &PodVolumeClaims{}, nil
|
||||
}
|
||||
|
||||
// FindPodVolumes implements SchedulerVolumeBinder.FindPodVolumes.
|
||||
func (b *FakeVolumeBinder) FindPodVolumes(_ klog.Logger, pod *v1.Pod, _ *PodVolumeClaims, node *v1.Node) (podVolumes *PodVolumes, reasons ConflictReasons, err error) {
|
||||
return nil, b.config.FindReasons, b.config.FindErr
|
||||
}
|
||||
|
||||
// AssumePodVolumes implements SchedulerVolumeBinder.AssumePodVolumes.
|
||||
func (b *FakeVolumeBinder) AssumePodVolumes(_ klog.Logger, assumedPod *v1.Pod, nodeName string, podVolumes *PodVolumes) (bool, error) {
|
||||
b.AssumeCalled = true
|
||||
return b.config.AllBound, b.config.AssumeErr
|
||||
}
|
||||
|
||||
// RevertAssumedPodVolumes implements SchedulerVolumeBinder.RevertAssumedPodVolumes
|
||||
func (b *FakeVolumeBinder) RevertAssumedPodVolumes(_ *PodVolumes) {}
|
||||
|
||||
// BindPodVolumes implements SchedulerVolumeBinder.BindPodVolumes.
|
||||
func (b *FakeVolumeBinder) BindPodVolumes(ctx context.Context, assumedPod *v1.Pod, podVolumes *PodVolumes) error {
|
||||
b.BindCalled = true
|
||||
return b.config.BindErr
|
||||
}
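// Illustrative sketch (editor's addition, not part of the vendored file): scheduler tests
// can preconfigure FakeVolumeBinder with the conflict reasons they want FindPodVolumes to
// report. This assumes the ConflictReason/ConflictReasons types from the (suppressed)
// binder.go; the reason string itself is a hypothetical placeholder.

package volumebinding

import (
    "testing"

    "k8s.io/klog/v2/ktesting"
)

func TestFakeVolumeBinderSketch(t *testing.T) {
    logger, _ := ktesting.NewTestContext(t)

    binder := NewFakeVolumeBinder(&FakeVolumeBinderConfig{
        FindReasons: ConflictReasons{ConflictReason("example conflict")},
    })

    _, reasons, err := binder.FindPodVolumes(logger, nil, nil, nil)
    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }
    if len(reasons) != 1 {
        t.Errorf("expected the preconfigured conflict reason, got %v", reasons)
    }
}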
|
55
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding/metrics/metrics.go
generated
vendored
Normal file
@ -0,0 +1,55 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"k8s.io/component-base/metrics"
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
)
|
||||
|
||||
// VolumeSchedulerSubsystem - subsystem name used by scheduler
|
||||
const VolumeSchedulerSubsystem = "scheduler_volume"
|
||||
|
||||
var (
|
||||
// VolumeBindingRequestSchedulerBinderCache tracks the number of volume binder cache operations.
|
||||
VolumeBindingRequestSchedulerBinderCache = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: VolumeSchedulerSubsystem,
|
||||
Name: "binder_cache_requests_total",
|
||||
Help: "Total number for request volume binding cache",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"operation"},
|
||||
)
|
||||
// VolumeSchedulingStageFailed tracks the number of failed volume scheduling operations.
|
||||
VolumeSchedulingStageFailed = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: VolumeSchedulerSubsystem,
|
||||
Name: "scheduling_stage_error_total",
|
||||
Help: "Volume scheduling stage error count",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"operation"},
|
||||
)
|
||||
)
|
||||
|
||||
// RegisterVolumeSchedulingMetrics is used by the scheduler, because the volume binding cache is a library
// used by the scheduler process.
|
||||
func RegisterVolumeSchedulingMetrics() {
|
||||
legacyregistry.MustRegister(VolumeBindingRequestSchedulerBinderCache)
|
||||
legacyregistry.MustRegister(VolumeSchedulingStageFailed)
|
||||
}
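A minimal sketch of how a consumer could register these counters once and then increment them; the operation label values ("assume", "bind") are illustrative here, not necessarily the exact labels passed by the binder.

package main

import (
	volumebindingmetrics "k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding/metrics"
)

func main() {
	// Register once at process start-up (normally done by the scheduler).
	volumebindingmetrics.RegisterVolumeSchedulingMetrics()

	// Record one binder cache request for a hypothetical "assume" operation.
	volumebindingmetrics.VolumeBindingRequestSchedulerBinderCache.WithLabelValues("assume").Inc()

	// Record a failed "bind" scheduling stage.
	volumebindingmetrics.VolumeSchedulingStageFailed.WithLabelValues("bind").Inc()
}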
|
54
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding/scorer.go
generated
vendored
Normal file
@ -0,0 +1,54 @@
|
||||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package volumebinding
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
|
||||
)
|
||||
|
||||
// classResourceMap holds a map of storage class to resource.
|
||||
type classResourceMap map[string]*StorageResource
|
||||
|
||||
// volumeCapacityScorer calculates the score based on class storage resource information.
|
||||
type volumeCapacityScorer func(classResourceMap) int64
|
||||
|
||||
// buildScorerFunction builds volumeCapacityScorer from the scoring function shape.
|
||||
func buildScorerFunction(scoringFunctionShape helper.FunctionShape) volumeCapacityScorer {
|
||||
rawScoringFunction := helper.BuildBrokenLinearFunction(scoringFunctionShape)
|
||||
f := func(requested, capacity int64) int64 {
|
||||
if capacity == 0 || requested > capacity {
|
||||
return rawScoringFunction(maxUtilization)
|
||||
}
|
||||
|
||||
return rawScoringFunction(requested * maxUtilization / capacity)
|
||||
}
|
||||
return func(classResources classResourceMap) int64 {
|
||||
var nodeScore int64
|
||||
// in alpha stage, all classes have the same weight
|
||||
weightSum := len(classResources)
|
||||
if weightSum == 0 {
|
||||
return 0
|
||||
}
|
||||
for _, resource := range classResources {
|
||||
classScore := f(resource.Requested, resource.Capacity)
|
||||
nodeScore += classScore
|
||||
}
|
||||
return int64(math.Round(float64(nodeScore) / float64(weightSum)))
|
||||
}
|
||||
}
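A hypothetical worked example of the scoring arithmetic above, written as a same-package sketch: with a linear shape from (0, 0) to (100, 10), a class at 30% utilization scores 3, a class at 90% scores 9, and the node score is their rounded average. The function name and shape values are illustrative only.

package volumebinding

import (
	"fmt"

	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
)

// exampleScorer is a hypothetical sketch of buildScorerFunction in action.
func exampleScorer() {
	shape := helper.FunctionShape{
		{Utilization: 0, Score: 0},
		{Utilization: 100, Score: 10},
	}
	scorer := buildScorerFunction(shape)

	score := scorer(classResourceMap{
		"fast": {Requested: 30, Capacity: 100}, // utilization 30% -> score 3
		"slow": {Requested: 90, Capacity: 100}, // utilization 90% -> score 9
	})
	fmt.Println(score) // (3 + 9) / 2 = 6
}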
|
217
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding/test_utils.go
generated
vendored
Normal file
@ -0,0 +1,217 @@
|
||||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package volumebinding
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/component-helpers/storage/volume"
|
||||
"k8s.io/utils/ptr"
|
||||
)
|
||||
|
||||
type nodeBuilder struct {
|
||||
*v1.Node
|
||||
}
|
||||
|
||||
func makeNode(name string) nodeBuilder {
|
||||
return nodeBuilder{Node: &v1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
Labels: map[string]string{
|
||||
v1.LabelHostname: name,
|
||||
},
|
||||
},
|
||||
}}
|
||||
}
|
||||
|
||||
func (nb nodeBuilder) withLabel(key, value string) nodeBuilder {
|
||||
if nb.Node.ObjectMeta.Labels == nil {
|
||||
nb.Node.ObjectMeta.Labels = map[string]string{}
|
||||
}
|
||||
nb.Node.ObjectMeta.Labels[key] = value
|
||||
return nb
|
||||
}
|
||||
|
||||
type pvBuilder struct {
|
||||
*v1.PersistentVolume
|
||||
}
|
||||
|
||||
func makePV(name, className string) pvBuilder {
|
||||
return pvBuilder{PersistentVolume: &v1.PersistentVolume{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
},
|
||||
Spec: v1.PersistentVolumeSpec{
|
||||
StorageClassName: className,
|
||||
},
|
||||
}}
|
||||
}
|
||||
|
||||
func (pvb pvBuilder) withNodeAffinity(keyValues map[string][]string) pvBuilder {
|
||||
matchExpressions := make([]v1.NodeSelectorRequirement, 0)
|
||||
for key, values := range keyValues {
|
||||
matchExpressions = append(matchExpressions, v1.NodeSelectorRequirement{
|
||||
Key: key,
|
||||
Operator: v1.NodeSelectorOpIn,
|
||||
Values: values,
|
||||
})
|
||||
}
|
||||
pvb.PersistentVolume.Spec.NodeAffinity = &v1.VolumeNodeAffinity{
|
||||
Required: &v1.NodeSelector{
|
||||
NodeSelectorTerms: []v1.NodeSelectorTerm{
|
||||
{
|
||||
MatchExpressions: matchExpressions,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
return pvb
|
||||
}
|
||||
|
||||
func (pvb pvBuilder) withVersion(version string) pvBuilder {
|
||||
pvb.PersistentVolume.ObjectMeta.ResourceVersion = version
|
||||
return pvb
|
||||
}
|
||||
|
||||
func (pvb pvBuilder) withCapacity(capacity resource.Quantity) pvBuilder {
|
||||
pvb.PersistentVolume.Spec.Capacity = v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceStorage): capacity,
|
||||
}
|
||||
return pvb
|
||||
}
|
||||
|
||||
func (pvb pvBuilder) withPhase(phase v1.PersistentVolumePhase) pvBuilder {
|
||||
pvb.PersistentVolume.Status = v1.PersistentVolumeStatus{
|
||||
Phase: phase,
|
||||
}
|
||||
return pvb
|
||||
}
|
||||
|
||||
type pvcBuilder struct {
|
||||
*v1.PersistentVolumeClaim
|
||||
}
|
||||
|
||||
func makePVC(name string, storageClassName string) pvcBuilder {
|
||||
return pvcBuilder{PersistentVolumeClaim: &v1.PersistentVolumeClaim{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
Namespace: v1.NamespaceDefault,
|
||||
},
|
||||
Spec: v1.PersistentVolumeClaimSpec{
|
||||
StorageClassName: ptr.To(storageClassName),
|
||||
},
|
||||
}}
|
||||
}
|
||||
|
||||
func (pvcb pvcBuilder) withBoundPV(pvName string) pvcBuilder {
|
||||
pvcb.PersistentVolumeClaim.Spec.VolumeName = pvName
|
||||
metav1.SetMetaDataAnnotation(&pvcb.PersistentVolumeClaim.ObjectMeta, volume.AnnBindCompleted, "true")
|
||||
return pvcb
|
||||
}
|
||||
|
||||
func (pvcb pvcBuilder) withRequestStorage(request resource.Quantity) pvcBuilder {
|
||||
pvcb.PersistentVolumeClaim.Spec.Resources = v1.VolumeResourceRequirements{
|
||||
Requests: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceStorage): request,
|
||||
},
|
||||
}
|
||||
return pvcb
|
||||
}
|
||||
|
||||
func (pvcb pvcBuilder) withPhase(phase v1.PersistentVolumeClaimPhase) pvcBuilder {
|
||||
pvcb.PersistentVolumeClaim.Status = v1.PersistentVolumeClaimStatus{
|
||||
Phase: phase,
|
||||
}
|
||||
return pvcb
|
||||
}
|
||||
|
||||
type podBuilder struct {
|
||||
*v1.Pod
|
||||
}
|
||||
|
||||
func makePod(name string) podBuilder {
|
||||
pb := podBuilder{Pod: &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
Namespace: v1.NamespaceDefault,
|
||||
},
|
||||
}}
|
||||
pb.Pod.Spec.Volumes = make([]v1.Volume, 0)
|
||||
return pb
|
||||
}
|
||||
|
||||
func (pb podBuilder) withNodeName(name string) podBuilder {
|
||||
pb.Pod.Spec.NodeName = name
|
||||
return pb
|
||||
}
|
||||
|
||||
func (pb podBuilder) withNamespace(name string) podBuilder {
|
||||
pb.Pod.ObjectMeta.Namespace = name
|
||||
return pb
|
||||
}
|
||||
|
||||
func (pb podBuilder) withPVCVolume(pvcName, name string) podBuilder {
|
||||
pb.Pod.Spec.Volumes = append(pb.Pod.Spec.Volumes, v1.Volume{
|
||||
Name: name,
|
||||
VolumeSource: v1.VolumeSource{
|
||||
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
|
||||
ClaimName: pvcName,
|
||||
},
|
||||
},
|
||||
})
|
||||
return pb
|
||||
}
|
||||
|
||||
func (pb podBuilder) withPVCSVolume(pvcs []*v1.PersistentVolumeClaim) podBuilder {
|
||||
for i, pvc := range pvcs {
|
||||
pb.withPVCVolume(pvc.Name, fmt.Sprintf("vol%v", i))
|
||||
}
|
||||
return pb
|
||||
}
|
||||
|
||||
func (pb podBuilder) withEmptyDirVolume() podBuilder {
|
||||
pb.Pod.Spec.Volumes = append(pb.Pod.Spec.Volumes, v1.Volume{
|
||||
VolumeSource: v1.VolumeSource{
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{},
|
||||
},
|
||||
})
|
||||
return pb
|
||||
}
|
||||
|
||||
func (pb podBuilder) withGenericEphemeralVolume(name string) podBuilder {
|
||||
pb.Pod.Spec.Volumes = append(pb.Pod.Spec.Volumes, v1.Volume{
|
||||
Name: name,
|
||||
VolumeSource: v1.VolumeSource{
|
||||
Ephemeral: &v1.EphemeralVolumeSource{},
|
||||
},
|
||||
})
|
||||
return pb
|
||||
}
|
||||
|
||||
func (pb podBuilder) withCSI(driver string) podBuilder {
|
||||
pb.Pod.Spec.Volumes = append(pb.Pod.Spec.Volumes, v1.Volume{
|
||||
VolumeSource: v1.VolumeSource{
|
||||
CSI: &v1.CSIVolumeSource{
|
||||
Driver: driver,
|
||||
},
|
||||
},
|
||||
})
|
||||
return pb
|
||||
}
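A hypothetical sketch showing how these builders are meant to be chained in tests: each with* method mutates the embedded object and returns the builder, so fixtures compose fluently. The names, zone label key and sizes below are illustrative.

package volumebinding

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// exampleFixtures builds a node, PV, PVC and pod the way a test would.
func exampleFixtures() (*v1.Node, *v1.PersistentVolume, *v1.PersistentVolumeClaim, *v1.Pod) {
	node := makeNode("node-a").withLabel("topology.kubernetes.io/zone", "zone-1").Node

	pv := makePV("pv-a", "fast").
		withCapacity(resource.MustParse("10Gi")).
		withPhase(v1.VolumeAvailable).
		withNodeAffinity(map[string][]string{"topology.kubernetes.io/zone": {"zone-1"}}).PersistentVolume

	pvc := makePVC("pvc-a", "fast").
		withRequestStorage(resource.MustParse("5Gi")).PersistentVolumeClaim

	pod := makePod("pod-a").withNodeName("node-a").withPVCVolume("pvc-a", "vol0").Pod

	return node, pv, pvc, pod
}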
|
602
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding/volume_binding.go
generated
vendored
Normal file
@ -0,0 +1,602 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package volumebinding
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
storagev1 "k8s.io/api/storage/v1"
|
||||
apiequality "k8s.io/apimachinery/pkg/api/equality"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
corelisters "k8s.io/client-go/listers/core/v1"
|
||||
"k8s.io/component-helpers/storage/ephemeral"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
||||
"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
const (
|
||||
stateKey framework.StateKey = Name
|
||||
|
||||
maxUtilization = 100
|
||||
)
|
||||
|
||||
// the state is initialized in the PreFilter phase. Because we save the pointer in
// framework.CycleState, the later phases don't need to call the Write method
// to update the value.
|
||||
type stateData struct {
|
||||
allBound bool
|
||||
// podVolumesByNode holds the pod's volume information found in the Filter
|
||||
// phase for each node
|
||||
// it's initialized in the PreFilter phase
|
||||
podVolumesByNode map[string]*PodVolumes
|
||||
podVolumeClaims *PodVolumeClaims
|
||||
// hasStaticBindings declares whether the pod contains one or more StaticBinding.
// If not, volumeBinding will skip the Score extension point.
|
||||
hasStaticBindings bool
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
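// Clone returns the same instance on purpose: later phases never write the state back
// through CycleState, and concurrent per-node updates in Filter are serialized by the
// embedded mutex, so no deep copy is needed.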
func (d *stateData) Clone() framework.StateData {
|
||||
return d
|
||||
}
|
||||
|
||||
// VolumeBinding is a plugin that binds pod volumes in scheduling.
|
||||
// In the Filter phase, the pod binding cache is created for the pod and used in
// the Reserve and PreBind phases.
|
||||
type VolumeBinding struct {
|
||||
Binder SchedulerVolumeBinder
|
||||
PVCLister corelisters.PersistentVolumeClaimLister
|
||||
scorer volumeCapacityScorer
|
||||
fts feature.Features
|
||||
}
|
||||
|
||||
var _ framework.PreFilterPlugin = &VolumeBinding{}
|
||||
var _ framework.FilterPlugin = &VolumeBinding{}
|
||||
var _ framework.ReservePlugin = &VolumeBinding{}
|
||||
var _ framework.PreBindPlugin = &VolumeBinding{}
|
||||
var _ framework.PreScorePlugin = &VolumeBinding{}
|
||||
var _ framework.ScorePlugin = &VolumeBinding{}
|
||||
var _ framework.EnqueueExtensions = &VolumeBinding{}
|
||||
|
||||
// Name is the name of the plugin used in Registry and configurations.
|
||||
const Name = names.VolumeBinding
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (pl *VolumeBinding) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
// EventsToRegister returns the possible events that may make a Pod
// rejected by this plugin schedulable.
|
||||
func (pl *VolumeBinding) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
// Pods may fail to find available PVs because the node labels do not
|
||||
// match the storage class's allowed topologies or PV's node affinity.
|
||||
// A new or updated node may make pods schedulable.
|
||||
//
|
||||
// A note about UpdateNodeTaint event:
|
||||
// Ideally, it's supposed to register only Add | UpdateNodeLabel because UpdateNodeTaint will never change the result from this plugin.
|
||||
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
|
||||
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
||||
nodeActionType := framework.Add | framework.UpdateNodeLabel | framework.UpdateNodeTaint
|
||||
if pl.fts.EnableSchedulingQueueHint {
|
||||
// When scheduling queue hint is enabled, we don't use the problematic preCheck and don't need to register UpdateNodeTaint event.
|
||||
nodeActionType = framework.Add | framework.UpdateNodeLabel
|
||||
}
|
||||
events := []framework.ClusterEventWithHint{
|
||||
// Pods may fail because of missing or mis-configured storage class
|
||||
// (e.g., allowedTopologies, volumeBindingMode), and hence may become
|
||||
// schedulable upon StorageClass Add or Update events.
|
||||
{Event: framework.ClusterEvent{Resource: framework.StorageClass, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterStorageClassChange},
|
||||
|
||||
// We bind PVCs with PVs, so any changes may make the pods schedulable.
|
||||
{Event: framework.ClusterEvent{Resource: framework.PersistentVolumeClaim, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterPersistentVolumeClaimChange},
|
||||
{Event: framework.ClusterEvent{Resource: framework.PersistentVolume, ActionType: framework.Add | framework.Update}},
|
||||
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: nodeActionType}},
|
||||
|
||||
// We rely on CSI node to translate in-tree PV to CSI.
|
||||
// TODO: kube-scheduler will unregister the CSINode events once all the volume plugins have completed their CSI migration.
|
||||
{Event: framework.ClusterEvent{Resource: framework.CSINode, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterCSINodeChange},
|
||||
|
||||
// When CSIStorageCapacity is enabled, pods may become schedulable
|
||||
// on CSI driver & storage capacity changes.
|
||||
{Event: framework.ClusterEvent{Resource: framework.CSIDriver, ActionType: framework.Update}, QueueingHintFn: pl.isSchedulableAfterCSIDriverChange},
|
||||
{Event: framework.ClusterEvent{Resource: framework.CSIStorageCapacity, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterCSIStorageCapacityChange},
|
||||
}
|
||||
return events, nil
|
||||
}
|
||||
|
||||
func (pl *VolumeBinding) isSchedulableAfterCSINodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
if oldObj == nil {
|
||||
logger.V(5).Info("CSINode creation could make the pod schedulable")
|
||||
return framework.Queue, nil
|
||||
}
|
||||
oldCSINode, modifiedCSINode, err := util.As[*storagev1.CSINode](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
logger = klog.LoggerWithValues(
|
||||
logger,
|
||||
"Pod", klog.KObj(pod),
|
||||
"CSINode", klog.KObj(modifiedCSINode),
|
||||
)
|
||||
|
||||
if oldCSINode.ObjectMeta.Annotations[v1.MigratedPluginsAnnotationKey] != modifiedCSINode.ObjectMeta.Annotations[v1.MigratedPluginsAnnotationKey] {
|
||||
logger.V(5).Info("CSINode's migrated plugins annotation is updated and that may make the pod schedulable")
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("CISNode was created or updated but it doesn't make this pod schedulable")
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
func (pl *VolumeBinding) isSchedulableAfterPersistentVolumeClaimChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
_, newPVC, err := util.As[*v1.PersistentVolumeClaim](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
logger = klog.LoggerWithValues(
|
||||
logger,
|
||||
"Pod", klog.KObj(pod),
|
||||
"PersistentVolumeClaim", klog.KObj(newPVC),
|
||||
)
|
||||
|
||||
if pod.Namespace != newPVC.Namespace {
|
||||
logger.V(5).Info("PersistentVolumeClaim was created or updated, but it doesn't make this pod schedulable because the PVC belongs to a different namespace")
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
for _, vol := range pod.Spec.Volumes {
|
||||
var pvcName string
|
||||
switch {
|
||||
case vol.PersistentVolumeClaim != nil:
|
||||
pvcName = vol.PersistentVolumeClaim.ClaimName
|
||||
case vol.Ephemeral != nil:
|
||||
pvcName = ephemeral.VolumeClaimName(pod, &vol)
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
if pvcName == newPVC.Name {
|
||||
// Return Queue because, in this case,
|
||||
// all PVC creations and almost all PVC updates could make the Pod schedulable.
|
||||
logger.V(5).Info("PersistentVolumeClaim the pod requires was created or updated, potentially making the target Pod schedulable")
|
||||
return framework.Queue, nil
|
||||
}
|
||||
}
|
||||
|
||||
logger.V(5).Info("PersistentVolumeClaim was created or updated, but it doesn't make this pod schedulable")
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterStorageClassChange checks whether a StorageClass event might make a Pod schedulable or not.
// Any StorageClass addition and any StorageClass update to allowedTopologies
// might make a Pod schedulable.
// Note that an update to the volume binding mode is not allowed, so we don't have to consider it while examining the update event.
|
||||
func (pl *VolumeBinding) isSchedulableAfterStorageClassChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
oldSC, newSC, err := util.As[*storagev1.StorageClass](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
logger = klog.LoggerWithValues(
|
||||
logger,
|
||||
"Pod", klog.KObj(pod),
|
||||
"StorageClass", klog.KObj(newSC),
|
||||
)
|
||||
|
||||
if oldSC == nil {
|
||||
// No further filtering can be made for a creation event,
|
||||
// and we just always return Queue.
|
||||
logger.V(5).Info("A new StorageClass was created, which could make a Pod schedulable")
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
if !apiequality.Semantic.DeepEqual(newSC.AllowedTopologies, oldSC.AllowedTopologies) {
|
||||
logger.V(5).Info("StorageClass got an update in AllowedTopologies", "AllowedTopologies", newSC.AllowedTopologies)
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("StorageClass was updated, but it doesn't make this pod schedulable")
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterCSIStorageCapacityChange checks whether a CSIStorageCapacity event
// might make a Pod schedulable or not.
// Any CSIStorageCapacity addition and any CSIStorageCapacity update that raises the volume limit
// (calculated based on capacity and maximumVolumeSize) might make a Pod schedulable.
// Note that updates to nodeTopology and storageClassName are not allowed, so
// we don't have to consider them while examining the update event.
|
||||
func (pl *VolumeBinding) isSchedulableAfterCSIStorageCapacityChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
oldCap, newCap, err := util.As[*storagev1.CSIStorageCapacity](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
if oldCap == nil {
|
||||
logger.V(5).Info(
|
||||
"A new CSIStorageCapacity was created, which could make a Pod schedulable",
|
||||
"Pod", klog.KObj(pod),
|
||||
"CSIStorageCapacity", klog.KObj(newCap),
|
||||
)
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
oldLimit := volumeLimit(oldCap)
|
||||
newLimit := volumeLimit(newCap)
|
||||
|
||||
logger = klog.LoggerWithValues(
|
||||
logger,
|
||||
"Pod", klog.KObj(pod),
|
||||
"CSIStorageCapacity", klog.KObj(newCap),
|
||||
"volumeLimit(new)", newLimit,
|
||||
"volumeLimit(old)", oldLimit,
|
||||
)
|
||||
|
||||
if newLimit != nil && (oldLimit == nil || newLimit.Value() > oldLimit.Value()) {
|
||||
logger.V(5).Info("VolumeLimit was increased, which could make a Pod schedulable")
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("CSIStorageCapacity was updated, but it doesn't make this pod schedulable")
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
func (pl *VolumeBinding) isSchedulableAfterCSIDriverChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
originalCSIDriver, modifiedCSIDriver, err := util.As[*storagev1.CSIDriver](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, err
|
||||
}
|
||||
|
||||
logger = klog.LoggerWithValues(
|
||||
logger,
|
||||
"Pod", klog.KObj(pod),
|
||||
"CSIDriver", klog.KObj(modifiedCSIDriver),
|
||||
)
|
||||
|
||||
for _, vol := range pod.Spec.Volumes {
|
||||
if vol.CSI == nil || vol.CSI.Driver != modifiedCSIDriver.Name {
|
||||
continue
|
||||
}
|
||||
if (originalCSIDriver.Spec.StorageCapacity != nil && *originalCSIDriver.Spec.StorageCapacity) &&
|
||||
(modifiedCSIDriver.Spec.StorageCapacity == nil || !*modifiedCSIDriver.Spec.StorageCapacity) {
|
||||
logger.V(5).Info("CSIDriver was updated and storage capacity got disabled, which may make the pod schedulable")
|
||||
return framework.Queue, nil
|
||||
}
|
||||
}
|
||||
|
||||
logger.V(5).Info("CSIDriver was created or updated but it doesn't make this pod schedulable")
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// podHasPVCs returns 2 values:
// - the first denotes whether the given "pod" defines any PVC.
// - the second reports any error if a requested PVC is illegal.
|
||||
func (pl *VolumeBinding) podHasPVCs(pod *v1.Pod) (bool, error) {
|
||||
hasPVC := false
|
||||
for _, vol := range pod.Spec.Volumes {
|
||||
var pvcName string
|
||||
isEphemeral := false
|
||||
switch {
|
||||
case vol.PersistentVolumeClaim != nil:
|
||||
pvcName = vol.PersistentVolumeClaim.ClaimName
|
||||
case vol.Ephemeral != nil:
|
||||
pvcName = ephemeral.VolumeClaimName(pod, &vol)
|
||||
isEphemeral = true
|
||||
default:
|
||||
// Volume is not using a PVC, ignore
|
||||
continue
|
||||
}
|
||||
hasPVC = true
|
||||
pvc, err := pl.PVCLister.PersistentVolumeClaims(pod.Namespace).Get(pvcName)
|
||||
if err != nil {
|
||||
// The error usually already has enough context ("persistentvolumeclaim "myclaim" not found"),
|
||||
// but we can do better for generic ephemeral inline volumes where that situation
|
||||
// is normal directly after creating a pod.
|
||||
if isEphemeral && apierrors.IsNotFound(err) {
|
||||
err = fmt.Errorf("waiting for ephemeral volume controller to create the persistentvolumeclaim %q", pvcName)
|
||||
}
|
||||
return hasPVC, err
|
||||
}
|
||||
|
||||
if pvc.Status.Phase == v1.ClaimLost {
|
||||
return hasPVC, fmt.Errorf("persistentvolumeclaim %q bound to non-existent persistentvolume %q", pvc.Name, pvc.Spec.VolumeName)
|
||||
}
|
||||
|
||||
if pvc.DeletionTimestamp != nil {
|
||||
return hasPVC, fmt.Errorf("persistentvolumeclaim %q is being deleted", pvc.Name)
|
||||
}
|
||||
|
||||
if isEphemeral {
|
||||
if err := ephemeral.VolumeIsForPod(pod, pvc); err != nil {
|
||||
return hasPVC, err
|
||||
}
|
||||
}
|
||||
}
|
||||
return hasPVC, nil
|
||||
}
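A small, hypothetical sketch of the naming convention relied on above for generic ephemeral inline volumes: the controller-created claim is expected to be named "<pod name>-<volume name>", which is what the PVC lookup (and the ownership check via ephemeral.VolumeIsForPod) is waiting for. The pod and volume names are made up.

package volumebinding

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/component-helpers/storage/ephemeral"
)

// exampleEphemeralClaimName prints the PVC name derived for an ephemeral volume.
func exampleEphemeralClaimName() {
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "web-0", Namespace: "default"},
		Spec: v1.PodSpec{
			Volumes: []v1.Volume{{
				Name:         "scratch",
				VolumeSource: v1.VolumeSource{Ephemeral: &v1.EphemeralVolumeSource{}},
			}},
		},
	}
	fmt.Println(ephemeral.VolumeClaimName(pod, &pod.Spec.Volumes[0])) // "web-0-scratch"
}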
|
||||
|
||||
// PreFilter invoked at the prefilter extension point to check if the pod has all
// immediate PVCs bound. If not all immediate PVCs are bound, an
// UnschedulableAndUnresolvable status is returned.
|
||||
func (pl *VolumeBinding) PreFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
|
||||
logger := klog.FromContext(ctx)
|
||||
// If pod does not reference any PVC, we don't need to do anything.
|
||||
if hasPVC, err := pl.podHasPVCs(pod); err != nil {
|
||||
return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error())
|
||||
} else if !hasPVC {
|
||||
state.Write(stateKey, &stateData{})
|
||||
return nil, framework.NewStatus(framework.Skip)
|
||||
}
|
||||
podVolumeClaims, err := pl.Binder.GetPodVolumeClaims(logger, pod)
|
||||
if err != nil {
|
||||
return nil, framework.AsStatus(err)
|
||||
}
|
||||
if len(podVolumeClaims.unboundClaimsImmediate) > 0 {
|
||||
// Return UnschedulableAndUnresolvable error if immediate claims are
|
||||
// not bound. Pod will be moved to active/backoff queues once these
|
||||
// claims are bound by PV controller.
|
||||
status := framework.NewStatus(framework.UnschedulableAndUnresolvable)
|
||||
status.AppendReason("pod has unbound immediate PersistentVolumeClaims")
|
||||
return nil, status
|
||||
}
|
||||
state.Write(stateKey, &stateData{
|
||||
podVolumesByNode: make(map[string]*PodVolumes),
|
||||
podVolumeClaims: &PodVolumeClaims{
|
||||
boundClaims: podVolumeClaims.boundClaims,
|
||||
unboundClaimsDelayBinding: podVolumeClaims.unboundClaimsDelayBinding,
|
||||
unboundVolumesDelayBinding: podVolumeClaims.unboundVolumesDelayBinding,
|
||||
},
|
||||
})
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// PreFilterExtensions returns prefilter extensions; this plugin does not need any.
|
||||
func (pl *VolumeBinding) PreFilterExtensions() framework.PreFilterExtensions {
|
||||
return nil
|
||||
}
|
||||
|
||||
func getStateData(cs *framework.CycleState) (*stateData, error) {
|
||||
state, err := cs.Read(stateKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s, ok := state.(*stateData)
|
||||
if !ok {
|
||||
return nil, errors.New("unable to convert state into stateData")
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Filter invoked at the filter extension point.
|
||||
// It evaluates if a pod can fit due to the volumes it requests,
|
||||
// for both bound and unbound PVCs.
|
||||
//
|
||||
// For PVCs that are bound, then it checks that the corresponding PV's node affinity is
|
||||
// satisfied by the given node.
|
||||
//
|
||||
// For PVCs that are unbound, it tries to find available PVs that can satisfy the PVC requirements
|
||||
// and that the PV node affinity is satisfied by the given node.
|
||||
//
|
||||
// If storage capacity tracking is enabled, then enough space has to be available
|
||||
// for the node and volumes that still need to be created.
|
||||
//
|
||||
// The predicate returns true if all bound PVCs have compatible PVs with the node, and if all unbound
|
||||
// PVCs can be matched with an available and node-compatible PV.
|
||||
func (pl *VolumeBinding) Filter(ctx context.Context, cs *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
logger := klog.FromContext(ctx)
|
||||
node := nodeInfo.Node()
|
||||
|
||||
state, err := getStateData(cs)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
|
||||
podVolumes, reasons, err := pl.Binder.FindPodVolumes(logger, pod, state.podVolumeClaims, node)
|
||||
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
|
||||
if len(reasons) > 0 {
|
||||
status := framework.NewStatus(framework.UnschedulableAndUnresolvable)
|
||||
for _, reason := range reasons {
|
||||
status.AppendReason(string(reason))
|
||||
}
|
||||
return status
|
||||
}
|
||||
|
||||
// multiple goroutines call `Filter` on different nodes simultaneously and the `CycleState` may be duplicated, so we must use a local lock here
|
||||
state.Lock()
|
||||
state.podVolumesByNode[node.Name] = podVolumes
|
||||
state.hasStaticBindings = state.hasStaticBindings || (podVolumes != nil && len(podVolumes.StaticBindings) > 0)
|
||||
state.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// PreScore invoked at the preScore extension point. It checks whether volumeBinding can skip Score
|
||||
func (pl *VolumeBinding) PreScore(ctx context.Context, cs *framework.CycleState, pod *v1.Pod, nodes []*framework.NodeInfo) *framework.Status {
|
||||
if pl.scorer == nil {
|
||||
return framework.NewStatus(framework.Skip)
|
||||
}
|
||||
state, err := getStateData(cs)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
if state.hasStaticBindings {
|
||||
return nil
|
||||
}
|
||||
return framework.NewStatus(framework.Skip)
|
||||
}
|
||||
|
||||
// Score invoked at the score extension point.
|
||||
func (pl *VolumeBinding) Score(ctx context.Context, cs *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
|
||||
if pl.scorer == nil {
|
||||
return 0, nil
|
||||
}
|
||||
state, err := getStateData(cs)
|
||||
if err != nil {
|
||||
return 0, framework.AsStatus(err)
|
||||
}
|
||||
podVolumes, ok := state.podVolumesByNode[nodeName]
|
||||
if !ok {
|
||||
return 0, nil
|
||||
}
|
||||
// group by storage class
|
||||
classResources := make(classResourceMap)
|
||||
for _, staticBinding := range podVolumes.StaticBindings {
|
||||
class := staticBinding.StorageClassName()
|
||||
storageResource := staticBinding.StorageResource()
|
||||
if _, ok := classResources[class]; !ok {
|
||||
classResources[class] = &StorageResource{
|
||||
Requested: 0,
|
||||
Capacity: 0,
|
||||
}
|
||||
}
|
||||
classResources[class].Requested += storageResource.Requested
|
||||
classResources[class].Capacity += storageResource.Capacity
|
||||
}
|
||||
return pl.scorer(classResources), nil
|
||||
}
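A hypothetical sketch of what the per-class grouping above produces for one node once Filter has recorded static bindings: requested and capacity are summed per storage class, and the capacity scorer averages the per-class utilizations. The class names and sizes are made up, and the final figure assumes a linear shape where score is proportional to utilization.

package volumebinding

// exampleScoreAggregation mirrors the grouping done in Score for a single node.
func exampleScoreAggregation(scorer volumeCapacityScorer) int64 {
	classResources := classResourceMap{
		// PVs from class "fast": 4Gi requested out of 10Gi capacity (40% utilized).
		"fast": {Requested: 4 << 30, Capacity: 10 << 30},
		// PVs from class "slow": 90Gi requested out of 100Gi capacity (90% utilized).
		"slow": {Requested: 90 << 30, Capacity: 100 << 30},
	}
	// With a linear shape this evaluates to roughly (40 + 90) / 2 = 65.
	return scorer(classResources)
}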
|
||||
|
||||
// ScoreExtensions of the Score plugin.
|
||||
func (pl *VolumeBinding) ScoreExtensions() framework.ScoreExtensions {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Reserve reserves volumes of pod and saves binding status in cycle state.
|
||||
func (pl *VolumeBinding) Reserve(ctx context.Context, cs *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status {
|
||||
state, err := getStateData(cs)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
// we don't need to hold the lock as only one node will be reserved for the given pod
|
||||
podVolumes, ok := state.podVolumesByNode[nodeName]
|
||||
if ok {
|
||||
allBound, err := pl.Binder.AssumePodVolumes(klog.FromContext(ctx), pod, nodeName, podVolumes)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
state.allBound = allBound
|
||||
} else {
|
||||
// may not exist if the pod does not reference any PVC
|
||||
state.allBound = true
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// PreBind will make the API update with the assumed bindings and wait until
|
||||
// the PV controller has completely finished the binding operation.
|
||||
//
|
||||
// If binding errors, times out or gets undone, then an error will be returned to
|
||||
// retry scheduling.
|
||||
func (pl *VolumeBinding) PreBind(ctx context.Context, cs *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status {
|
||||
s, err := getStateData(cs)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
if s.allBound {
|
||||
// no need to bind volumes
|
||||
return nil
|
||||
}
|
||||
// we don't need to hold the lock as only one node will be pre-bound for the given pod
|
||||
podVolumes, ok := s.podVolumesByNode[nodeName]
|
||||
if !ok {
|
||||
return framework.AsStatus(fmt.Errorf("no pod volumes found for node %q", nodeName))
|
||||
}
|
||||
logger := klog.FromContext(ctx)
|
||||
logger.V(5).Info("Trying to bind volumes for pod", "pod", klog.KObj(pod))
|
||||
err = pl.Binder.BindPodVolumes(ctx, pod, podVolumes)
|
||||
if err != nil {
|
||||
logger.V(5).Info("Failed to bind volumes for pod", "pod", klog.KObj(pod), "err", err)
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
logger.V(5).Info("Success binding volumes for pod", "pod", klog.KObj(pod))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Unreserve clears assumed PV and PVC cache.
|
||||
// It's idempotent, and does nothing if no cache found for the given pod.
|
||||
func (pl *VolumeBinding) Unreserve(ctx context.Context, cs *framework.CycleState, pod *v1.Pod, nodeName string) {
|
||||
s, err := getStateData(cs)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
// we don't need to hold the lock as only one node may be unreserved
|
||||
podVolumes, ok := s.podVolumesByNode[nodeName]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
pl.Binder.RevertAssumedPodVolumes(podVolumes)
|
||||
}
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(ctx context.Context, plArgs runtime.Object, fh framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
args, ok := plArgs.(*config.VolumeBindingArgs)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("want args to be of type VolumeBindingArgs, got %T", plArgs)
|
||||
}
|
||||
if err := validation.ValidateVolumeBindingArgsWithOptions(nil, args, validation.VolumeBindingArgsValidationOptions{
|
||||
AllowVolumeCapacityPriority: fts.EnableVolumeCapacityPriority,
|
||||
}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
podInformer := fh.SharedInformerFactory().Core().V1().Pods()
|
||||
nodeInformer := fh.SharedInformerFactory().Core().V1().Nodes()
|
||||
pvcInformer := fh.SharedInformerFactory().Core().V1().PersistentVolumeClaims()
|
||||
pvInformer := fh.SharedInformerFactory().Core().V1().PersistentVolumes()
|
||||
storageClassInformer := fh.SharedInformerFactory().Storage().V1().StorageClasses()
|
||||
csiNodeInformer := fh.SharedInformerFactory().Storage().V1().CSINodes()
|
||||
capacityCheck := CapacityCheck{
|
||||
CSIDriverInformer: fh.SharedInformerFactory().Storage().V1().CSIDrivers(),
|
||||
CSIStorageCapacityInformer: fh.SharedInformerFactory().Storage().V1().CSIStorageCapacities(),
|
||||
}
|
||||
binder := NewVolumeBinder(klog.FromContext(ctx), fh.ClientSet(), podInformer, nodeInformer, csiNodeInformer, pvcInformer, pvInformer, storageClassInformer, capacityCheck, time.Duration(args.BindTimeoutSeconds)*time.Second)
|
||||
|
||||
// build score function
|
||||
var scorer volumeCapacityScorer
|
||||
if fts.EnableVolumeCapacityPriority {
|
||||
shape := make(helper.FunctionShape, 0, len(args.Shape))
|
||||
for _, point := range args.Shape {
|
||||
shape = append(shape, helper.FunctionShapePoint{
|
||||
Utilization: int64(point.Utilization),
|
||||
Score: int64(point.Score) * (framework.MaxNodeScore / config.MaxCustomPriorityScore),
|
||||
})
|
||||
}
|
||||
scorer = buildScorerFunction(shape)
|
||||
}
|
||||
return &VolumeBinding{
|
||||
Binder: binder,
|
||||
PVCLister: pvcInformer.Lister(),
|
||||
scorer: scorer,
|
||||
fts: fts,
|
||||
}, nil
|
||||
}
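For context, a hedged sketch of the kind of VolumeBindingArgs this constructor expects. The values are illustrative; the UtilizationShapePoint field names are taken from the scheduler config API, and the Shape is only consumed when VolumeCapacityPriority is enabled.

package volumebinding

import (
	"k8s.io/kubernetes/pkg/scheduler/apis/config"
)

// exampleArgs builds illustrative VolumeBindingArgs: BindTimeoutSeconds bounds how
// long PreBind waits for the PV controller, and Shape defines the utilization-to-score
// curve that buildScorerFunction turns into a volumeCapacityScorer.
func exampleArgs() *config.VolumeBindingArgs {
	return &config.VolumeBindingArgs{
		BindTimeoutSeconds: 600,
		Shape: []config.UtilizationShapePoint{
			{Utilization: 0, Score: 0},
			{Utilization: 100, Score: 10},
		},
	}
}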
|
10
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumerestrictions/OWNERS
generated
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
# See the OWNERS docs at https://go.k8s.io/owners
|
||||
|
||||
approvers:
|
||||
- sig-storage-approvers
|
||||
- cofyc
|
||||
reviewers:
|
||||
- sig-storage-reviewers
|
||||
- cofyc
|
||||
labels:
|
||||
- sig/storage
|
426
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumerestrictions/volume_restrictions.go
generated
vendored
Normal file
@ -0,0 +1,426 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package volumerestrictions
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
corelisters "k8s.io/client-go/listers/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
// VolumeRestrictions is a plugin that checks volume restrictions.
|
||||
type VolumeRestrictions struct {
|
||||
pvcLister corelisters.PersistentVolumeClaimLister
|
||||
sharedLister framework.SharedLister
|
||||
enableSchedulingQueueHint bool
|
||||
}
|
||||
|
||||
var _ framework.PreFilterPlugin = &VolumeRestrictions{}
|
||||
var _ framework.FilterPlugin = &VolumeRestrictions{}
|
||||
var _ framework.EnqueueExtensions = &VolumeRestrictions{}
|
||||
var _ framework.StateData = &preFilterState{}
|
||||
|
||||
const (
|
||||
// Name is the name of the plugin used in the plugin registry and configurations.
|
||||
Name = names.VolumeRestrictions
|
||||
// preFilterStateKey is the key in CycleState to VolumeRestrictions pre-computed data for Filtering.
|
||||
// Using the name of the plugin will likely help us avoid collisions with other plugins.
|
||||
preFilterStateKey = "PreFilter" + Name
|
||||
|
||||
// ErrReasonDiskConflict is used for NoDiskConflict predicate error.
|
||||
ErrReasonDiskConflict = "node(s) had no available disk"
|
||||
// ErrReasonReadWriteOncePodConflict is used when a pod is found using the same PVC with the ReadWriteOncePod access mode.
|
||||
ErrReasonReadWriteOncePodConflict = "node has pod using PersistentVolumeClaim with the same name and ReadWriteOncePod access mode"
|
||||
)
|
||||
|
||||
// preFilterState computed at PreFilter and used at Filter.
|
||||
type preFilterState struct {
|
||||
// Names of the pod's volumes using the ReadWriteOncePod access mode.
|
||||
readWriteOncePodPVCs sets.Set[string]
|
||||
// The number of references to these ReadWriteOncePod volumes by scheduled pods.
|
||||
conflictingPVCRefCount int
|
||||
}
|
||||
|
||||
func (s *preFilterState) updateWithPod(podInfo *framework.PodInfo, multiplier int) {
|
||||
s.conflictingPVCRefCount += multiplier * s.conflictingPVCRefCountForPod(podInfo)
|
||||
}
|
||||
|
||||
func (s *preFilterState) conflictingPVCRefCountForPod(podInfo *framework.PodInfo) int {
|
||||
conflicts := 0
|
||||
for _, volume := range podInfo.Pod.Spec.Volumes {
|
||||
if volume.PersistentVolumeClaim == nil {
|
||||
continue
|
||||
}
|
||||
if s.readWriteOncePodPVCs.Has(volume.PersistentVolumeClaim.ClaimName) {
|
||||
conflicts += 1
|
||||
}
|
||||
}
|
||||
return conflicts
|
||||
}
|
||||
|
||||
// Clone the prefilter state.
|
||||
func (s *preFilterState) Clone() framework.StateData {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
return &preFilterState{
|
||||
readWriteOncePodPVCs: s.readWriteOncePodPVCs,
|
||||
conflictingPVCRefCount: s.conflictingPVCRefCount,
|
||||
}
|
||||
}
|
||||
|
||||
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (pl *VolumeRestrictions) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
func isVolumeConflict(volume *v1.Volume, pod *v1.Pod) bool {
|
||||
for _, existingVolume := range pod.Spec.Volumes {
|
||||
// Same GCE disk mounted by multiple pods conflicts unless all pods mount it read-only.
|
||||
if volume.GCEPersistentDisk != nil && existingVolume.GCEPersistentDisk != nil {
|
||||
disk, existingDisk := volume.GCEPersistentDisk, existingVolume.GCEPersistentDisk
|
||||
if disk.PDName == existingDisk.PDName && !(disk.ReadOnly && existingDisk.ReadOnly) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if volume.AWSElasticBlockStore != nil && existingVolume.AWSElasticBlockStore != nil {
|
||||
if volume.AWSElasticBlockStore.VolumeID == existingVolume.AWSElasticBlockStore.VolumeID {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if volume.ISCSI != nil && existingVolume.ISCSI != nil {
|
||||
iqn := volume.ISCSI.IQN
|
||||
eiqn := existingVolume.ISCSI.IQN
|
||||
// Two iSCSI volumes are the same if they share the same IQN. As iSCSI volumes are of type
// RWO or ROX, we can permit only one read-write mount. The same iSCSI volume mounted by multiple
// Pods conflicts unless all Pods mount it read-only.
|
||||
if iqn == eiqn && !(volume.ISCSI.ReadOnly && existingVolume.ISCSI.ReadOnly) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if volume.RBD != nil && existingVolume.RBD != nil {
|
||||
mon, pool, image := volume.RBD.CephMonitors, volume.RBD.RBDPool, volume.RBD.RBDImage
|
||||
emon, epool, eimage := existingVolume.RBD.CephMonitors, existingVolume.RBD.RBDPool, existingVolume.RBD.RBDImage
|
||||
// Two RBD images are the same if they share the same Ceph monitors, are in the same RADOS pool, and have the same image name.
// Only one read-write mount is permitted for the same RBD image.
// The same RBD image mounted by multiple Pods conflicts unless all Pods mount the image read-only.
|
||||
if haveOverlap(mon, emon) && pool == epool && image == eimage && !(volume.RBD.ReadOnly && existingVolume.RBD.ReadOnly) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// haveOverlap searches two arrays and returns true if they have at least one common element; returns false otherwise.
|
||||
func haveOverlap(a1, a2 []string) bool {
|
||||
if len(a1) > len(a2) {
|
||||
a1, a2 = a2, a1
|
||||
}
|
||||
m := sets.New(a1...)
|
||||
for _, val := range a2 {
|
||||
if _, ok := m[val]; ok {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
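A tiny, hypothetical usage sketch of haveOverlap with RBD-style monitor lists: the shorter list is turned into a set, so the comparison runs in roughly linear time. The monitor addresses are made up.

package volumerestrictions

import "fmt"

// exampleHaveOverlap demonstrates the monitor-overlap check used for RBD volumes.
func exampleHaveOverlap() {
	monsA := []string{"10.0.0.1:6789", "10.0.0.2:6789"}
	monsB := []string{"10.0.0.2:6789", "10.0.0.3:6789"}
	fmt.Println(haveOverlap(monsA, monsB)) // true: both lists contain 10.0.0.2:6789
	fmt.Println(haveOverlap(monsA, nil))   // false: nothing to overlap with
}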
|
||||
|
||||
// needsRestrictionsCheck returns true if the volume is of a type that requires a conflict check.
|
||||
func needsRestrictionsCheck(v v1.Volume) bool {
|
||||
return v.GCEPersistentDisk != nil || v.AWSElasticBlockStore != nil || v.RBD != nil || v.ISCSI != nil
|
||||
}
|
||||
|
||||
// PreFilter computes and stores cycleState containing details for enforcing ReadWriteOncePod.
|
||||
func (pl *VolumeRestrictions) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
|
||||
needsCheck := false
|
||||
for i := range pod.Spec.Volumes {
|
||||
if needsRestrictionsCheck(pod.Spec.Volumes[i]) {
|
||||
needsCheck = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
pvcs, err := pl.readWriteOncePodPVCsForPod(ctx, pod)
|
||||
if err != nil {
|
||||
if apierrors.IsNotFound(err) {
|
||||
return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error())
|
||||
}
|
||||
return nil, framework.AsStatus(err)
|
||||
}
|
||||
|
||||
s, err := pl.calPreFilterState(ctx, pod, pvcs)
|
||||
if err != nil {
|
||||
return nil, framework.AsStatus(err)
|
||||
}
|
||||
|
||||
if !needsCheck && s.conflictingPVCRefCount == 0 {
|
||||
return nil, framework.NewStatus(framework.Skip)
|
||||
}
|
||||
cycleState.Write(preFilterStateKey, s)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// AddPod from pre-computed data in cycleState.
|
||||
func (pl *VolumeRestrictions) AddPod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *v1.Pod, podInfoToAdd *framework.PodInfo, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
state, err := getPreFilterState(cycleState)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
state.updateWithPod(podInfoToAdd, 1)
|
||||
return nil
|
||||
}
|
||||
|
||||
// RemovePod from pre-computed data in cycleState.
|
||||
func (pl *VolumeRestrictions) RemovePod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *v1.Pod, podInfoToRemove *framework.PodInfo, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
state, err := getPreFilterState(cycleState)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
state.updateWithPod(podInfoToRemove, -1)
|
||||
return nil
|
||||
}
|
||||
|
||||
func getPreFilterState(cycleState *framework.CycleState) (*preFilterState, error) {
|
||||
c, err := cycleState.Read(preFilterStateKey)
|
||||
if err != nil {
|
||||
// preFilterState doesn't exist, likely PreFilter wasn't invoked.
|
||||
return nil, fmt.Errorf("cannot read %q from cycleState", preFilterStateKey)
|
||||
}
|
||||
|
||||
s, ok := c.(*preFilterState)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("%+v convert to volumerestrictions.state error", c)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// calPreFilterState computes preFilterState describing which PVCs use ReadWriteOncePod
|
||||
// and which pods in the cluster are in conflict.
|
||||
func (pl *VolumeRestrictions) calPreFilterState(ctx context.Context, pod *v1.Pod, pvcs sets.Set[string]) (*preFilterState, error) {
|
||||
conflictingPVCRefCount := 0
|
||||
for pvc := range pvcs {
|
||||
key := framework.GetNamespacedName(pod.Namespace, pvc)
|
||||
if pl.sharedLister.StorageInfos().IsPVCUsedByPods(key) {
|
||||
// There can only be at most one pod using the ReadWriteOncePod PVC.
|
||||
conflictingPVCRefCount += 1
|
||||
}
|
||||
}
|
||||
return &preFilterState{
|
||||
readWriteOncePodPVCs: pvcs,
|
||||
conflictingPVCRefCount: conflictingPVCRefCount,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (pl *VolumeRestrictions) readWriteOncePodPVCsForPod(ctx context.Context, pod *v1.Pod) (sets.Set[string], error) {
|
||||
pvcs := sets.New[string]()
|
||||
for _, volume := range pod.Spec.Volumes {
|
||||
if volume.PersistentVolumeClaim == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
pvc, err := pl.pvcLister.PersistentVolumeClaims(pod.Namespace).Get(volume.PersistentVolumeClaim.ClaimName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !v1helper.ContainsAccessMode(pvc.Spec.AccessModes, v1.ReadWriteOncePod) {
|
||||
continue
|
||||
}
|
||||
pvcs.Insert(pvc.Name)
|
||||
}
|
||||
return pvcs, nil
|
||||
}
|
||||
|
||||
// Checks if scheduling the pod onto this node would cause any conflicts with
|
||||
// existing volumes.
|
||||
func satisfyVolumeConflicts(pod *v1.Pod, nodeInfo *framework.NodeInfo) bool {
|
||||
for i := range pod.Spec.Volumes {
|
||||
v := pod.Spec.Volumes[i]
|
||||
if !needsRestrictionsCheck(v) {
|
||||
continue
|
||||
}
|
||||
for _, ev := range nodeInfo.Pods {
|
||||
if isVolumeConflict(&v, ev.Pod) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
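A hypothetical sketch of the classic GCE PD case handled above: a read-write mount of a disk that a pod on the node already mounts read-write is a conflict, so the node is filtered out. The pod and disk names are made up.

package volumerestrictions

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

// exampleDiskConflict shows satisfyVolumeConflicts rejecting a node with a
// conflicting GCE PersistentDisk mount.
func exampleDiskConflict() {
	gcePD := func(readOnly bool) v1.VolumeSource {
		return v1.VolumeSource{
			GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{PDName: "data-disk", ReadOnly: readOnly},
		}
	}
	existing := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "running", Namespace: "default"},
		Spec:       v1.PodSpec{Volumes: []v1.Volume{{Name: "data", VolumeSource: gcePD(false)}}},
	}
	incoming := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "incoming", Namespace: "default"},
		Spec:       v1.PodSpec{Volumes: []v1.Volume{{Name: "data", VolumeSource: gcePD(false)}}},
	}

	nodeInfo := framework.NewNodeInfo(existing)
	fmt.Println(satisfyVolumeConflicts(incoming, nodeInfo)) // false: same PD, not all mounts read-only
}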
|
||||
|
||||
// Checks if scheduling the pod would cause any ReadWriteOncePod PVC access mode conflicts.
|
||||
func satisfyReadWriteOncePod(ctx context.Context, state *preFilterState) *framework.Status {
|
||||
if state == nil {
|
||||
return nil
|
||||
}
|
||||
if state.conflictingPVCRefCount > 0 {
|
||||
return framework.NewStatus(framework.Unschedulable, ErrReasonReadWriteOncePodConflict)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// PreFilterExtensions returns prefilter extensions, pod add and remove.
|
||||
func (pl *VolumeRestrictions) PreFilterExtensions() framework.PreFilterExtensions {
|
||||
return pl
|
||||
}
|
||||
|
||||
// Filter invoked at the filter extension point.
|
||||
// It evaluates if a pod can fit due to the volumes it requests, and those that
|
||||
// are already mounted. If there is already a volume mounted on that node, another pod that uses the same volume
|
||||
// can't be scheduled there.
|
||||
// This is GCE, Amazon EBS, iSCSI and Ceph RBD specific for now:
// - GCE PD allows multiple mounts as long as they're all read-only
// - AWS EBS forbids any two pods mounting the same volume ID
// - Ceph RBD forbids two pods sharing at least one monitor and matching pool and image, unless both mount the image read-only
// - iSCSI forbids two pods sharing the same IQN, unless both mount the volume read-only
|
||||
// If the pod uses PVCs with the ReadWriteOncePod access mode, it evaluates if
|
||||
// these PVCs are already in-use and if preemption will help.
|
||||
func (pl *VolumeRestrictions) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
if !satisfyVolumeConflicts(pod, nodeInfo) {
|
||||
return framework.NewStatus(framework.Unschedulable, ErrReasonDiskConflict)
|
||||
}
|
||||
state, err := getPreFilterState(cycleState)
|
||||
if err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
return satisfyReadWriteOncePod(ctx, state)
|
||||
}
|
||||
|
||||
// EventsToRegister returns the possible events that may make a Pod
// rejected by this plugin schedulable.
|
||||
func (pl *VolumeRestrictions) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
// A note about UpdateNodeTaint/UpdateNodeLabel event:
|
||||
// Ideally, it's supposed to register only Add because any Node update event will never change the result from this plugin.
|
||||
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
|
||||
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
||||
nodeActionType := framework.Add | framework.UpdateNodeTaint | framework.UpdateNodeLabel
|
||||
if pl.enableSchedulingQueueHint {
|
||||
// preCheck is not used when QHint is enabled, and hence Update event isn't necessary.
|
||||
nodeActionType = framework.Add
|
||||
}
|
||||
|
||||
return []framework.ClusterEventWithHint{
|
||||
// Pods may fail to schedule because of volumes conflicting with other pods on the same node.
// Once running pods are deleted and volumes have been released, the unschedulable pod will be schedulable.
// Because the `spec.volumes` field is immutable, pod update events are ignored.
|
||||
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Delete}, QueueingHintFn: pl.isSchedulableAfterPodDeleted},
|
||||
// A new Node may make a pod schedulable.
|
||||
// We intentionally don't set QueueingHint since all Node/Add events could make Pods schedulable.
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: nodeActionType}},
|
||||
// Pods may fail to schedule because the PVC it uses has not yet been created.
|
||||
// This PVC is required to exist to check its access modes.
|
||||
{Event: framework.ClusterEvent{Resource: framework.PersistentVolumeClaim, ActionType: framework.Add},
|
||||
QueueingHintFn: pl.isSchedulableAfterPersistentVolumeClaimAdded},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterPersistentVolumeClaimAdded is invoked whenever a PersistentVolumeClaim is added or changed. It checks whether
// that change made a previously unschedulable pod schedulable.
|
||||
func (pl *VolumeRestrictions) isSchedulableAfterPersistentVolumeClaimAdded(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
_, newPersistentVolumeClaim, err := util.As[*v1.PersistentVolumeClaim](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, fmt.Errorf("unexpected objects in isSchedulableAfterPersistentVolumeClaimChange: %w", err)
|
||||
}
|
||||
|
||||
if newPersistentVolumeClaim.Namespace != pod.Namespace {
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
for _, volume := range pod.Spec.Volumes {
|
||||
if volume.PersistentVolumeClaim == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if volume.PersistentVolumeClaim.ClaimName == newPersistentVolumeClaim.Name {
|
||||
logger.V(5).Info("PVC that is referred from the pod was created, which might make this pod schedulable", "pod", klog.KObj(pod), "PVC", klog.KObj(newPersistentVolumeClaim))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
}
|
||||
logger.V(5).Info("PVC irrelevant to the Pod was created, which doesn't make this pod schedulable", "pod", klog.KObj(pod), "PVC", klog.KObj(newPersistentVolumeClaim))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterPodDeleted is invoked whenever a pod is deleted.
// It checks whether the deleted pod's volumes could have been conflicting with the volumes of the pod being scheduled on the same node.
|
||||
func (pl *VolumeRestrictions) isSchedulableAfterPodDeleted(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
deletedPod, _, err := util.As[*v1.Pod](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, fmt.Errorf("unexpected objects in isSchedulableAfterPodDeleted: %w", err)
|
||||
}
|
||||
|
||||
if deletedPod.Namespace != pod.Namespace {
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
nodeInfo := framework.NewNodeInfo(deletedPod)
|
||||
if !satisfyVolumeConflicts(pod, nodeInfo) {
|
||||
logger.V(5).Info("Pod with the volume that the target pod requires was deleted, which might make this pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(deletedPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
// Return Queue if a deleted pod uses the same PVC since the pod may be unschedulable due to the ReadWriteOncePod access mode of the PVC.
|
||||
//
|
||||
// For now, we don't actually fetch PVC and check the access mode because that operation could be expensive.
|
||||
// Once the observability around QHint is established,
|
||||
// we may want to do that depending on how much the operation would impact the QHint latency negatively.
|
||||
// https://github.com/kubernetes/kubernetes/issues/124566
|
||||
claims := sets.New[string]()
|
||||
for _, volume := range pod.Spec.Volumes {
|
||||
if volume.PersistentVolumeClaim != nil {
|
||||
claims.Insert(volume.PersistentVolumeClaim.ClaimName)
|
||||
}
|
||||
}
|
||||
for _, volume := range deletedPod.Spec.Volumes {
|
||||
if volume.PersistentVolumeClaim != nil && claims.Has(volume.PersistentVolumeClaim.ClaimName) {
|
||||
logger.V(5).Info("Pod with the same PVC that the target pod requires was deleted, which might make this pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(deletedPod))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
}
|
||||
|
||||
logger.V(5).Info("An irrelevant Pod was deleted, which doesn't make this pod schedulable", "pod", klog.KObj(pod), "deletedPod", klog.KObj(deletedPod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(_ context.Context, _ runtime.Object, handle framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
informerFactory := handle.SharedInformerFactory()
|
||||
pvcLister := informerFactory.Core().V1().PersistentVolumeClaims().Lister()
|
||||
sharedLister := handle.SnapshotSharedLister()
|
||||
|
||||
return &VolumeRestrictions{
|
||||
pvcLister: pvcLister,
|
||||
sharedLister: sharedLister,
|
||||
enableSchedulingQueueHint: fts.EnableSchedulingQueueHint,
|
||||
}, nil
|
||||
}
|
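// Illustrative sketch, not part of the upstream file: how a feature-gated factory
// like New above is typically adapted into a scheduler plugin registry entry.
// The "frameworkruntime" import alias (pkg/scheduler/framework/runtime, whose
// FactoryAdapter appears later in this diff), the package's Name constant, and
// the enabled feature gate are assumptions for this example.
func exampleRegistryEntry(fts feature.Features) frameworkruntime.Registry {
	return frameworkruntime.Registry{
		Name: frameworkruntime.FactoryAdapter(fts, New),
	}
}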
10
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumezone/OWNERS
generated
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
# See the OWNERS docs at https://go.k8s.io/owners
|
||||
|
||||
approvers:
|
||||
- sig-storage-approvers
|
||||
- cofyc
|
||||
reviewers:
|
||||
- sig-storage-reviewers
|
||||
- cofyc
|
||||
labels:
|
||||
- sig/storage
|
410
vendor/k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumezone/volume_zone.go
generated
vendored
Normal file
@ -0,0 +1,410 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package volumezone
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
storage "k8s.io/api/storage/v1"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
corelisters "k8s.io/client-go/listers/core/v1"
|
||||
storagelisters "k8s.io/client-go/listers/storage/v1"
|
||||
volumehelpers "k8s.io/cloud-provider/volume/helpers"
|
||||
storagehelpers "k8s.io/component-helpers/storage/volume"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
// VolumeZone is a plugin that checks volume zone.
|
||||
type VolumeZone struct {
|
||||
pvLister corelisters.PersistentVolumeLister
|
||||
pvcLister corelisters.PersistentVolumeClaimLister
|
||||
scLister storagelisters.StorageClassLister
|
||||
enableSchedulingQueueHint bool
|
||||
}
|
||||
|
||||
var _ framework.FilterPlugin = &VolumeZone{}
|
||||
var _ framework.PreFilterPlugin = &VolumeZone{}
|
||||
var _ framework.EnqueueExtensions = &VolumeZone{}
|
||||
|
||||
const (
|
||||
// Name is the name of the plugin used in the plugin registry and configurations.
|
||||
Name = names.VolumeZone
|
||||
|
||||
preFilterStateKey framework.StateKey = "PreFilter" + Name
|
||||
|
||||
// ErrReasonConflict is used for NoVolumeZoneConflict predicate error.
|
||||
ErrReasonConflict = "node(s) had no available volume zone"
|
||||
)
|
||||
|
||||
// pvTopology holds the value of a pv's topologyLabel
|
||||
type pvTopology struct {
|
||||
pvName string
|
||||
key string
|
||||
values sets.Set[string]
|
||||
}
|
||||
|
||||
// The state is initialized in the PreFilter phase. Because we save a pointer to it in
// framework.CycleState, later phases don't need to call the Write method again to
// update the value.
|
||||
type stateData struct {
|
||||
// podPVTopologies holds the pv information we need
|
||||
// it's initialized in the PreFilter phase
|
||||
podPVTopologies []pvTopology
|
||||
}
|
||||
|
||||
func (d *stateData) Clone() framework.StateData {
|
||||
return d
|
||||
}
|
||||
|
||||
var topologyLabels = []string{
|
||||
v1.LabelFailureDomainBetaZone,
|
||||
v1.LabelFailureDomainBetaRegion,
|
||||
v1.LabelTopologyZone,
|
||||
v1.LabelTopologyRegion,
|
||||
}
|
||||
|
||||
func translateToGALabel(label string) string {
|
||||
if label == v1.LabelFailureDomainBetaRegion {
|
||||
return v1.LabelTopologyRegion
|
||||
}
|
||||
if label == v1.LabelFailureDomainBetaZone {
|
||||
return v1.LabelTopologyZone
|
||||
}
|
||||
return label
|
||||
}
|
||||
|
||||
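// Illustrative sketch, not part of the upstream file: the deprecated beta zone label
// resolves to its GA equivalent, while unknown keys pass through unchanged.
func exampleTranslateToGALabel() {
	_ = translateToGALabel(v1.LabelFailureDomainBetaZone) // "topology.kubernetes.io/zone"
	_ = translateToGALabel("example.com/custom-topology") // returned as-is
}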
// Name returns name of the plugin. It is used in logs, etc.
|
||||
func (pl *VolumeZone) Name() string {
|
||||
return Name
|
||||
}
|
||||
|
||||
// PreFilter invoked at the prefilter extension point
|
||||
//
|
||||
// # It finds the topology of the PersistentVolumes corresponding to the volumes a pod requests
|
||||
//
|
||||
// Currently, this is only supported with PersistentVolumeClaims,
|
||||
// and only looks for the bound PersistentVolume.
|
||||
func (pl *VolumeZone) PreFilter(ctx context.Context, cs *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
|
||||
logger := klog.FromContext(ctx)
|
||||
podPVTopologies, status := pl.getPVbyPod(logger, pod)
|
||||
if !status.IsSuccess() {
|
||||
return nil, status
|
||||
}
|
||||
if len(podPVTopologies) == 0 {
|
||||
return nil, framework.NewStatus(framework.Skip)
|
||||
}
|
||||
cs.Write(preFilterStateKey, &stateData{podPVTopologies: podPVTopologies})
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
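// Illustrative sketch, not part of the upstream file: the PreFilter-to-Filter handshake
// through CycleState that the code above and below relies on (the PV name is made up).
func exampleStateRoundTrip(cs *framework.CycleState) {
	// PreFilter stores a pointer ...
	cs.Write(preFilterStateKey, &stateData{podPVTopologies: []pvTopology{{pvName: "pv-demo"}}})

	// ... and Filter reads the very same object back; Clone() intentionally
	// returns the shared pointer, so no further Write is needed.
	if s, err := getStateData(cs); err == nil {
		_ = s.podPVTopologies
	}
	// On error, Filter falls back to recomputing the topologies via getPVbyPod.
}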
// getPVbyPod gets PVTopology from pod
|
||||
func (pl *VolumeZone) getPVbyPod(logger klog.Logger, pod *v1.Pod) ([]pvTopology, *framework.Status) {
|
||||
podPVTopologies := make([]pvTopology, 0)
|
||||
|
||||
pvcNames := pl.getPersistentVolumeClaimNameFromPod(pod)
|
||||
for _, pvcName := range pvcNames {
|
||||
if pvcName == "" {
|
||||
return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, "PersistentVolumeClaim had no name")
|
||||
}
|
||||
pvc, err := pl.pvcLister.PersistentVolumeClaims(pod.Namespace).Get(pvcName)
|
||||
if s := getErrorAsStatus(err); !s.IsSuccess() {
|
||||
return nil, s
|
||||
}
|
||||
|
||||
pvName := pvc.Spec.VolumeName
|
||||
if pvName == "" {
|
||||
scName := storagehelpers.GetPersistentVolumeClaimClass(pvc)
|
||||
if len(scName) == 0 {
|
||||
return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, "PersistentVolumeClaim had no pv name and storageClass name")
|
||||
}
|
||||
|
||||
class, err := pl.scLister.Get(scName)
|
||||
if s := getErrorAsStatus(err); !s.IsSuccess() {
|
||||
return nil, s
|
||||
}
|
||||
if class.VolumeBindingMode == nil {
|
||||
return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, fmt.Sprintf("VolumeBindingMode not set for StorageClass %q", scName))
|
||||
}
|
||||
if *class.VolumeBindingMode == storage.VolumeBindingWaitForFirstConsumer {
|
||||
// Skip unbound volumes
|
||||
continue
|
||||
}
|
||||
|
||||
return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, "PersistentVolume had no name")
|
||||
}
|
||||
|
||||
pv, err := pl.pvLister.Get(pvName)
|
||||
if s := getErrorAsStatus(err); !s.IsSuccess() {
|
||||
return nil, s
|
||||
}
|
||||
podPVTopologies = append(podPVTopologies, pl.getPVTopologies(logger, pv)...)
|
||||
}
|
||||
return podPVTopologies, nil
|
||||
}
|
||||
|
||||
// PreFilterExtensions returns prefilter extensions, pod add and remove.
|
||||
func (pl *VolumeZone) PreFilterExtensions() framework.PreFilterExtensions {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Filter invoked at the filter extension point.
|
||||
//
|
||||
// It evaluates if a pod can fit due to the volumes it requests, given
|
||||
// that some volumes may have zone scheduling constraints. The requirement is that any
|
||||
// volume zone-labels must match the equivalent zone-labels on the node. It is OK for
|
||||
// the node to have more zone-label constraints (for example, a hypothetical replicated
|
||||
// volume might allow region-wide access)
|
||||
//
|
||||
// Currently this is only supported with PersistentVolumeClaims, and looks to the labels
|
||||
// only on the bound PersistentVolume.
|
||||
//
|
||||
// Working with volumes declared inline in the pod specification (i.e. not
|
||||
// using a PersistentVolume) is likely to be harder, as it would require
|
||||
// determining the zone of a volume during scheduling, and that is likely to
|
||||
// require calling out to the cloud provider. It seems that we are moving away
|
||||
// from inline volume declarations anyway.
|
||||
func (pl *VolumeZone) Filter(ctx context.Context, cs *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
|
||||
logger := klog.FromContext(ctx)
|
||||
// If a pod doesn't have any volume attached to it, the predicate will always be true.
|
||||
// Thus we make a fast path for it, to avoid unnecessary computations in this case.
|
||||
if len(pod.Spec.Volumes) == 0 {
|
||||
return nil
|
||||
}
|
||||
var podPVTopologies []pvTopology
|
||||
state, err := getStateData(cs)
|
||||
if err != nil {
|
||||
// Fallback to calculate pv list here
|
||||
var status *framework.Status
|
||||
podPVTopologies, status = pl.getPVbyPod(logger, pod)
|
||||
if !status.IsSuccess() {
|
||||
return status
|
||||
}
|
||||
} else {
|
||||
podPVTopologies = state.podPVTopologies
|
||||
}
|
||||
|
||||
node := nodeInfo.Node()
|
||||
hasAnyNodeConstraint := false
|
||||
for _, topologyLabel := range topologyLabels {
|
||||
if _, ok := node.Labels[topologyLabel]; ok {
|
||||
hasAnyNodeConstraint = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !hasAnyNodeConstraint {
|
||||
// The node has no zone constraints, so we're OK to schedule.
|
||||
// This is to handle a single-zone cluster scenario where the node may not have any topology labels.
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, pvTopology := range podPVTopologies {
|
||||
v, ok := node.Labels[pvTopology.key]
|
||||
if !ok {
|
||||
// if we can't match the beta label, try to match pv's beta label with node's ga label
|
||||
v, ok = node.Labels[translateToGALabel(pvTopology.key)]
|
||||
}
|
||||
if !ok || !pvTopology.values.Has(v) {
|
||||
logger.V(10).Info("Won't schedule pod onto node due to volume (mismatch on label key)", "pod", klog.KObj(pod), "node", klog.KObj(node), "PV", klog.KRef("", pvTopology.pvName), "PVLabelKey", pvTopology.key)
|
||||
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonConflict)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getStateData(cs *framework.CycleState) (*stateData, error) {
|
||||
state, err := cs.Read(preFilterStateKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s, ok := state.(*stateData)
|
||||
if !ok {
|
||||
return nil, errors.New("unable to convert state into stateData")
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func getErrorAsStatus(err error) *framework.Status {
|
||||
if err != nil {
|
||||
if apierrors.IsNotFound(err) {
|
||||
return framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error())
|
||||
}
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
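// Illustrative sketch, not part of the upstream file: how getErrorAsStatus classifies
// lister errors (the claim name is made up). A missing PVC or StorageClass cannot be
// resolved by retrying on other nodes, hence UnschedulableAndUnresolvable instead of Error.
func exampleErrorClassification() {
	_ = getErrorAsStatus(nil) // nil: success

	notFound := apierrors.NewNotFound(v1.Resource("persistentvolumeclaims"), "data-0")
	_ = getErrorAsStatus(notFound).Code() // framework.UnschedulableAndUnresolvable
}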
// EventsToRegister returns the possible events that may make a Pod
|
||||
// failed by this plugin schedulable.
|
||||
func (pl *VolumeZone) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
|
||||
// A new node or updating a node's volume zone labels may make a pod schedulable.
|
||||
// A note about UpdateNodeTaint event:
|
||||
// Ideally, it's supposed to register only Add | UpdateNodeLabel because UpdateNodeTaint will never change the result from this plugin.
|
||||
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
|
||||
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
||||
nodeActionType := framework.Add | framework.UpdateNodeLabel | framework.UpdateNodeTaint
|
||||
if pl.enableSchedulingQueueHint {
|
||||
// preCheck is not used when QHint is enabled.
|
||||
nodeActionType = framework.Add | framework.UpdateNodeLabel
|
||||
}
|
||||
|
||||
return []framework.ClusterEventWithHint{
|
||||
// New storageClass with bind mode `VolumeBindingWaitForFirstConsumer` will make a pod schedulable.
|
||||
// Due to immutable field `storageClass.volumeBindingMode`, storageClass update events are ignored.
|
||||
{Event: framework.ClusterEvent{Resource: framework.StorageClass, ActionType: framework.Add}, QueueingHintFn: pl.isSchedulableAfterStorageClassAdded},
|
||||
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: nodeActionType}},
|
||||
// A new pvc may make a pod schedulable.
|
||||
// Also, if pvc's VolumeName is filled, that also could make a pod schedulable.
|
||||
{Event: framework.ClusterEvent{Resource: framework.PersistentVolumeClaim, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterPersistentVolumeClaimChange},
|
||||
// A new pv or updating a pv's volume zone labels may make a pod schedulable.
|
||||
{Event: framework.ClusterEvent{Resource: framework.PersistentVolume, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterPersistentVolumeChange},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// getPersistentVolumeClaimNameFromPod gets pvc names bound to a pod.
|
||||
func (pl *VolumeZone) getPersistentVolumeClaimNameFromPod(pod *v1.Pod) []string {
|
||||
var pvcNames []string
|
||||
for i := range pod.Spec.Volumes {
|
||||
volume := pod.Spec.Volumes[i]
|
||||
if volume.PersistentVolumeClaim == nil {
|
||||
continue
|
||||
}
|
||||
pvcName := volume.PersistentVolumeClaim.ClaimName
|
||||
pvcNames = append(pvcNames, pvcName)
|
||||
}
|
||||
return pvcNames
|
||||
}
|
||||
|
||||
// isSchedulableAfterPersistentVolumeClaimChange is invoked whenever a PersistentVolumeClaim is added or updated.
|
||||
// It checks whether the change of PVC has made a previously unschedulable pod schedulable.
|
||||
func (pl *VolumeZone) isSchedulableAfterPersistentVolumeClaimChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
_, modifiedPVC, err := util.As[*v1.PersistentVolumeClaim](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, fmt.Errorf("unexpected objects in isSchedulableAfterPersistentVolumeClaimChange: %w", err)
|
||||
}
|
||||
if pl.isPVCRequestedFromPod(logger, modifiedPVC, pod) {
|
||||
logger.V(5).Info("PVC that is referred from the pod was created or updated, which might make this pod schedulable", "pod", klog.KObj(pod), "PVC", klog.KObj(modifiedPVC))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("PVC irrelevant to the Pod was created or updated, which doesn't make this pod schedulable", "pod", klog.KObj(pod), "PVC", klog.KObj(modifiedPVC))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// isPVCRequestedFromPod verifies if the PVC is requested from a given Pod.
|
||||
func (pl *VolumeZone) isPVCRequestedFromPod(logger klog.Logger, pvc *v1.PersistentVolumeClaim, pod *v1.Pod) bool {
|
||||
if (pvc == nil) || (pod.Namespace != pvc.Namespace) {
|
||||
return false
|
||||
}
|
||||
pvcNames := pl.getPersistentVolumeClaimNameFromPod(pod)
|
||||
for _, pvcName := range pvcNames {
|
||||
if pvc.Name == pvcName {
|
||||
logger.V(5).Info("PVC is referred from the pod", "pod", klog.KObj(pod), "PVC", klog.KObj(pvc))
|
||||
return true
|
||||
}
|
||||
}
|
||||
logger.V(5).Info("PVC is not referred from the pod", "pod", klog.KObj(pod), "PVC", klog.KObj(pvc))
|
||||
return false
|
||||
}
|
||||
|
||||
// isSchedulableAfterStorageClassAdded is invoked whenever a StorageClass is added.
|
||||
// It checks whether the addition of StorageClass has made a previously unschedulable pod schedulable.
|
||||
// Only a new StorageClass with WaitForFirstConsumer will cause a pod to become schedulable.
|
||||
func (pl *VolumeZone) isSchedulableAfterStorageClassAdded(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
_, addedStorageClass, err := util.As[*storage.StorageClass](nil, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, fmt.Errorf("unexpected objects in isSchedulableAfterStorageClassAdded: %w", err)
|
||||
}
|
||||
if (addedStorageClass.VolumeBindingMode == nil) || (*addedStorageClass.VolumeBindingMode != storage.VolumeBindingWaitForFirstConsumer) {
|
||||
logger.V(5).Info("StorageClass is created, but its VolumeBindingMode is not waitForFirstConsumer, which doesn't make the pod schedulable", "storageClass", klog.KObj(addedStorageClass), "pod", klog.KObj(pod))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("StorageClass with waitForFirstConsumer mode was created and it might make this pod schedulable", "pod", klog.KObj(pod), "StorageClass", klog.KObj(addedStorageClass))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
// isSchedulableAfterPersistentVolumeChange is invoked whenever a PersistentVolume is added or updated.
|
||||
// It checks whether the change of PV has made a previously unschedulable pod schedulable.
|
||||
// Changing the PV topology labels could cause the pod to become schedulable.
|
||||
func (pl *VolumeZone) isSchedulableAfterPersistentVolumeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
|
||||
originalPV, modifiedPV, err := util.As[*v1.PersistentVolume](oldObj, newObj)
|
||||
if err != nil {
|
||||
return framework.Queue, fmt.Errorf("unexpected objects in isSchedulableAfterPersistentVolumeChange: %w", err)
|
||||
}
|
||||
if originalPV == nil {
|
||||
logger.V(5).Info("PV is newly created, which might make the pod schedulable")
|
||||
return framework.Queue, nil
|
||||
}
|
||||
originalPVTopologies := pl.getPVTopologies(logger, originalPV)
|
||||
modifiedPVTopologies := pl.getPVTopologies(logger, modifiedPV)
|
||||
if !reflect.DeepEqual(originalPVTopologies, modifiedPVTopologies) {
|
||||
logger.V(5).Info("PV's topology was updated, which might make the pod schedulable.", "pod", klog.KObj(pod), "PV", klog.KObj(modifiedPV))
|
||||
return framework.Queue, nil
|
||||
}
|
||||
|
||||
logger.V(5).Info("PV was updated, but the topology is unchanged, which it doesn't make the pod schedulable", "pod", klog.KObj(pod), "PV", klog.KObj(modifiedPV))
|
||||
return framework.QueueSkip, nil
|
||||
}
|
||||
|
||||
// getPVTopologies retrieves the pvTopology entries from a given PV and returns them as a slice.
// This function doesn't check spec.nodeAffinity
// because it's read-only after creation and thus cannot be updated,
// and nodeAffinity is handled in the node affinity plugin.
|
||||
func (pl *VolumeZone) getPVTopologies(logger klog.Logger, pv *v1.PersistentVolume) []pvTopology {
|
||||
podPVTopologies := make([]pvTopology, 0)
|
||||
for _, key := range topologyLabels {
|
||||
if value, ok := pv.ObjectMeta.Labels[key]; ok {
|
||||
labelZonesSet, err := volumehelpers.LabelZonesToSet(value)
|
||||
if err != nil {
|
||||
logger.V(5).Info("failed to parse PV's topology label, ignoring the label", "label", fmt.Sprintf("%s:%s", key, value), "err", err)
|
||||
continue
|
||||
}
|
||||
podPVTopologies = append(podPVTopologies, pvTopology{
|
||||
pvName: pv.Name,
|
||||
key: key,
|
||||
values: sets.Set[string](labelZonesSet),
|
||||
})
|
||||
}
|
||||
}
|
||||
return podPVTopologies
|
||||
}
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(_ context.Context, _ runtime.Object, handle framework.Handle, fts feature.Features) (framework.Plugin, error) {
|
||||
informerFactory := handle.SharedInformerFactory()
|
||||
pvLister := informerFactory.Core().V1().PersistentVolumes().Lister()
|
||||
pvcLister := informerFactory.Core().V1().PersistentVolumeClaims().Lister()
|
||||
scLister := informerFactory.Storage().V1().StorageClasses().Lister()
|
||||
return &VolumeZone{
|
||||
pvLister: pvLister,
|
||||
pvcLister: pvcLister,
|
||||
scLister: scLister,
|
||||
enableSchedulingQueueHint: fts.EnableSchedulingQueueHint,
|
||||
}, nil
|
||||
}
|
738
vendor/k8s.io/kubernetes/pkg/scheduler/framework/preemption/preemption.go
generated
vendored
Normal file
@ -0,0 +1,738 @@
|
||||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package preemption
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
policy "k8s.io/api/policy/v1"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
corelisters "k8s.io/client-go/listers/core/v1"
|
||||
policylisters "k8s.io/client-go/listers/policy/v1"
|
||||
corev1helpers "k8s.io/component-helpers/scheduling/corev1"
|
||||
"k8s.io/klog/v2"
|
||||
extenderv1 "k8s.io/kube-scheduler/extender/v1"
|
||||
apipod "k8s.io/kubernetes/pkg/api/v1/pod"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
"k8s.io/kubernetes/pkg/scheduler/metrics"
|
||||
"k8s.io/kubernetes/pkg/scheduler/util"
|
||||
)
|
||||
|
||||
// Candidate represents a nominated node on which the preemptor can be scheduled,
|
||||
// along with the list of victims that should be evicted for the preemptor to fit the node.
|
||||
type Candidate interface {
|
||||
// Victims wraps a list of to-be-preempted Pods and the number of PDB violation.
|
||||
Victims() *extenderv1.Victims
|
||||
// Name returns the target node name where the preemptor gets nominated to run.
|
||||
Name() string
|
||||
}
|
||||
|
||||
type candidate struct {
|
||||
victims *extenderv1.Victims
|
||||
name string
|
||||
}
|
||||
|
||||
// Victims returns s.victims.
|
||||
func (s *candidate) Victims() *extenderv1.Victims {
|
||||
return s.victims
|
||||
}
|
||||
|
||||
// Name returns s.name.
|
||||
func (s *candidate) Name() string {
|
||||
return s.name
|
||||
}
|
||||
|
||||
type candidateList struct {
|
||||
idx int32
|
||||
items []Candidate
|
||||
}
|
||||
|
||||
func newCandidateList(size int32) *candidateList {
|
||||
return &candidateList{idx: -1, items: make([]Candidate, size)}
|
||||
}
|
||||
|
||||
// add adds a new candidate to the internal array atomically.
|
||||
func (cl *candidateList) add(c *candidate) {
|
||||
if idx := atomic.AddInt32(&cl.idx, 1); idx < int32(len(cl.items)) {
|
||||
cl.items[idx] = c
|
||||
}
|
||||
}
|
||||
|
||||
// size returns the number of candidates stored. Note that some add() operations
|
||||
// might still be executing when this is called, so care must be taken to
|
||||
// ensure that all add() operations complete before accessing the elements of
|
||||
// the list.
|
||||
func (cl *candidateList) size() int32 {
|
||||
n := atomic.LoadInt32(&cl.idx) + 1
|
||||
if n >= int32(len(cl.items)) {
|
||||
n = int32(len(cl.items))
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// get returns the internal candidate array. This function is NOT atomic and
|
||||
// assumes that all add() operations have been completed.
|
||||
func (cl *candidateList) get() []Candidate {
|
||||
return cl.items[:cl.size()]
|
||||
}
|
||||
|
||||
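// Illustrative sketch, not part of the upstream file: the intended usage pattern of
// candidateList above. add() may be called from many goroutines, but get() must only
// be called after every add() has returned (here enforced with a WaitGroup).
func exampleCandidateList() []Candidate {
	cl := newCandidateList(8) // capacity: the number of candidates we shortlist

	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			cl.add(&candidate{name: fmt.Sprintf("node-%d", i), victims: &extenderv1.Victims{}})
		}(i)
	}
	wg.Wait() // all add() calls completed; get() is now safe

	return cl.get()
}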
// Interface is expected to be implemented by different preemption plugins as all those member
|
||||
// methods might have different behavior compared with the default preemption.
|
||||
type Interface interface {
|
||||
// GetOffsetAndNumCandidates chooses a random offset and calculates the number of candidates that should be
|
||||
// shortlisted for dry running preemption.
|
||||
GetOffsetAndNumCandidates(nodes int32) (int32, int32)
|
||||
// CandidatesToVictimsMap builds a map from the target node to a list of to-be-preempted Pods and the number of PDB violation.
|
||||
CandidatesToVictimsMap(candidates []Candidate) map[string]*extenderv1.Victims
|
||||
// PodEligibleToPreemptOthers returns one bool and one string. The bool indicates whether this pod should be considered for
|
||||
// preempting other pods or not. The string includes the reason if this pod isn't eligible.
|
||||
PodEligibleToPreemptOthers(ctx context.Context, pod *v1.Pod, nominatedNodeStatus *framework.Status) (bool, string)
|
||||
// SelectVictimsOnNode finds minimum set of pods on the given node that should be preempted in order to make enough room
|
||||
// for "pod" to be scheduled.
|
||||
// Note that both `state` and `nodeInfo` are deep copied.
|
||||
SelectVictimsOnNode(ctx context.Context, state *framework.CycleState,
|
||||
pod *v1.Pod, nodeInfo *framework.NodeInfo, pdbs []*policy.PodDisruptionBudget) ([]*v1.Pod, int, *framework.Status)
|
||||
// OrderedScoreFuncs returns a list of ordered score functions to select preferable node where victims will be preempted.
|
||||
// The ordered score functions will be processed one by one iff we find more than one node with the highest score.
|
||||
// Default score functions will be processed if nil returned here for backwards-compatibility.
|
||||
OrderedScoreFuncs(ctx context.Context, nodesToVictims map[string]*extenderv1.Victims) []func(node string) int64
|
||||
}
|
||||
|
||||
type Evaluator struct {
|
||||
PluginName string
|
||||
Handler framework.Handle
|
||||
PodLister corelisters.PodLister
|
||||
PdbLister policylisters.PodDisruptionBudgetLister
|
||||
|
||||
enableAsyncPreemption bool
|
||||
mu sync.RWMutex
|
||||
// preempting is a set that records the pods that are currently triggering preemption asynchronously,
|
||||
// which is used to prevent the pods from entering the scheduling cycle meanwhile.
|
||||
preempting sets.Set[types.UID]
|
||||
|
||||
// PreemptPod is a function that actually makes API calls to preempt a specific Pod.
|
||||
// This is exposed to be replaced during tests.
|
||||
PreemptPod func(ctx context.Context, c Candidate, preemptor, victim *v1.Pod, pluginName string) error
|
||||
|
||||
Interface
|
||||
}
|
||||
|
||||
func NewEvaluator(pluginName string, fh framework.Handle, i Interface, enableAsyncPreemption bool) *Evaluator {
|
||||
podLister := fh.SharedInformerFactory().Core().V1().Pods().Lister()
|
||||
pdbLister := fh.SharedInformerFactory().Policy().V1().PodDisruptionBudgets().Lister()
|
||||
|
||||
ev := &Evaluator{
|
||||
PluginName: names.DefaultPreemption,
|
||||
Handler: fh,
|
||||
PodLister: podLister,
|
||||
PdbLister: pdbLister,
|
||||
Interface: i,
|
||||
enableAsyncPreemption: enableAsyncPreemption,
|
||||
preempting: sets.New[types.UID](),
|
||||
}
|
||||
|
||||
// PreemptPod actually makes API calls to preempt a specific Pod.
|
||||
//
|
||||
// We implement it here directly, rather than creating a separate method like ev.preemptPod(...)
|
||||
// to prevent the misuse of the PreemptPod function.
|
||||
ev.PreemptPod = func(ctx context.Context, c Candidate, preemptor, victim *v1.Pod, pluginName string) error {
|
||||
logger := klog.FromContext(ctx)
|
||||
|
||||
// If the victim is a WaitingPod, send a reject message to the PermitPlugin.
|
||||
// Otherwise we should delete the victim.
|
||||
if waitingPod := ev.Handler.GetWaitingPod(victim.UID); waitingPod != nil {
|
||||
waitingPod.Reject(pluginName, "preempted")
|
||||
logger.V(2).Info("Preemptor pod rejected a waiting pod", "preemptor", klog.KObj(preemptor), "waitingPod", klog.KObj(victim), "node", c.Name())
|
||||
} else {
|
||||
condition := &v1.PodCondition{
|
||||
Type: v1.DisruptionTarget,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: v1.PodReasonPreemptionByScheduler,
|
||||
Message: fmt.Sprintf("%s: preempting to accommodate a higher priority pod", preemptor.Spec.SchedulerName),
|
||||
}
|
||||
newStatus := victim.Status.DeepCopy()
|
||||
updated := apipod.UpdatePodCondition(newStatus, condition)
|
||||
if updated {
|
||||
if err := util.PatchPodStatus(ctx, ev.Handler.ClientSet(), victim, newStatus); err != nil {
|
||||
logger.Error(err, "Could not add DisruptionTarget condition due to preemption", "pod", klog.KObj(victim), "preemptor", klog.KObj(preemptor))
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := util.DeletePod(ctx, ev.Handler.ClientSet(), victim); err != nil {
|
||||
if !apierrors.IsNotFound(err) {
|
||||
logger.Error(err, "Tried to preempted pod", "pod", klog.KObj(victim), "preemptor", klog.KObj(preemptor))
|
||||
return err
|
||||
}
|
||||
logger.V(2).Info("Victim Pod is already deleted", "preemptor", klog.KObj(preemptor), "victim", klog.KObj(victim), "node", c.Name())
|
||||
return nil
|
||||
}
|
||||
logger.V(2).Info("Preemptor Pod preempted victim Pod", "preemptor", klog.KObj(preemptor), "victim", klog.KObj(victim), "node", c.Name())
|
||||
}
|
||||
|
||||
ev.Handler.EventRecorder().Eventf(victim, preemptor, v1.EventTypeNormal, "Preempted", "Preempting", "Preempted by pod %v on node %v", preemptor.UID, c.Name())
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
return ev
|
||||
}
|
||||
|
||||
// IsPodRunningPreemption returns true if the pod is currently triggering preemption asynchronously.
|
||||
func (ev *Evaluator) IsPodRunningPreemption(podUID types.UID) bool {
|
||||
ev.mu.RLock()
|
||||
defer ev.mu.RUnlock()
|
||||
|
||||
return ev.preempting.Has(podUID)
|
||||
}
|
||||
|
||||
// Preempt returns a PostFilterResult carrying suggested nominatedNodeName, along with a Status.
|
||||
// The semantics of returned <PostFilterResult, Status> varies on different scenarios:
|
||||
//
|
||||
// - <nil, Error>. This denotes it's a transient/rare error that may be self-healed in future cycles.
|
||||
//
|
||||
// - <nil, Unschedulable>. This status is mostly as expected like the preemptor is waiting for the
|
||||
// victims to be fully terminated.
|
||||
//
|
||||
// - In both cases above, a nil PostFilterResult is returned to keep the pod's nominatedNodeName unchanged.
|
||||
//
|
||||
// - <non-nil PostFilterResult, Unschedulable>. It indicates the pod cannot be scheduled even with preemption.
|
||||
// In this case, a non-nil PostFilterResult is returned and result.NominatingMode instructs how to deal with
|
||||
// the nominatedNodeName.
|
||||
//
|
||||
// - <non-nil PostFilterResult, Success>. It's the regular happy path
|
||||
// and the non-empty nominatedNodeName will be applied to the preemptor pod.
|
||||
func (ev *Evaluator) Preempt(ctx context.Context, state *framework.CycleState, pod *v1.Pod, m framework.NodeToStatusReader) (*framework.PostFilterResult, *framework.Status) {
|
||||
logger := klog.FromContext(ctx)
|
||||
|
||||
// 0) Fetch the latest version of <pod>.
|
||||
// It's safe to directly fetch pod here. Because the informer cache has already been
|
||||
// initialized when creating the Scheduler obj.
|
||||
// However, tests may need to manually initialize the shared pod informer.
|
||||
podNamespace, podName := pod.Namespace, pod.Name
|
||||
pod, err := ev.PodLister.Pods(pod.Namespace).Get(pod.Name)
|
||||
if err != nil {
|
||||
logger.Error(err, "Could not get the updated preemptor pod object", "pod", klog.KRef(podNamespace, podName))
|
||||
return nil, framework.AsStatus(err)
|
||||
}
|
||||
|
||||
// 1) Ensure the preemptor is eligible to preempt other pods.
|
||||
nominatedNodeStatus := m.Get(pod.Status.NominatedNodeName)
|
||||
if ok, msg := ev.PodEligibleToPreemptOthers(ctx, pod, nominatedNodeStatus); !ok {
|
||||
logger.V(5).Info("Pod is not eligible for preemption", "pod", klog.KObj(pod), "reason", msg)
|
||||
return nil, framework.NewStatus(framework.Unschedulable, msg)
|
||||
}
|
||||
|
||||
// 2) Find all preemption candidates.
|
||||
allNodes, err := ev.Handler.SnapshotSharedLister().NodeInfos().List()
|
||||
if err != nil {
|
||||
return nil, framework.AsStatus(err)
|
||||
}
|
||||
candidates, nodeToStatusMap, err := ev.findCandidates(ctx, state, allNodes, pod, m)
|
||||
if err != nil && len(candidates) == 0 {
|
||||
return nil, framework.AsStatus(err)
|
||||
}
|
||||
|
||||
// Return a FitError only when there are no candidates that fit the pod.
|
||||
if len(candidates) == 0 {
|
||||
fitError := &framework.FitError{
|
||||
Pod: pod,
|
||||
NumAllNodes: len(allNodes),
|
||||
Diagnosis: framework.Diagnosis{
|
||||
NodeToStatus: nodeToStatusMap,
|
||||
// Leave UnschedulablePlugins or PendingPlugins as nil as it won't be used on moving Pods.
|
||||
},
|
||||
}
|
||||
fitError.Diagnosis.NodeToStatus.SetAbsentNodesStatus(framework.NewStatus(framework.UnschedulableAndUnresolvable, "Preemption is not helpful for scheduling"))
|
||||
// Specify nominatedNodeName to clear the pod's nominatedNodeName status, if applicable.
|
||||
return framework.NewPostFilterResultWithNominatedNode(""), framework.NewStatus(framework.Unschedulable, fitError.Error())
|
||||
}
|
||||
|
||||
// 3) Interact with registered Extenders to filter out some candidates if needed.
|
||||
candidates, status := ev.callExtenders(logger, pod, candidates)
|
||||
if !status.IsSuccess() {
|
||||
return nil, status
|
||||
}
|
||||
|
||||
// 4) Find the best candidate.
|
||||
bestCandidate := ev.SelectCandidate(ctx, candidates)
|
||||
if bestCandidate == nil || len(bestCandidate.Name()) == 0 {
|
||||
return nil, framework.NewStatus(framework.Unschedulable, "no candidate node for preemption")
|
||||
}
|
||||
|
||||
logger.V(2).Info("the target node for the preemption is determined", "node", bestCandidate.Name(), "pod", klog.KObj(pod))
|
||||
|
||||
// 5) Perform preparation work before nominating the selected candidate.
|
||||
if ev.enableAsyncPreemption {
|
||||
ev.prepareCandidateAsync(bestCandidate, pod, ev.PluginName)
|
||||
} else {
|
||||
if status := ev.prepareCandidate(ctx, bestCandidate, pod, ev.PluginName); !status.IsSuccess() {
|
||||
return nil, status
|
||||
}
|
||||
}
|
||||
|
||||
return framework.NewPostFilterResultWithNominatedNode(bestCandidate.Name()), framework.NewStatus(framework.Success)
|
||||
}
|
||||
|
||||
// FindCandidates calculates a slice of preemption candidates.
|
||||
// Each candidate is executable to make the given <pod> schedulable.
|
||||
func (ev *Evaluator) findCandidates(ctx context.Context, state *framework.CycleState, allNodes []*framework.NodeInfo, pod *v1.Pod, m framework.NodeToStatusReader) ([]Candidate, *framework.NodeToStatus, error) {
|
||||
if len(allNodes) == 0 {
|
||||
return nil, nil, errors.New("no nodes available")
|
||||
}
|
||||
logger := klog.FromContext(ctx)
|
||||
// Get a list of nodes with failed predicates (Unschedulable) that may be satisfied by removing pods from the node.
|
||||
potentialNodes, err := m.NodesForStatusCode(ev.Handler.SnapshotSharedLister().NodeInfos(), framework.Unschedulable)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if len(potentialNodes) == 0 {
|
||||
logger.V(3).Info("Preemption will not help schedule pod on any node", "pod", klog.KObj(pod))
|
||||
// In this case, we should clean-up any existing nominated node name of the pod.
|
||||
if err := util.ClearNominatedNodeName(ctx, ev.Handler.ClientSet(), pod); err != nil {
|
||||
logger.Error(err, "Could not clear the nominatedNodeName field of pod", "pod", klog.KObj(pod))
|
||||
// We do not return as this error is not critical.
|
||||
}
|
||||
return nil, framework.NewDefaultNodeToStatus(), nil
|
||||
}
|
||||
|
||||
pdbs, err := getPodDisruptionBudgets(ev.PdbLister)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
offset, candidatesNum := ev.GetOffsetAndNumCandidates(int32(len(potentialNodes)))
|
||||
return ev.DryRunPreemption(ctx, state, pod, potentialNodes, pdbs, offset, candidatesNum)
|
||||
}
|
||||
|
||||
// callExtenders calls given <extenders> to select the list of feasible candidates.
|
||||
// We will only check <candidates> with extenders that support preemption.
|
||||
// Extenders which do not support preemption may later prevent preemptor from being scheduled on the nominated
|
||||
// node. In that case, scheduler will find a different host for the preemptor in subsequent scheduling cycles.
|
||||
func (ev *Evaluator) callExtenders(logger klog.Logger, pod *v1.Pod, candidates []Candidate) ([]Candidate, *framework.Status) {
|
||||
extenders := ev.Handler.Extenders()
|
||||
nodeLister := ev.Handler.SnapshotSharedLister().NodeInfos()
|
||||
if len(extenders) == 0 {
|
||||
return candidates, nil
|
||||
}
|
||||
|
||||
// Migrate candidate slice to victimsMap to adapt to the Extender interface.
|
||||
// It's only applicable for candidate slice that have unique nominated node name.
|
||||
victimsMap := ev.CandidatesToVictimsMap(candidates)
|
||||
if len(victimsMap) == 0 {
|
||||
return candidates, nil
|
||||
}
|
||||
for _, extender := range extenders {
|
||||
if !extender.SupportsPreemption() || !extender.IsInterested(pod) {
|
||||
continue
|
||||
}
|
||||
nodeNameToVictims, err := extender.ProcessPreemption(pod, victimsMap, nodeLister)
|
||||
if err != nil {
|
||||
if extender.IsIgnorable() {
|
||||
logger.Info("Skipped extender as it returned error and has ignorable flag set",
|
||||
"extender", extender.Name(), "err", err)
|
||||
continue
|
||||
}
|
||||
return nil, framework.AsStatus(err)
|
||||
}
|
||||
// Check if the returned victims are valid.
|
||||
for nodeName, victims := range nodeNameToVictims {
|
||||
if victims == nil || len(victims.Pods) == 0 {
|
||||
if extender.IsIgnorable() {
|
||||
delete(nodeNameToVictims, nodeName)
|
||||
logger.Info("Ignored node for which the extender didn't report victims", "node", klog.KRef("", nodeName), "extender", extender.Name())
|
||||
continue
|
||||
}
|
||||
return nil, framework.AsStatus(fmt.Errorf("expected at least one victim pod on node %q", nodeName))
|
||||
}
|
||||
}
|
||||
|
||||
// Replace victimsMap with new result after preemption. So the
|
||||
// rest of extenders can continue use it as parameter.
|
||||
victimsMap = nodeNameToVictims
|
||||
|
||||
// If node list becomes empty, no preemption can happen regardless of other extenders.
|
||||
if len(victimsMap) == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
var newCandidates []Candidate
|
||||
for nodeName := range victimsMap {
|
||||
newCandidates = append(newCandidates, &candidate{
|
||||
victims: victimsMap[nodeName],
|
||||
name: nodeName,
|
||||
})
|
||||
}
|
||||
return newCandidates, nil
|
||||
}
|
||||
|
||||
// SelectCandidate chooses the best-fit candidate from given <candidates> and return it.
|
||||
// NOTE: This method is exported for easier testing in default preemption.
|
||||
func (ev *Evaluator) SelectCandidate(ctx context.Context, candidates []Candidate) Candidate {
|
||||
logger := klog.FromContext(ctx)
|
||||
|
||||
if len(candidates) == 0 {
|
||||
return nil
|
||||
}
|
||||
if len(candidates) == 1 {
|
||||
return candidates[0]
|
||||
}
|
||||
|
||||
victimsMap := ev.CandidatesToVictimsMap(candidates)
|
||||
scoreFuncs := ev.OrderedScoreFuncs(ctx, victimsMap)
|
||||
candidateNode := pickOneNodeForPreemption(logger, victimsMap, scoreFuncs)
|
||||
|
||||
// Same as candidatesToVictimsMap, this logic is not applicable for out-of-tree
|
||||
// preemption plugins that exercise different candidates on the same nominated node.
|
||||
if victims := victimsMap[candidateNode]; victims != nil {
|
||||
return &candidate{
|
||||
victims: victims,
|
||||
name: candidateNode,
|
||||
}
|
||||
}
|
||||
|
||||
// We shouldn't reach here.
|
||||
logger.Error(errors.New("no candidate selected"), "Should not reach here", "candidates", candidates)
|
||||
// To not break the whole flow, return the first candidate.
|
||||
return candidates[0]
|
||||
}
|
||||
|
||||
// prepareCandidate does some preparation work before nominating the selected candidate:
|
||||
// - Evict the victim pods
|
||||
// - Reject the victim pods if they are in waitingPod map
|
||||
// - Clear the low-priority pods' nominatedNodeName status if needed
|
||||
func (ev *Evaluator) prepareCandidate(ctx context.Context, c Candidate, pod *v1.Pod, pluginName string) *framework.Status {
|
||||
fh := ev.Handler
|
||||
cs := ev.Handler.ClientSet()
|
||||
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
logger := klog.FromContext(ctx)
|
||||
errCh := parallelize.NewErrorChannel()
|
||||
fh.Parallelizer().Until(ctx, len(c.Victims().Pods), func(index int) {
|
||||
if err := ev.PreemptPod(ctx, c, pod, c.Victims().Pods[index], pluginName); err != nil {
|
||||
errCh.SendErrorWithCancel(err, cancel)
|
||||
}
|
||||
}, ev.PluginName)
|
||||
if err := errCh.ReceiveError(); err != nil {
|
||||
return framework.AsStatus(err)
|
||||
}
|
||||
|
||||
metrics.PreemptionVictims.Observe(float64(len(c.Victims().Pods)))
|
||||
|
||||
// Lower priority pods nominated to run on this node, may no longer fit on
|
||||
// this node. So, we should remove their nomination. Removing their
|
||||
// nomination updates these pods and moves them to the active queue. It
|
||||
// lets scheduler find another place for them.
|
||||
nominatedPods := getLowerPriorityNominatedPods(logger, fh, pod, c.Name())
|
||||
if err := util.ClearNominatedNodeName(ctx, cs, nominatedPods...); err != nil {
|
||||
logger.Error(err, "Cannot clear 'NominatedNodeName' field")
|
||||
// We do not return as this error is not critical.
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// prepareCandidateAsync triggers a goroutine for some preparation work:
|
||||
// - Evict the victim pods
|
||||
// - Reject the victim pods if they are in waitingPod map
|
||||
// - Clear the low-priority pods' nominatedNodeName status if needed
|
||||
// The Pod won't be retried until the goroutine triggered here completes.
|
||||
//
|
||||
// See http://kep.k8s.io/4832 for how the async preemption works.
|
||||
func (ev *Evaluator) prepareCandidateAsync(c Candidate, pod *v1.Pod, pluginName string) {
|
||||
metrics.PreemptionVictims.Observe(float64(len(c.Victims().Pods)))
|
||||
|
||||
// Intentionally create a new context rather than using the ctx from the scheduling cycle,
// because this process could continue even after this scheduling cycle finishes.
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
errCh := parallelize.NewErrorChannel()
|
||||
preemptPod := func(index int) {
|
||||
victim := c.Victims().Pods[index]
|
||||
if err := ev.PreemptPod(ctx, c, pod, victim, pluginName); err != nil {
|
||||
errCh.SendErrorWithCancel(err, cancel)
|
||||
}
|
||||
}
|
||||
|
||||
ev.mu.Lock()
|
||||
ev.preempting.Insert(pod.UID)
|
||||
ev.mu.Unlock()
|
||||
|
||||
logger := klog.FromContext(ctx)
|
||||
go func() {
|
||||
startTime := time.Now()
|
||||
result := metrics.GoroutineResultSuccess
|
||||
defer metrics.PreemptionGoroutinesDuration.WithLabelValues(result).Observe(metrics.SinceInSeconds(startTime))
|
||||
defer metrics.PreemptionGoroutinesExecutionTotal.WithLabelValues(result).Inc()
|
||||
defer func() {
|
||||
if result == metrics.GoroutineResultError {
|
||||
// When API call isn't successful, the Pod may get stuck in the unschedulable pod pool in the worst case.
|
||||
// So, we should move the Pod to the activeQ.
|
||||
ev.Handler.Activate(logger, map[string]*v1.Pod{pod.Name: pod})
|
||||
}
|
||||
}()
|
||||
defer cancel()
|
||||
logger.V(2).Info("Start the preemption asynchronously", "preemptor", klog.KObj(pod), "node", c.Name(), "numVictims", len(c.Victims().Pods))
|
||||
|
||||
// Lower priority pods nominated to run on this node, may no longer fit on
|
||||
// this node. So, we should remove their nomination. Removing their
|
||||
// nomination updates these pods and moves them to the active queue. It
|
||||
// lets scheduler find another place for them.
|
||||
nominatedPods := getLowerPriorityNominatedPods(logger, ev.Handler, pod, c.Name())
|
||||
if err := util.ClearNominatedNodeName(ctx, ev.Handler.ClientSet(), nominatedPods...); err != nil {
|
||||
logger.Error(err, "Cannot clear 'NominatedNodeName' field from lower priority pods on the same target node", "node", c.Name())
|
||||
result = metrics.GoroutineResultError
|
||||
// We do not return as this error is not critical.
|
||||
}
|
||||
|
||||
if len(c.Victims().Pods) == 0 {
|
||||
ev.mu.Lock()
|
||||
delete(ev.preempting, pod.UID)
|
||||
ev.mu.Unlock()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// We can evict all victims in parallel, except the last one.
// We have to remove the pod from the preempting map before the last victim is evicted
// because, otherwise, the victim's removal might be notified to the scheduling queue before
// we remove this pod from the preempting map,
// and the pod could end up stuck in the unschedulable pod pool
// with all the pod removal events being ignored.
|
||||
ev.Handler.Parallelizer().Until(ctx, len(c.Victims().Pods)-1, preemptPod, ev.PluginName)
|
||||
if err := errCh.ReceiveError(); err != nil {
|
||||
logger.Error(err, "Error occurred during async preemption")
|
||||
result = metrics.GoroutineResultError
|
||||
}
|
||||
|
||||
ev.mu.Lock()
|
||||
delete(ev.preempting, pod.UID)
|
||||
ev.mu.Unlock()
|
||||
|
||||
if err := ev.PreemptPod(ctx, c, pod, c.Victims().Pods[len(c.Victims().Pods)-1], pluginName); err != nil {
|
||||
logger.Error(err, "Error occurred during async preemption")
|
||||
result = metrics.GoroutineResultError
|
||||
}
|
||||
|
||||
logger.V(2).Info("Async Preemption finished completely", "preemptor", klog.KObj(pod), "node", c.Name(), "result", result)
|
||||
}()
|
||||
}
|
||||
|
||||
func getPodDisruptionBudgets(pdbLister policylisters.PodDisruptionBudgetLister) ([]*policy.PodDisruptionBudget, error) {
|
||||
if pdbLister != nil {
|
||||
return pdbLister.List(labels.Everything())
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// pickOneNodeForPreemption chooses one node among the given nodes.
// It assumes pods in each map entry are ordered by decreasing priority.
// If scoreFuncs is not empty, it picks a node based on the scores that scoreFuncs return.
// If scoreFuncs is empty, it picks a node based on the following criteria:
//  1. A node with the minimum number of PDB violations.
//  2. A node with the minimum highest-priority victim is picked.
//  3. Ties are broken by the sum of priorities of all victims.
//  4. If there are still ties, the node with the minimum number of victims is picked.
//  5. If there are still ties, the node with the latest start time of all highest-priority victims is picked.
//  6. If there are still ties, the first such node is picked (more or less randomly).
|
||||
// The candidate slices ('allCandidates' and 'selectedNodes') are reused across rounds
// to save memory allocation and garbage collection time.
|
||||
func pickOneNodeForPreemption(logger klog.Logger, nodesToVictims map[string]*extenderv1.Victims, scoreFuncs []func(node string) int64) string {
|
||||
if len(nodesToVictims) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
allCandidates := make([]string, 0, len(nodesToVictims))
|
||||
for node := range nodesToVictims {
|
||||
allCandidates = append(allCandidates, node)
|
||||
}
|
||||
|
||||
if len(scoreFuncs) == 0 {
|
||||
minNumPDBViolatingScoreFunc := func(node string) int64 {
|
||||
// The smaller the NumPDBViolations, the higher the score.
|
||||
return -nodesToVictims[node].NumPDBViolations
|
||||
}
|
||||
minHighestPriorityScoreFunc := func(node string) int64 {
|
||||
// highestPodPriority is the highest priority among the victims on this node.
|
||||
highestPodPriority := corev1helpers.PodPriority(nodesToVictims[node].Pods[0])
|
||||
// The smaller the highestPodPriority, the higher the score.
|
||||
return -int64(highestPodPriority)
|
||||
}
|
||||
minSumPrioritiesScoreFunc := func(node string) int64 {
|
||||
var sumPriorities int64
|
||||
for _, pod := range nodesToVictims[node].Pods {
|
||||
// We add MaxInt32+1 to all priorities to make all of them >= 0. This is
|
||||
// needed so that a node with a few pods with negative priority is not
|
||||
// picked over a node with a smaller number of pods with the same negative
|
||||
// priority (and similar scenarios).
|
||||
sumPriorities += int64(corev1helpers.PodPriority(pod)) + int64(math.MaxInt32+1)
|
||||
}
|
||||
// The smaller the sumPriorities, the higher the score.
|
||||
return -sumPriorities
|
||||
}
|
||||
minNumPodsScoreFunc := func(node string) int64 {
|
||||
// The smaller the length of pods, the higher the score.
|
||||
return -int64(len(nodesToVictims[node].Pods))
|
||||
}
|
||||
latestStartTimeScoreFunc := func(node string) int64 {
|
||||
// Get the earliest start time of all pods on the current node.
|
||||
earliestStartTimeOnNode := util.GetEarliestPodStartTime(nodesToVictims[node])
|
||||
if earliestStartTimeOnNode == nil {
|
||||
logger.Error(errors.New("earliestStartTime is nil for node"), "Should not reach here", "node", node)
|
||||
return int64(math.MinInt64)
|
||||
}
|
||||
// The bigger the earliestStartTimeOnNode, the higher the score.
|
||||
return earliestStartTimeOnNode.UnixNano()
|
||||
}
|
||||
|
||||
// Each scoreFunc scores the nodes according to specific rules and keeps the name of the node
|
||||
// with the highest score. If and only if the scoreFunc has more than one node with the highest
|
||||
// score, we will execute the other scoreFunc in order of precedence.
|
||||
scoreFuncs = []func(string) int64{
|
||||
// A node with a minimum number of PDB is preferable.
|
||||
minNumPDBViolatingScoreFunc,
|
||||
// A node with a minimum highest priority victim is preferable.
|
||||
minHighestPriorityScoreFunc,
|
||||
// A node with the smallest sum of priorities is preferable.
|
||||
minSumPrioritiesScoreFunc,
|
||||
// A node with the minimum number of pods is preferable.
|
||||
minNumPodsScoreFunc,
|
||||
// A node with the latest start time of all highest priority victims is preferable.
|
||||
latestStartTimeScoreFunc,
|
||||
// If there are still ties, then the first Node in the list is selected.
|
||||
}
|
||||
}
|
||||
|
||||
for _, f := range scoreFuncs {
|
||||
selectedNodes := []string{}
|
||||
maxScore := int64(math.MinInt64)
|
||||
for _, node := range allCandidates {
|
||||
score := f(node)
|
||||
if score > maxScore {
|
||||
maxScore = score
|
||||
selectedNodes = []string{}
|
||||
}
|
||||
if score == maxScore {
|
||||
selectedNodes = append(selectedNodes, node)
|
||||
}
|
||||
}
|
||||
if len(selectedNodes) == 1 {
|
||||
return selectedNodes[0]
|
||||
}
|
||||
allCandidates = selectedNodes
|
||||
}
|
||||
|
||||
return allCandidates[0]
|
||||
}
|
||||
|
||||
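// Illustrative sketch, not part of the upstream file: a worked example of the
// tie-breaking cascade above. node-c is ruled out by its PDB violation, and
// node-b wins because its highest-priority victim (10) is lower than node-a's (100);
// the later criteria are never consulted.
func examplePickOneNode() string {
	newVictim := func(priority int32) *v1.Pod {
		return &v1.Pod{Spec: v1.PodSpec{Priority: &priority}}
	}
	nodesToVictims := map[string]*extenderv1.Victims{
		"node-a": {Pods: []*v1.Pod{newVictim(100)}, NumPDBViolations: 0},
		"node-b": {Pods: []*v1.Pod{newVictim(10)}, NumPDBViolations: 0},
		"node-c": {Pods: []*v1.Pod{newVictim(5)}, NumPDBViolations: 1},
	}
	// With no custom score functions, the default cascade returns "node-b".
	return pickOneNodeForPreemption(klog.Background(), nodesToVictims, nil)
}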
// getLowerPriorityNominatedPods returns pods whose priority is smaller than the
|
||||
// priority of the given "pod" and are nominated to run on the given node.
|
||||
// Note: We could possibly check if the nominated lower priority pods still fit
|
||||
// and return those that no longer fit, but that would require lots of
|
||||
// manipulation of NodeInfo and PreFilter state per nominated pod. It may not be
|
||||
// worth the complexity, especially because we generally expect to have a very
|
||||
// small number of nominated pods per node.
|
||||
func getLowerPriorityNominatedPods(logger klog.Logger, pn framework.PodNominator, pod *v1.Pod, nodeName string) []*v1.Pod {
|
||||
podInfos := pn.NominatedPodsForNode(nodeName)
|
||||
|
||||
if len(podInfos) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var lowerPriorityPods []*v1.Pod
|
||||
podPriority := corev1helpers.PodPriority(pod)
|
||||
for _, pi := range podInfos {
|
||||
if corev1helpers.PodPriority(pi.Pod) < podPriority {
|
||||
lowerPriorityPods = append(lowerPriorityPods, pi.Pod)
|
||||
}
|
||||
}
|
||||
return lowerPriorityPods
|
||||
}
|
||||
|
||||
// DryRunPreemption simulates Preemption logic on <potentialNodes> in parallel,
|
||||
// returns preemption candidates and a map indicating filtered nodes statuses.
|
||||
// The number of candidates depends on the constraints defined in the plugin's args. In the returned list of
|
||||
// candidates, ones that do not violate PDB are preferred over ones that do.
|
||||
// NOTE: This method is exported for easier testing in default preemption.
|
||||
func (ev *Evaluator) DryRunPreemption(ctx context.Context, state *framework.CycleState, pod *v1.Pod, potentialNodes []*framework.NodeInfo,
|
||||
pdbs []*policy.PodDisruptionBudget, offset int32, candidatesNum int32) ([]Candidate, *framework.NodeToStatus, error) {
|
||||
|
||||
fh := ev.Handler
|
||||
nonViolatingCandidates := newCandidateList(candidatesNum)
|
||||
violatingCandidates := newCandidateList(candidatesNum)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
nodeStatuses := framework.NewDefaultNodeToStatus()
|
||||
|
||||
logger := klog.FromContext(ctx)
|
||||
logger.V(5).Info("Dry run the preemption", "potentialNodesNumber", len(potentialNodes), "pdbsNumber", len(pdbs), "offset", offset, "candidatesNumber", candidatesNum)
|
||||
|
||||
var statusesLock sync.Mutex
|
||||
var errs []error
|
||||
checkNode := func(i int) {
|
||||
nodeInfoCopy := potentialNodes[(int(offset)+i)%len(potentialNodes)].Snapshot()
|
||||
logger.V(5).Info("Check the potential node for preemption", "node", nodeInfoCopy.Node().Name)
|
||||
|
||||
stateCopy := state.Clone()
|
||||
pods, numPDBViolations, status := ev.SelectVictimsOnNode(ctx, stateCopy, pod, nodeInfoCopy, pdbs)
|
||||
if status.IsSuccess() && len(pods) != 0 {
|
||||
victims := extenderv1.Victims{
|
||||
Pods: pods,
|
||||
NumPDBViolations: int64(numPDBViolations),
|
||||
}
|
||||
c := &candidate{
|
||||
victims: &victims,
|
||||
name: nodeInfoCopy.Node().Name,
|
||||
}
|
||||
if numPDBViolations == 0 {
|
||||
nonViolatingCandidates.add(c)
|
||||
} else {
|
||||
violatingCandidates.add(c)
|
||||
}
|
||||
nvcSize, vcSize := nonViolatingCandidates.size(), violatingCandidates.size()
|
||||
if nvcSize > 0 && nvcSize+vcSize >= candidatesNum {
|
||||
cancel()
|
||||
}
|
||||
return
|
||||
}
|
||||
if status.IsSuccess() && len(pods) == 0 {
|
||||
status = framework.AsStatus(fmt.Errorf("expected at least one victim pod on node %q", nodeInfoCopy.Node().Name))
|
||||
}
|
||||
statusesLock.Lock()
|
||||
if status.Code() == framework.Error {
|
||||
errs = append(errs, status.AsError())
|
||||
}
|
||||
nodeStatuses.Set(nodeInfoCopy.Node().Name, status)
|
||||
statusesLock.Unlock()
|
||||
}
|
||||
fh.Parallelizer().Until(ctx, len(potentialNodes), checkNode, ev.PluginName)
|
||||
return append(nonViolatingCandidates.get(), violatingCandidates.get()...), nodeStatuses, utilerrors.NewAggregate(errs)
|
||||
}
|
1671
vendor/k8s.io/kubernetes/pkg/scheduler/framework/runtime/framework.go
generated
vendored
Normal file
File diff suppressed because it is too large
83
vendor/k8s.io/kubernetes/pkg/scheduler/framework/runtime/instrumented_plugins.go
generated
vendored
Normal file
@ -0,0 +1,83 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package runtime

import (
	"context"

	v1 "k8s.io/api/core/v1"
	compbasemetrics "k8s.io/component-base/metrics"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

type instrumentedFilterPlugin struct {
	framework.FilterPlugin

	metric compbasemetrics.CounterMetric
}

var _ framework.FilterPlugin = &instrumentedFilterPlugin{}

func (p *instrumentedFilterPlugin) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
	p.metric.Inc()
	return p.FilterPlugin.Filter(ctx, state, pod, nodeInfo)
}

type instrumentedPreFilterPlugin struct {
	framework.PreFilterPlugin

	metric compbasemetrics.CounterMetric
}

var _ framework.PreFilterPlugin = &instrumentedPreFilterPlugin{}

func (p *instrumentedPreFilterPlugin) PreFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
	result, status := p.PreFilterPlugin.PreFilter(ctx, state, pod)
	if !status.IsSkip() {
		p.metric.Inc()
	}
	return result, status
}

type instrumentedPreScorePlugin struct {
	framework.PreScorePlugin

	metric compbasemetrics.CounterMetric
}

var _ framework.PreScorePlugin = &instrumentedPreScorePlugin{}

func (p *instrumentedPreScorePlugin) PreScore(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodes []*framework.NodeInfo) *framework.Status {
	status := p.PreScorePlugin.PreScore(ctx, state, pod, nodes)
	if !status.IsSkip() {
		p.metric.Inc()
	}
	return status
}

type instrumentedScorePlugin struct {
	framework.ScorePlugin

	metric compbasemetrics.CounterMetric
}

var _ framework.ScorePlugin = &instrumentedScorePlugin{}

func (p *instrumentedScorePlugin) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
	p.metric.Inc()
	return p.ScorePlugin.Score(ctx, state, pod, nodeName)
}
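The instrumented*Plugin types above are thin decorators: each embeds the wrapped plugin interface and bumps a counter before delegating to it. A toy, self-contained sketch of the same embedding-based decorator, with hypothetical names:

// Illustrative sketch only (not part of the vendored code).
package main

import "fmt"

// FilterPlugin is a toy stand-in for framework.FilterPlugin.
type FilterPlugin interface {
	Filter(node string) bool
}

type nodeNameFilter struct{}

func (nodeNameFilter) Filter(node string) bool { return node != "" }

// instrumentedFilter embeds the wrapped plugin and counts every call before
// delegating, mirroring how instrumentedFilterPlugin increments its metric
// and then calls p.FilterPlugin.Filter.
type instrumentedFilter struct {
	FilterPlugin
	calls int
}

func (p *instrumentedFilter) Filter(node string) bool {
	p.calls++
	return p.FilterPlugin.Filter(node)
}

func main() {
	f := &instrumentedFilter{FilterPlugin: nodeNameFilter{}}
	f.Filter("node-a")
	f.Filter("")
	fmt.Println("filter calls:", f.calls) // 2
}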
101
vendor/k8s.io/kubernetes/pkg/scheduler/framework/runtime/registry.go
generated
vendored
Normal file
@ -0,0 +1,101 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package runtime

import (
	"context"
	"fmt"

	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/util/json"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	plfeature "k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
	"sigs.k8s.io/yaml"
)

// PluginFactory is a function that builds a plugin.
type PluginFactory = func(ctx context.Context, configuration runtime.Object, f framework.Handle) (framework.Plugin, error)

// PluginFactoryWithFts is a function that builds a plugin with certain feature gates.
type PluginFactoryWithFts func(context.Context, runtime.Object, framework.Handle, plfeature.Features) (framework.Plugin, error)

// FactoryAdapter can be used to inject feature gates for a plugin that needs
// them when the caller expects the older PluginFactory method.
func FactoryAdapter(fts plfeature.Features, withFts PluginFactoryWithFts) PluginFactory {
	return func(ctx context.Context, plArgs runtime.Object, fh framework.Handle) (framework.Plugin, error) {
		return withFts(ctx, plArgs, fh, fts)
	}
}

// DecodeInto decodes configuration whose type is *runtime.Unknown to the interface into.
func DecodeInto(obj runtime.Object, into interface{}) error {
	if obj == nil {
		return nil
	}
	configuration, ok := obj.(*runtime.Unknown)
	if !ok {
		return fmt.Errorf("want args of type runtime.Unknown, got %T", obj)
	}
	if configuration.Raw == nil {
		return nil
	}

	switch configuration.ContentType {
	// If ContentType is empty, it means ContentTypeJSON by default.
	case runtime.ContentTypeJSON, "":
		return json.Unmarshal(configuration.Raw, into)
	case runtime.ContentTypeYAML:
		return yaml.Unmarshal(configuration.Raw, into)
	default:
		return fmt.Errorf("not supported content type %s", configuration.ContentType)
	}
}

// Registry is a collection of all available plugins. The framework uses a
// registry to enable and initialize configured plugins.
// All plugins must be in the registry before initializing the framework.
type Registry map[string]PluginFactory

// Register adds a new plugin to the registry. If a plugin with the same name
// exists, it returns an error.
func (r Registry) Register(name string, factory PluginFactory) error {
	if _, ok := r[name]; ok {
		return fmt.Errorf("a plugin named %v already exists", name)
	}
	r[name] = factory
	return nil
}

// Unregister removes an existing plugin from the registry. If no plugin with
// the provided name exists, it returns an error.
func (r Registry) Unregister(name string) error {
	if _, ok := r[name]; !ok {
		return fmt.Errorf("no plugin named %v exists", name)
	}
	delete(r, name)
	return nil
}

// Merge merges the provided registry to the current one.
func (r Registry) Merge(in Registry) error {
	for name, factory := range in {
		if err := r.Register(name, factory); err != nil {
			return err
		}
	}
	return nil
}
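Registry is a plain map from plugin name to factory, and Register refuses duplicate names. A hypothetical usage sketch follows; the fooPlugin type and the main wrapper are invented for illustration, while Registry, framework.Plugin, and runtime.Object are the APIs shown above.

// Illustrative sketch only (not part of the vendored code).
package main

import (
	"context"
	"fmt"

	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
)

// fooPlugin is a hypothetical no-op plugin used only for illustration.
type fooPlugin struct{}

func (*fooPlugin) Name() string { return "Foo" }

func main() {
	r := frameworkruntime.Registry{}
	factory := func(_ context.Context, _ runtime.Object, _ framework.Handle) (framework.Plugin, error) {
		return &fooPlugin{}, nil
	}
	if err := r.Register("Foo", factory); err != nil {
		fmt.Println(err)
	}
	// Registering the same name twice returns an error.
	if err := r.Register("Foo", factory); err != nil {
		fmt.Println(err) // "a plugin named Foo already exists"
	}
}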
165
vendor/k8s.io/kubernetes/pkg/scheduler/framework/runtime/waiting_pods_map.go
generated
vendored
Normal file
@ -0,0 +1,165 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package runtime

import (
	"fmt"
	"sync"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

// waitingPodsMap a thread-safe map used to maintain pods waiting in the permit phase.
type waitingPodsMap struct {
	pods map[types.UID]*waitingPod
	mu   sync.RWMutex
}

// NewWaitingPodsMap returns a new waitingPodsMap.
func NewWaitingPodsMap() *waitingPodsMap {
	return &waitingPodsMap{
		pods: make(map[types.UID]*waitingPod),
	}
}

// add a new WaitingPod to the map.
func (m *waitingPodsMap) add(wp *waitingPod) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.pods[wp.GetPod().UID] = wp
}

// remove a WaitingPod from the map.
func (m *waitingPodsMap) remove(uid types.UID) {
	m.mu.Lock()
	defer m.mu.Unlock()
	delete(m.pods, uid)
}

// get a WaitingPod from the map.
func (m *waitingPodsMap) get(uid types.UID) *waitingPod {
	m.mu.RLock()
	defer m.mu.RUnlock()
	return m.pods[uid]
}

// iterate acquires a read lock and iterates over the WaitingPods map.
func (m *waitingPodsMap) iterate(callback func(framework.WaitingPod)) {
	m.mu.RLock()
	defer m.mu.RUnlock()
	for _, v := range m.pods {
		callback(v)
	}
}

// waitingPod represents a pod waiting in the permit phase.
type waitingPod struct {
	pod            *v1.Pod
	pendingPlugins map[string]*time.Timer
	s              chan *framework.Status
	mu             sync.RWMutex
}

var _ framework.WaitingPod = &waitingPod{}

// newWaitingPod returns a new waitingPod instance.
func newWaitingPod(pod *v1.Pod, pluginsMaxWaitTime map[string]time.Duration) *waitingPod {
	wp := &waitingPod{
		pod: pod,
		// Allow() and Reject() calls are non-blocking. This property is guaranteed
		// by using non-blocking send to this channel. This channel has a buffer of size 1
		// to ensure that non-blocking send will not be ignored - possible situation when
		// receiving from this channel happens after non-blocking send.
		s: make(chan *framework.Status, 1),
	}

	wp.pendingPlugins = make(map[string]*time.Timer, len(pluginsMaxWaitTime))
	// The time.AfterFunc calls wp.Reject which iterates through pendingPlugins map. Acquire the
	// lock here so that time.AfterFunc can only execute after newWaitingPod finishes.
	wp.mu.Lock()
	defer wp.mu.Unlock()
	for k, v := range pluginsMaxWaitTime {
		plugin, waitTime := k, v
		wp.pendingPlugins[plugin] = time.AfterFunc(waitTime, func() {
			msg := fmt.Sprintf("rejected due to timeout after waiting %v at plugin %v",
				waitTime, plugin)
			wp.Reject(plugin, msg)
		})
	}

	return wp
}

// GetPod returns a reference to the waiting pod.
func (w *waitingPod) GetPod() *v1.Pod {
	return w.pod
}

// GetPendingPlugins returns a list of pending permit plugin's name.
func (w *waitingPod) GetPendingPlugins() []string {
	w.mu.RLock()
	defer w.mu.RUnlock()
	plugins := make([]string, 0, len(w.pendingPlugins))
	for p := range w.pendingPlugins {
		plugins = append(plugins, p)
	}

	return plugins
}

// Allow declares the waiting pod is allowed to be scheduled by plugin pluginName.
// If this is the last remaining plugin to allow, then a success signal is delivered
// to unblock the pod.
func (w *waitingPod) Allow(pluginName string) {
	w.mu.Lock()
	defer w.mu.Unlock()
	if timer, exist := w.pendingPlugins[pluginName]; exist {
		timer.Stop()
		delete(w.pendingPlugins, pluginName)
	}

	// Only signal success status after all plugins have allowed
	if len(w.pendingPlugins) != 0 {
		return
	}

	// The select clause works as a non-blocking send.
	// If there is no receiver, it's a no-op (default case).
	select {
	case w.s <- framework.NewStatus(framework.Success, ""):
	default:
	}
}

// Reject declares the waiting pod unschedulable.
func (w *waitingPod) Reject(pluginName, msg string) {
	w.mu.RLock()
	defer w.mu.RUnlock()
	for _, timer := range w.pendingPlugins {
		timer.Stop()
	}

	// The select clause works as a non-blocking send.
	// If there is no receiver, it's a no-op (default case).
	select {
	case w.s <- framework.NewStatus(framework.Unschedulable, msg).WithPlugin(pluginName):
	default:
	}
}
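As the comment in newWaitingPod explains, Allow and Reject stay non-blocking because the status channel has a buffer of one and every send goes through a select with a default case. A standalone sketch of that signalling pattern, with illustrative names:

// Illustrative sketch only (not part of the vendored code).
package main

import "fmt"

func main() {
	s := make(chan string, 1) // buffer of 1, as in waitingPod.s

	signal := func(msg string) {
		// Non-blocking send: if a signal is already buffered, drop this one.
		select {
		case s <- msg:
		default:
		}
	}

	signal("allowed")  // buffered even though nobody is receiving yet
	signal("rejected") // dropped: a signal is already pending

	fmt.Println(<-s) // "allowed"
}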
1302
vendor/k8s.io/kubernetes/pkg/scheduler/framework/types.go
generated
vendored
Normal file
File diff suppressed because it is too large
224
vendor/k8s.io/kubernetes/pkg/scheduler/metrics/metric_recorder.go
generated
vendored
Normal file
@ -0,0 +1,224 @@
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
	"time"

	"k8s.io/component-base/metrics"
)

// MetricRecorder represents a metric recorder which takes action when the
// metric Inc(), Dec() and Clear()
type MetricRecorder interface {
	Inc()
	Dec()
	Clear()
}

var _ MetricRecorder = &PendingPodsRecorder{}

// PendingPodsRecorder is an implementation of MetricRecorder
type PendingPodsRecorder struct {
	recorder metrics.GaugeMetric
}

// NewActivePodsRecorder returns ActivePods in a Prometheus metric fashion
func NewActivePodsRecorder() *PendingPodsRecorder {
	return &PendingPodsRecorder{
		recorder: ActivePods(),
	}
}

// NewUnschedulablePodsRecorder returns UnschedulablePods in a Prometheus metric fashion
func NewUnschedulablePodsRecorder() *PendingPodsRecorder {
	return &PendingPodsRecorder{
		recorder: UnschedulablePods(),
	}
}

// NewBackoffPodsRecorder returns BackoffPods in a Prometheus metric fashion
func NewBackoffPodsRecorder() *PendingPodsRecorder {
	return &PendingPodsRecorder{
		recorder: BackoffPods(),
	}
}

// NewGatedPodsRecorder returns GatedPods in a Prometheus metric fashion
func NewGatedPodsRecorder() *PendingPodsRecorder {
	return &PendingPodsRecorder{
		recorder: GatedPods(),
	}
}

// Inc increases a metric counter by 1, in an atomic way
func (r *PendingPodsRecorder) Inc() {
	r.recorder.Inc()
}

// Dec decreases a metric counter by 1, in an atomic way
func (r *PendingPodsRecorder) Dec() {
	r.recorder.Dec()
}

// Clear set a metric counter to 0, in an atomic way
func (r *PendingPodsRecorder) Clear() {
	r.recorder.Set(float64(0))
}

// histogramVecMetric is the data structure passed in the buffer channel between the main framework thread
// and the metricsRecorder goroutine.
type histogramVecMetric struct {
	metric      *metrics.HistogramVec
	labelValues []string
	value       float64
}

type gaugeVecMetric struct {
	metric      *metrics.GaugeVec
	labelValues []string
	valueToAdd  float64
}

type gaugeVecMetricKey struct {
	metricName string
	labelValue string
}

// MetricAsyncRecorder records metric in a separate goroutine to avoid overhead in the critical path.
type MetricAsyncRecorder struct {
	// bufferCh is a channel that serves as a metrics buffer before the metricsRecorder goroutine reports it.
	bufferCh chan *histogramVecMetric
	// if bufferSize is reached, incoming metrics will be discarded.
	bufferSize int
	// how often the recorder runs to flush the metrics.
	interval time.Duration

	// aggregatedInflightEventMetric is only to record InFlightEvents metric asynchronously.
	// It's a map from gaugeVecMetricKey to the aggregated value
	// and the aggregated value is flushed to Prometheus every time the interval is reached.
	// Note that we don't lock the map deliberately because we assume the queue takes lock before updating the in-flight events.
	aggregatedInflightEventMetric              map[gaugeVecMetricKey]int
	aggregatedInflightEventMetricLastFlushTime time.Time
	aggregatedInflightEventMetricBufferCh      chan *gaugeVecMetric

	// stopCh is used to stop the goroutine which periodically flushes metrics.
	stopCh <-chan struct{}
	// IsStoppedCh indicates whether the goroutine is stopped. It's used in tests only to make sure
	// the metric flushing goroutine is stopped so that tests can collect metrics for verification.
	IsStoppedCh chan struct{}
}

func NewMetricsAsyncRecorder(bufferSize int, interval time.Duration, stopCh <-chan struct{}) *MetricAsyncRecorder {
	recorder := &MetricAsyncRecorder{
		bufferCh:                      make(chan *histogramVecMetric, bufferSize),
		bufferSize:                    bufferSize,
		interval:                      interval,
		stopCh:                        stopCh,
		aggregatedInflightEventMetric: make(map[gaugeVecMetricKey]int),
		aggregatedInflightEventMetricLastFlushTime: time.Now(),
		aggregatedInflightEventMetricBufferCh:      make(chan *gaugeVecMetric, bufferSize),
		IsStoppedCh: make(chan struct{}),
	}
	go recorder.run()
	return recorder
}

// ObservePluginDurationAsync observes the plugin_execution_duration_seconds metric.
// The metric will be flushed to Prometheus asynchronously.
func (r *MetricAsyncRecorder) ObservePluginDurationAsync(extensionPoint, pluginName, status string, value float64) {
	r.observeMetricAsync(PluginExecutionDuration, value, pluginName, extensionPoint, status)
}

// ObserveQueueingHintDurationAsync observes the queueing_hint_execution_duration_seconds metric.
// The metric will be flushed to Prometheus asynchronously.
func (r *MetricAsyncRecorder) ObserveQueueingHintDurationAsync(pluginName, event, hint string, value float64) {
	r.observeMetricAsync(queueingHintExecutionDuration, value, pluginName, event, hint)
}

// ObserveInFlightEventsAsync observes the in_flight_events metric.
//
// Note that this function is not goroutine-safe;
// we don't lock the map deliberately for the performance reason and we assume the queue (i.e., the caller) takes lock before updating the in-flight events.
func (r *MetricAsyncRecorder) ObserveInFlightEventsAsync(eventLabel string, valueToAdd float64, forceFlush bool) {
	r.aggregatedInflightEventMetric[gaugeVecMetricKey{metricName: InFlightEvents.Name, labelValue: eventLabel}] += int(valueToAdd)

	// Only flush the metric to the channel if the interval is reached.
	// The values are flushed to Prometheus in the run() function, which runs once the interval time.
	// Note: we implement this flushing here, not in FlushMetrics, because, if we did so, we would need to implement a lock for the map, which we want to avoid.
	if forceFlush || time.Since(r.aggregatedInflightEventMetricLastFlushTime) > r.interval {
		for key, value := range r.aggregatedInflightEventMetric {
			newMetric := &gaugeVecMetric{
				metric:      InFlightEvents,
				labelValues: []string{key.labelValue},
				valueToAdd:  float64(value),
			}
			select {
			case r.aggregatedInflightEventMetricBufferCh <- newMetric:
			default:
			}
		}
		r.aggregatedInflightEventMetricLastFlushTime = time.Now()
		// reset
		r.aggregatedInflightEventMetric = make(map[gaugeVecMetricKey]int)
	}
}

func (r *MetricAsyncRecorder) observeMetricAsync(m *metrics.HistogramVec, value float64, labelsValues ...string) {
	newMetric := &histogramVecMetric{
		metric:      m,
		labelValues: labelsValues,
		value:       value,
	}
	select {
	case r.bufferCh <- newMetric:
	default:
	}
}

// run flushes buffered metrics into Prometheus every second.
func (r *MetricAsyncRecorder) run() {
	for {
		select {
		case <-r.stopCh:
			close(r.IsStoppedCh)
			return
		default:
		}
		r.FlushMetrics()
		time.Sleep(r.interval)
	}
}

// FlushMetrics tries to clean up the bufferCh by reading at most bufferSize metrics.
func (r *MetricAsyncRecorder) FlushMetrics() {
	for i := 0; i < r.bufferSize; i++ {
		select {
		case m := <-r.bufferCh:
			m.metric.WithLabelValues(m.labelValues...).Observe(m.value)
		default:
			// no more value
		}

		select {
		case m := <-r.aggregatedInflightEventMetricBufferCh:
			m.metric.WithLabelValues(m.labelValues...).Add(m.valueToAdd)
		default:
			// no more value
		}
	}
}
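MetricAsyncRecorder keeps the scheduling hot path cheap by pushing observations into a bounded channel (dropping them when the buffer is full) and letting a background goroutine flush them on an interval. A simplified, self-contained sketch of that buffering strategy, with hypothetical names such as sample and record:

// Illustrative sketch only (not part of the vendored code).
package main

import (
	"fmt"
	"time"
)

type sample struct {
	name  string
	value float64
}

func main() {
	const bufferSize = 4
	bufferCh := make(chan sample, bufferSize)
	stopCh := make(chan struct{})

	// Non-blocking record: the hot path never waits on the metrics backend.
	record := func(s sample) {
		select {
		case bufferCh <- s:
		default: // buffer full, the observation is discarded
		}
	}

	// Flusher goroutine drains at most bufferSize samples per interval.
	go func() {
		ticker := time.NewTicker(10 * time.Millisecond)
		defer ticker.Stop()
		for {
			select {
			case <-stopCh:
				return
			case <-ticker.C:
				for i := 0; i < bufferSize; i++ {
					select {
					case s := <-bufferCh:
						fmt.Printf("flush %s=%v\n", s.name, s.value)
					default:
					}
				}
			}
		}
	}()

	record(sample{"plugin_execution_duration_seconds", 0.002})
	record(sample{"plugin_execution_duration_seconds", 0.004})
	time.Sleep(30 * time.Millisecond)
	close(stopCh)
}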
416
vendor/k8s.io/kubernetes/pkg/scheduler/metrics/metrics.go
generated
vendored
Normal file
@ -0,0 +1,416 @@
/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
	"sync"
	"time"

	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/component-base/metrics"
	"k8s.io/component-base/metrics/legacyregistry"
	"k8s.io/kubernetes/pkg/features"
	volumebindingmetrics "k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding/metrics"
)

const (
	// SchedulerSubsystem - subsystem name used by scheduler.
	SchedulerSubsystem = "scheduler"
)

// Below are possible values for the work and operation label.
const (
	// PrioritizingExtender - prioritizing extender work/operation label value.
	PrioritizingExtender = "prioritizing_extender"
	// Binding - binding work/operation label value.
	Binding = "binding"
)

const (
	GoroutineResultSuccess = "success"
	GoroutineResultError   = "error"
)

// ExtentionPoints is a list of possible values for the extension_point label.
var ExtentionPoints = []string{
	PreFilter,
	Filter,
	PreFilterExtensionAddPod,
	PreFilterExtensionRemovePod,
	PostFilter,
	PreScore,
	Score,
	ScoreExtensionNormalize,
	PreBind,
	Bind,
	PostBind,
	Reserve,
	Unreserve,
	Permit,
}

const (
	PreFilter                   = "PreFilter"
	Filter                      = "Filter"
	PreFilterExtensionAddPod    = "PreFilterExtensionAddPod"
	PreFilterExtensionRemovePod = "PreFilterExtensionRemovePod"
	PostFilter                  = "PostFilter"
	PreScore                    = "PreScore"
	Score                       = "Score"
	ScoreExtensionNormalize     = "ScoreExtensionNormalize"
	PreBind                     = "PreBind"
	Bind                        = "Bind"
	PostBind                    = "PostBind"
	Reserve                     = "Reserve"
	Unreserve                   = "Unreserve"
	Permit                      = "Permit"
)

const (
	QueueingHintResultQueue     = "Queue"
	QueueingHintResultQueueSkip = "QueueSkip"
	QueueingHintResultError     = "Error"
)

const (
	PodPoppedInFlightEvent = "PodPopped"
)

// All the histogram based metrics have 1ms as size for the smallest bucket.
var (
	scheduleAttempts           *metrics.CounterVec
	EventHandlingLatency       *metrics.HistogramVec
	schedulingLatency          *metrics.HistogramVec
	SchedulingAlgorithmLatency *metrics.Histogram
	PreemptionVictims          *metrics.Histogram
	PreemptionAttempts         *metrics.Counter
	pendingPods                *metrics.GaugeVec
	InFlightEvents             *metrics.GaugeVec
	Goroutines                 *metrics.GaugeVec

	// PodSchedulingDuration is deprecated as of Kubernetes v1.28, and will be removed
	// in v1.31. Please use PodSchedulingSLIDuration instead.
	PodSchedulingDuration           *metrics.HistogramVec
	PodSchedulingSLIDuration        *metrics.HistogramVec
	PodSchedulingAttempts           *metrics.Histogram
	FrameworkExtensionPointDuration *metrics.HistogramVec
	PluginExecutionDuration         *metrics.HistogramVec

	PermitWaitDuration    *metrics.HistogramVec
	CacheSize             *metrics.GaugeVec
	unschedulableReasons  *metrics.GaugeVec
	PluginEvaluationTotal *metrics.CounterVec

	// The below two are only available when the QHint feature gate is enabled.
	queueingHintExecutionDuration *metrics.HistogramVec
	SchedulerQueueIncomingPods    *metrics.CounterVec

	// The below two are only available when the async-preemption feature gate is enabled.
	PreemptionGoroutinesDuration       *metrics.HistogramVec
	PreemptionGoroutinesExecutionTotal *metrics.CounterVec

	// metricsList is a list of all metrics that should be registered always, regardless of any feature gate's value.
	metricsList []metrics.Registerable
)

var registerMetrics sync.Once

// Register all metrics.
func Register() {
	// Register the metrics.
	registerMetrics.Do(func() {
		InitMetrics()
		RegisterMetrics(metricsList...)
		volumebindingmetrics.RegisterVolumeSchedulingMetrics()

		if utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints) {
			RegisterMetrics(queueingHintExecutionDuration, InFlightEvents)
		}
		if utilfeature.DefaultFeatureGate.Enabled(features.SchedulerAsyncPreemption) {
			RegisterMetrics(PreemptionGoroutinesDuration, PreemptionGoroutinesExecutionTotal)
		}
	})
}

func InitMetrics() {
	scheduleAttempts = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "schedule_attempts_total",
			Help:           "Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem.",
			StabilityLevel: metrics.STABLE,
		}, []string{"result", "profile"})

	EventHandlingLatency = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem: SchedulerSubsystem,
			Name:      "event_handling_duration_seconds",
			Help:      "Event handling latency in seconds.",
			// Start with 0.1ms with the last bucket being [~200ms, Inf)
			Buckets:        metrics.ExponentialBuckets(0.0001, 2, 12),
			StabilityLevel: metrics.ALPHA,
		}, []string{"event"})

	schedulingLatency = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "scheduling_attempt_duration_seconds",
			Help:           "Scheduling attempt latency in seconds (scheduling algorithm + binding)",
			Buckets:        metrics.ExponentialBuckets(0.001, 2, 15),
			StabilityLevel: metrics.STABLE,
		}, []string{"result", "profile"})
	SchedulingAlgorithmLatency = metrics.NewHistogram(
		&metrics.HistogramOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "scheduling_algorithm_duration_seconds",
			Help:           "Scheduling algorithm latency in seconds",
			Buckets:        metrics.ExponentialBuckets(0.001, 2, 15),
			StabilityLevel: metrics.ALPHA,
		},
	)
	PreemptionVictims = metrics.NewHistogram(
		&metrics.HistogramOpts{
			Subsystem: SchedulerSubsystem,
			Name:      "preemption_victims",
			Help:      "Number of selected preemption victims",
			// we think #victims>64 is pretty rare, therefore [64, +Inf) is considered a single bucket.
			Buckets:        metrics.ExponentialBuckets(1, 2, 7),
			StabilityLevel: metrics.STABLE,
		})
	PreemptionAttempts = metrics.NewCounter(
		&metrics.CounterOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "preemption_attempts_total",
			Help:           "Total preemption attempts in the cluster till now",
			StabilityLevel: metrics.STABLE,
		})
	pendingPods = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "pending_pods",
			Help:           "Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulablePods that the scheduler attempted to schedule and failed; 'gated' is the number of unschedulable pods that the scheduler never attempted to schedule because they are gated.",
			StabilityLevel: metrics.STABLE,
		}, []string{"queue"})
	InFlightEvents = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "inflight_events",
			Help:           "Number of events currently tracked in the scheduling queue.",
			StabilityLevel: metrics.ALPHA,
		}, []string{"event"})
	Goroutines = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "goroutines",
			Help:           "Number of running goroutines split by the work they do such as binding.",
			StabilityLevel: metrics.ALPHA,
		}, []string{"operation"})

	// PodSchedulingDuration is deprecated as of Kubernetes v1.28, and will be removed
	// in v1.31. Please use PodSchedulingSLIDuration instead.
	PodSchedulingDuration = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem: SchedulerSubsystem,
			Name:      "pod_scheduling_duration_seconds",
			Help:      "E2e latency for a pod being scheduled which may include multiple scheduling attempts.",
			// Start with 10ms with the last bucket being [~88m, Inf).
			Buckets:           metrics.ExponentialBuckets(0.01, 2, 20),
			StabilityLevel:    metrics.STABLE,
			DeprecatedVersion: "1.29.0",
		},
		[]string{"attempts"})

	PodSchedulingSLIDuration = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem: SchedulerSubsystem,
			Name:      "pod_scheduling_sli_duration_seconds",
			Help:      "E2e latency for a pod being scheduled, from the time the pod enters the scheduling queue and might involve multiple scheduling attempts.",
			// Start with 10ms with the last bucket being [~88m, Inf).
			Buckets:        metrics.ExponentialBuckets(0.01, 2, 20),
			StabilityLevel: metrics.BETA,
		},
		[]string{"attempts"})

	PodSchedulingAttempts = metrics.NewHistogram(
		&metrics.HistogramOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "pod_scheduling_attempts",
			Help:           "Number of attempts to successfully schedule a pod.",
			Buckets:        metrics.ExponentialBuckets(1, 2, 5),
			StabilityLevel: metrics.STABLE,
		})

	FrameworkExtensionPointDuration = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem: SchedulerSubsystem,
			Name:      "framework_extension_point_duration_seconds",
			Help:      "Latency for running all plugins of a specific extension point.",
			// Start with 0.1ms with the last bucket being [~200ms, Inf)
			Buckets:        metrics.ExponentialBuckets(0.0001, 2, 12),
			StabilityLevel: metrics.STABLE,
		},
		[]string{"extension_point", "status", "profile"})

	PluginExecutionDuration = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem: SchedulerSubsystem,
			Name:      "plugin_execution_duration_seconds",
			Help:      "Duration for running a plugin at a specific extension point.",
			// Start with 0.01ms with the last bucket being [~22ms, Inf). We use a small factor (1.5)
			// so that we have better granularity since plugin latency is very sensitive.
			Buckets:        metrics.ExponentialBuckets(0.00001, 1.5, 20),
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"plugin", "extension_point", "status"})

	// This is only available when the QHint feature gate is enabled.
	queueingHintExecutionDuration = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem: SchedulerSubsystem,
			Name:      "queueing_hint_execution_duration_seconds",
			Help:      "Duration for running a queueing hint function of a plugin.",
			// Start with 0.01ms with the last bucket being [~22ms, Inf). We use a small factor (1.5)
			// so that we have better granularity since plugin latency is very sensitive.
			Buckets:        metrics.ExponentialBuckets(0.00001, 1.5, 20),
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"plugin", "event", "hint"})

	SchedulerQueueIncomingPods = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "queue_incoming_pods_total",
			Help:           "Number of pods added to scheduling queues by event and queue type.",
			StabilityLevel: metrics.STABLE,
		}, []string{"queue", "event"})

	PermitWaitDuration = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "permit_wait_duration_seconds",
			Help:           "Duration of waiting on permit.",
			Buckets:        metrics.ExponentialBuckets(0.001, 2, 15),
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"result"})

	CacheSize = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "scheduler_cache_size",
			Help:           "Number of nodes, pods, and assumed (bound) pods in the scheduler cache.",
			StabilityLevel: metrics.ALPHA,
		}, []string{"type"})

	unschedulableReasons = metrics.NewGaugeVec(
		&metrics.GaugeOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "unschedulable_pods",
			Help:           "The number of unschedulable pods broken down by plugin name. A pod will increment the gauge for all plugins that caused it to not schedule and so this metric have meaning only when broken down by plugin.",
			StabilityLevel: metrics.ALPHA,
		}, []string{"plugin", "profile"})

	PluginEvaluationTotal = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "plugin_evaluation_total",
			Help:           "Number of attempts to schedule pods by each plugin and the extension point (available only in PreFilter, Filter, PreScore, and Score).",
			StabilityLevel: metrics.ALPHA,
		}, []string{"plugin", "extension_point", "profile"})

	PreemptionGoroutinesDuration = metrics.NewHistogramVec(
		&metrics.HistogramOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "preemption_goroutines_duration_seconds",
			Help:           "Duration in seconds for running goroutines for the preemption.",
			Buckets:        metrics.ExponentialBuckets(0.01, 2, 20),
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"result"})

	PreemptionGoroutinesExecutionTotal = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      SchedulerSubsystem,
			Name:           "preemption_goroutines_execution_total",
			Help:           "Number of preemption goroutines executed.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{"result"})

	metricsList = []metrics.Registerable{
		scheduleAttempts,
		schedulingLatency,
		SchedulingAlgorithmLatency,
		EventHandlingLatency,
		PreemptionVictims,
		PreemptionAttempts,
		pendingPods,
		PodSchedulingDuration,
		PodSchedulingSLIDuration,
		PodSchedulingAttempts,
		FrameworkExtensionPointDuration,
		PluginExecutionDuration,
		SchedulerQueueIncomingPods,
		Goroutines,
		PermitWaitDuration,
		CacheSize,
		unschedulableReasons,
		PluginEvaluationTotal,
	}
}

// RegisterMetrics registers a list of metrics.
// This function is exported because it is intended to be used by out-of-tree plugins to register their custom metrics.
func RegisterMetrics(extraMetrics ...metrics.Registerable) {
	for _, metric := range extraMetrics {
		legacyregistry.MustRegister(metric)
	}
}

// GetGather returns the gatherer. It used by test case outside current package.
func GetGather() metrics.Gatherer {
	return legacyregistry.DefaultGatherer
}

// ActivePods returns the pending pods metrics with the label active
func ActivePods() metrics.GaugeMetric {
	return pendingPods.With(metrics.Labels{"queue": "active"})
}

// BackoffPods returns the pending pods metrics with the label backoff
func BackoffPods() metrics.GaugeMetric {
	return pendingPods.With(metrics.Labels{"queue": "backoff"})
}

// UnschedulablePods returns the pending pods metrics with the label unschedulable
func UnschedulablePods() metrics.GaugeMetric {
	return pendingPods.With(metrics.Labels{"queue": "unschedulable"})
}

// GatedPods returns the pending pods metrics with the label gated
func GatedPods() metrics.GaugeMetric {
	return pendingPods.With(metrics.Labels{"queue": "gated"})
}

// SinceInSeconds gets the time since the specified start in seconds.
func SinceInSeconds(start time.Time) float64 {
	return time.Since(start).Seconds()
}

func UnschedulableReason(plugin string, profile string) metrics.GaugeMetric {
	return unschedulableReasons.With(metrics.Labels{"plugin": plugin, "profile": profile})
}
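RegisterMetrics is exported so out-of-tree plugins can register their own metrics alongside the scheduler's. A hypothetical sketch of such a registration follows; the package and counter name are invented, while NewCounter and RegisterMetrics are the APIs shown above.

// Illustrative sketch only (not part of the vendored code); myplugin is a hypothetical package.
package myplugin

import (
	compbasemetrics "k8s.io/component-base/metrics"
	schedmetrics "k8s.io/kubernetes/pkg/scheduler/metrics"
)

// myPluginChecks is a hypothetical custom counter for an out-of-tree plugin.
var myPluginChecks = compbasemetrics.NewCounter(
	&compbasemetrics.CounterOpts{
		Subsystem:      schedmetrics.SchedulerSubsystem,
		Name:           "my_plugin_checks_total",
		Help:           "Hypothetical counter incremented by a custom out-of-tree plugin.",
		StabilityLevel: compbasemetrics.ALPHA,
	})

func init() {
	// Registers the custom metric with the scheduler's legacy registry.
	schedmetrics.RegisterMetrics(myPluginChecks)
}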
Some files were not shown because too many files have changed in this diff