Changes to accommodate client-go changes and kube vendor update to v1.18.0

Signed-off-by: Humble Chirammal <hchiramm@redhat.com>
Humble Chirammal
2020-04-14 12:34:33 +05:30
committed by mergify[bot]
parent 4c96ad3c85
commit 34fc1d847e
1083 changed files with 50505 additions and 155846 deletions

View File

@ -32,3 +32,7 @@ const HorizontalPodAutoscalerConditionsAnnotation = "autoscaling.alpha.kubernete
// metrics are present. This is here because it's used by both the v2beta1 defaulting
// logic, and the pseudo-defaulting done in v1 conversion.
const DefaultCPUUtilization = 80
// BehaviorSpecsAnnotation is the annotation which holds the HPA constraints specs
// when converting the `Behavior` field from autoscaling/v2beta2
const BehaviorSpecsAnnotation = "autoscaling.alpha.kubernetes.io/behavior"
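For context, a minimal sketch of the stash-and-restore round trip this annotation enables — serializing the Behavior spec so it survives conversion through an API version that lacks the field. The program is hypothetical (upstream marshals the external v2beta2 type; here the internal type is used symmetrically for illustration); only the annotation constant and type names come from this diff.

package main

import (
    "encoding/json"
    "fmt"

    "k8s.io/kubernetes/pkg/apis/autoscaling"
)

func main() {
    window := int32(300)
    behavior := &autoscaling.HorizontalPodAutoscalerBehavior{
        ScaleDown: &autoscaling.HPAScalingRules{StabilizationWindowSeconds: &window},
    }
    // Stash: serialize Behavior into the annotation during conversion "down".
    raw, _ := json.Marshal(behavior)
    annotations := map[string]string{autoscaling.BehaviorSpecsAnnotation: string(raw)}

    // Restore: recover the field when converting back "up".
    restored := &autoscaling.HorizontalPodAutoscalerBehavior{}
    _ = json.Unmarshal([]byte(annotations[autoscaling.BehaviorSpecsAnnotation]), restored)
    fmt.Println(*restored.ScaleDown.StabilizationWindowSeconds) // 300
}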

View File

@ -95,6 +95,90 @@ type HorizontalPodAutoscalerSpec struct {
// more information about how each type of metric must respond.
// +optional
Metrics []MetricSpec
// behavior configures the scaling behavior of the target
// in both Up and Down directions (scaleUp and scaleDown fields respectively).
// If not set, the default HPAScalingRules for scale up and scale down are used.
// +optional
Behavior *HorizontalPodAutoscalerBehavior
}
// HorizontalPodAutoscalerBehavior configures a scaling behavior for Up and Down directions
// (scaleUp and scaleDown fields respectively).
type HorizontalPodAutoscalerBehavior struct {
// scaleUp is scaling policy for scaling Up.
// If not set, the default value is the higher of:
// * increase no more than 4 pods per 60 seconds
// * double the number of pods per 60 seconds
// No stabilization is used.
// +optional
ScaleUp *HPAScalingRules
// scaleDown is scaling policy for scaling Down.
// If not set, the default value is to allow scaling down to minReplicas pods, with a
// 300 second stabilization window (i.e., the highest recommendation for
// the last 300sec is used).
// +optional
ScaleDown *HPAScalingRules
}
// ScalingPolicySelect is used to specify which policy should be used while scaling in a certain direction
type ScalingPolicySelect string
const (
// MaxPolicySelect selects the policy with the highest possible change.
MaxPolicySelect ScalingPolicySelect = "Max"
// MinPolicySelect selects the policy with the lowest possible change.
MinPolicySelect ScalingPolicySelect = "Min"
// DisabledPolicySelect disables the scaling in this direction.
DisabledPolicySelect ScalingPolicySelect = "Disabled"
)
// HPAScalingRules configures the scaling behavior for one direction.
// These Rules are applied after calculating DesiredReplicas from metrics for the HPA.
// They can limit the scaling velocity by specifying scaling policies.
// They can prevent flapping by specifying the stabilization window, so that the
// number of replicas is not set instantly; instead, the safest value from the stabilization
// window is chosen.
type HPAScalingRules struct {
// StabilizationWindowSeconds is the number of seconds for which past recommendations should be
// considered while scaling up or scaling down.
// StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour).
// If not set, use the default values:
// - For scale up: 0 (i.e. no stabilization is done).
// - For scale down: 300 (i.e. the stabilization window is 300 seconds long).
// +optional
StabilizationWindowSeconds *int32
// selectPolicy is used to specify which policy should be used.
// If not set, the default value MaxPolicySelect is used.
// +optional
SelectPolicy *ScalingPolicySelect
// policies is a list of potential scaling policies which can be used during scaling.
// At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid
// +optional
Policies []HPAScalingPolicy
}
// HPAScalingPolicyType is the type of the policy which could be used while making scaling decisions.
type HPAScalingPolicyType string
const (
// PodsScalingPolicy is a policy used to specify a change in absolute number of pods.
PodsScalingPolicy HPAScalingPolicyType = "Pods"
// PercentScalingPolicy is a policy used to specify a relative amount of change with respect to
// the current number of pods.
PercentScalingPolicy HPAScalingPolicyType = "Percent"
)
// HPAScalingPolicy is a single policy which must hold true for a specified past interval.
type HPAScalingPolicy struct {
// Type is used to specify the scaling policy.
Type HPAScalingPolicyType
// Value contains the amount of change which is permitted by the policy.
// It must be greater than zero
Value int32
// PeriodSeconds specifies the window of time for which the policy should hold true.
// PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min).
PeriodSeconds int32
}
// MetricSourceType indicates the type of metric.
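Taken together, these fields let a caller express directional scaling constraints. A minimal sketch (hypothetical program; only the type, field, and constant names come from this diff) of a conservative scale-down configuration:

package main

import (
    "fmt"

    "k8s.io/kubernetes/pkg/apis/autoscaling"
)

func main() {
    window := int32(600)
    selectMin := autoscaling.MinPolicySelect
    // Remove at most one pod every two minutes, pick the smallest allowed
    // change, and look back 10 minutes before acting on a lower recommendation.
    behavior := &autoscaling.HorizontalPodAutoscalerBehavior{
        ScaleDown: &autoscaling.HPAScalingRules{
            StabilizationWindowSeconds: &window,
            SelectPolicy:               &selectMin,
            Policies: []autoscaling.HPAScalingPolicy{
                {Type: autoscaling.PodsScalingPolicy, Value: 1, PeriodSeconds: 120},
            },
        },
    }
    fmt.Printf("%+v\n", behavior.ScaleDown.Policies[0])
}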

View File

@ -77,6 +77,53 @@ func (in *ExternalMetricStatus) DeepCopy() *ExternalMetricStatus {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HPAScalingPolicy) DeepCopyInto(out *HPAScalingPolicy) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HPAScalingPolicy.
func (in *HPAScalingPolicy) DeepCopy() *HPAScalingPolicy {
if in == nil {
return nil
}
out := new(HPAScalingPolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HPAScalingRules) DeepCopyInto(out *HPAScalingRules) {
*out = *in
if in.StabilizationWindowSeconds != nil {
in, out := &in.StabilizationWindowSeconds, &out.StabilizationWindowSeconds
*out = new(int32)
**out = **in
}
if in.SelectPolicy != nil {
in, out := &in.SelectPolicy, &out.SelectPolicy
*out = new(ScalingPolicySelect)
**out = **in
}
if in.Policies != nil {
in, out := &in.Policies, &out.Policies
*out = make([]HPAScalingPolicy, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HPAScalingRules.
func (in *HPAScalingRules) DeepCopy() *HPAScalingRules {
if in == nil {
return nil
}
out := new(HPAScalingRules)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HorizontalPodAutoscaler) DeepCopyInto(out *HorizontalPodAutoscaler) {
*out = *in
@ -105,6 +152,32 @@ func (in *HorizontalPodAutoscaler) DeepCopyObject() runtime.Object {
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HorizontalPodAutoscalerBehavior) DeepCopyInto(out *HorizontalPodAutoscalerBehavior) {
*out = *in
if in.ScaleUp != nil {
in, out := &in.ScaleUp, &out.ScaleUp
*out = new(HPAScalingRules)
(*in).DeepCopyInto(*out)
}
if in.ScaleDown != nil {
in, out := &in.ScaleDown, &out.ScaleDown
*out = new(HPAScalingRules)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HorizontalPodAutoscalerBehavior.
func (in *HorizontalPodAutoscalerBehavior) DeepCopy() *HorizontalPodAutoscalerBehavior {
if in == nil {
return nil
}
out := new(HorizontalPodAutoscalerBehavior)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HorizontalPodAutoscalerCondition) DeepCopyInto(out *HorizontalPodAutoscalerCondition) {
*out = *in
@ -171,6 +244,11 @@ func (in *HorizontalPodAutoscalerSpec) DeepCopyInto(out *HorizontalPodAutoscaler
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Behavior != nil {
in, out := &in.Behavior, &out.Behavior
*out = new(HorizontalPodAutoscalerBehavior)
(*in).DeepCopyInto(*out)
}
return
}
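The generated deepcopy for the new pointer-valued Behavior field means a copied spec can be mutated independently of the original. A small illustrative sketch (hypothetical program; types as in this diff):

package main

import (
    "fmt"

    "k8s.io/kubernetes/pkg/apis/autoscaling"
)

func main() {
    window := int32(300)
    orig := autoscaling.HorizontalPodAutoscalerSpec{
        Behavior: &autoscaling.HorizontalPodAutoscalerBehavior{
            ScaleDown: &autoscaling.HPAScalingRules{StabilizationWindowSeconds: &window},
        },
    }
    clone := orig.DeepCopy() // allocates fresh Behavior, ScaleDown, and int32
    *clone.Behavior.ScaleDown.StabilizationWindowSeconds = 0
    // The original is untouched: prints "300 0".
    fmt.Println(*orig.Behavior.ScaleDown.StabilizationWindowSeconds,
        *clone.Behavior.ScaleDown.StabilizationWindowSeconds)
}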

View File

@ -40,6 +40,14 @@ func (rl *ResourceList) Memory() *resource.Quantity {
return &resource.Quantity{Format: resource.BinarySI}
}
// Storage returns the Storage limit if specified.
func (rl *ResourceList) Storage() *resource.Quantity {
if val, ok := (*rl)[ResourceStorage]; ok {
return &val
}
return &resource.Quantity{Format: resource.BinarySI}
}
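These accessors return a zero Quantity (in BinarySI format) when the key is absent, so callers never need a nil check. A short usage sketch (hypothetical program; the internal core types are as in this diff):

package main

import (
    "fmt"

    "k8s.io/apimachinery/pkg/api/resource"
    core "k8s.io/kubernetes/pkg/apis/core"
)

func main() {
    rl := core.ResourceList{
        core.ResourceStorage: resource.MustParse("10Gi"),
    }
    fmt.Println(rl.Storage().String()) // "10Gi"
    fmt.Println(rl.Memory().String())  // "0" – absent keys yield a zero Quantity
}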
// Pods returns the Pods limit if specified.
func (rl *ResourceList) Pods() *resource.Quantity {
if val, ok := (*rl)[ResourcePods]; ok {

View File

@ -302,7 +302,6 @@ type PersistentVolumeSpec struct {
MountOptions []string
// volumeMode defines if a volume is intended to be used with a formatted filesystem
// or to remain in raw block state. Value of Filesystem is implied when not included in spec.
// This is a beta feature.
// +optional
VolumeMode *PersistentVolumeMode
// NodeAffinity defines constraints that limit what nodes this volume can be accessed from.
@ -416,17 +415,17 @@ type PersistentVolumeClaimSpec struct {
StorageClassName *string
// volumeMode defines what type of volume is required by the claim.
// Value of Filesystem is implied when not included in claim spec.
// This is a beta feature.
// +optional
VolumeMode *PersistentVolumeMode
// This field can be used to specify either:
// * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot)
// * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot - Beta)
// * An existing PVC (PersistentVolumeClaim)
// In order to use either of these DataSource types, the appropriate feature gate
// must be enabled (VolumeSnapshotDataSource, VolumePVCDataSource)
// If the provisioner can support the specified data source, it will create
// a new volume based on the contents of the specified PVC or Snapshot.
// If the provisioner does not support the specified data source, the volume will
// * An existing custom resource/object that implements data population (Alpha)
// In order to use VolumeSnapshot object types, the appropriate feature gate
// must be enabled (VolumeSnapshotDataSource or AnyVolumeDataSource)
// If the provisioner or an external controller can support the specified data source,
// it will create a new volume based on the contents of the specified data source.
// If the specified data source is not supported, the volume will
// not be created and the failure will be reported as an event.
// In the future, we plan to support more data source types and the behavior
// of the provisioner may change.
@ -583,9 +582,10 @@ type StorageMedium string
// These are the valid value for StorageMedium
const (
StorageMediumDefault StorageMedium = "" // use whatever the default is for the node
StorageMediumMemory StorageMedium = "Memory" // use memory (tmpfs)
StorageMediumHugePages StorageMedium = "HugePages" // use hugepages
StorageMediumDefault StorageMedium = "" // use whatever the default is for the node
StorageMediumMemory StorageMedium = "Memory" // use memory (tmpfs)
StorageMediumHugePages StorageMedium = "HugePages" // use hugepages
StorageMediumHugePagesPrefix StorageMedium = "HugePages-" // prefix for full medium notation HugePages-<size>
)
// Protocol defines network protocols supported for things like container ports.
@ -2056,7 +2056,6 @@ type Container struct {
// +optional
VolumeMounts []VolumeMount
// volumeDevices is the list of block devices to be used by the container.
// This is a beta feature.
// +optional
VolumeDevices []VolumeDevice
// +optional
@ -2765,8 +2764,7 @@ type PodSpec struct {
EnableServiceLinks *bool
// TopologySpreadConstraints describes how a group of pods ought to spread across topology
// domains. Scheduler will schedule pods in a way which abides by the constraints.
// This field is alpha-level and is only honored by clusters that enables the EvenPodsSpread
// feature.
// This field is only honored by clusters that enable the EvenPodsSpread feature.
// All topologySpreadConstraints are ANDed.
// +optional
TopologySpreadConstraints []TopologySpreadConstraint
@ -2787,6 +2785,22 @@ type Sysctl struct {
Value string
}
// PodFSGroupChangePolicy holds policies that will be used for applying fsGroup to a volume
// when volume is mounted.
type PodFSGroupChangePolicy string
const (
// FSGroupChangeOnRootMismatch indicates that volume's ownership and permissions will be changed
// only when permission and ownership of root directory does not match with expected
// permissions on the volume. This can help shorten the time it takes to change
// ownership and permissions of a volume.
FSGroupChangeOnRootMismatch PodFSGroupChangePolicy = "OnRootMismatch"
// FSGroupChangeAlways indicates that volume's ownership and permissions
// should always be changed whenever volume is mounted inside a Pod. This is
// the default behavior.
FSGroupChangeAlways PodFSGroupChangePolicy = "Always"
)
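A sketch of how a pod-level security context would opt into the cheaper permission handling (hypothetical snippet; the field and constant names come from this diff):

package main

import (
    "fmt"

    core "k8s.io/kubernetes/pkg/apis/core"
)

func main() {
    fsGroup := int64(2000)
    policy := core.FSGroupChangeOnRootMismatch
    sc := core.PodSecurityContext{
        FSGroup: &fsGroup,
        // Skip the recursive ownership change when the volume root already
        // has the expected owner and permissions.
        FSGroupChangePolicy: &policy,
    }
    fmt.Println(*sc.FSGroupChangePolicy) // OnRootMismatch
}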
// PodSecurityContext holds pod-level security attributes and common container settings.
// Some fields are also present in container.securityContext. Field values of
// container.securityContext take precedence over field values of PodSecurityContext.
@ -2866,6 +2880,14 @@ type PodSecurityContext struct {
// If unset, the Kubelet will not modify the ownership and permissions of any volume.
// +optional
FSGroup *int64
// fsGroupChangePolicy defines behavior of changing ownership and permission of the volume
// before being exposed inside Pod. This field will only apply to
// volume types which support fsGroup based ownership (and permissions).
// It will have no effect on ephemeral volume types such as: secret, configmaps
// and emptydir.
// Valid values are "OnRootMismatch" and "Always". If not specified defaults to "Always".
// +optional
FSGroupChangePolicy *PodFSGroupChangePolicy
// Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported
// sysctls (by the container runtime) might fail to launch.
// +optional
@ -2969,7 +2991,6 @@ type EphemeralContainerCommon struct {
// +optional
VolumeMounts []VolumeMount
// volumeDevices is the list of block devices to be used by the container.
// This is a beta feature.
// +optional
VolumeDevices []VolumeDevice
// Probes are not allowed for ephemeral containers.
@ -3533,6 +3554,16 @@ type ServicePort struct {
// The IP protocol for this port. Supports "TCP", "UDP", and "SCTP".
Protocol Protocol
// The application protocol for this port.
// This field follows standard Kubernetes label syntax.
// Un-prefixed names are reserved for IANA standard service names (as per
// RFC-6335 and http://www.iana.org/assignments/service-names).
// Non-standard protocols should use prefixed names such as
// mycompany.com/my-custom-protocol.
// Field can be enabled with ServiceAppProtocol feature gate.
// +optional
AppProtocol *string
// The port that will be exposed on the service.
Port int32
@ -3673,6 +3704,16 @@ type EndpointPort struct {
// The IP protocol for this port.
Protocol Protocol
// The application protocol for this port.
// This field follows standard Kubernetes label syntax.
// Un-prefixed names are reserved for IANA standard service names (as per
// RFC-6335 and http://www.iana.org/assignments/service-names).
// Non-standard protocols should use prefixed names such as
// mycompany.com/my-custom-protocol.
// Field can be enabled with ServiceAppProtocol feature gate.
// +optional
AppProtocol *string
}
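With the ServiceAppProtocol feature gate enabled, a port can carry an application-protocol hint; non-IANA names must be prefixed, as the comments above note. A hypothetical construction sketch using these fields:

package main

import (
    "fmt"

    core "k8s.io/kubernetes/pkg/apis/core"
)

func main() {
    proto := "mycompany.com/my-custom-protocol" // prefixed, non-IANA name
    sp := core.ServicePort{
        Name:        "custom",
        Protocol:    core.ProtocolTCP,
        AppProtocol: &proto,
        Port:        8080,
    }
    fmt.Println(sp.Name, *sp.AppProtocol)
}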
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@ -4735,6 +4776,12 @@ type Secret struct {
// +optional
metav1.ObjectMeta
// Immutable field, if set, ensures that data stored in the Secret cannot
// be updated (only object metadata can be modified).
// This is an alpha field enabled by ImmutableEphemeralVolumes feature gate.
// +optional
Immutable *bool
// Data contains the secret data. Each key must consist of alphanumeric
// characters, '-', '_' or '.'. The serialized form of the secret data is a
// base64 encoded string, representing the arbitrary (possibly non-string)
@ -4857,6 +4904,12 @@ type ConfigMap struct {
// +optional
metav1.ObjectMeta
// Immutable field, if set, ensures that data stored in the ConfigMap cannot
// be updated (only object metadata can be modified).
// This is an alpha field enabled by ImmutableEphemeralVolumes feature gate.
// +optional
Immutable *bool
// Data contains the configuration data.
// Each key must consist of alphanumeric characters, '-', '_' or '.'.
// Values with non-UTF-8 byte sequences must use the BinaryData field.
@ -5064,14 +5117,12 @@ type SELinuxOptions struct {
// WindowsSecurityContextOptions contain Windows-specific options and credentials.
type WindowsSecurityContextOptions struct {
// GMSACredentialSpecName is the name of the GMSA credential spec to use.
// This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag.
// +optional
GMSACredentialSpecName *string
// GMSACredentialSpec is where the GMSA admission webhook
// (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the
// GMSA credential spec named by the GMSACredentialSpecName field.
// This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag.
// +optional
GMSACredentialSpec *string
@ -5079,7 +5130,6 @@ type WindowsSecurityContextOptions struct {
// Defaults to the user specified in image metadata if unspecified.
// May also be set in PodSecurityContext. If set in both SecurityContext and
// PodSecurityContext, the value specified in SecurityContext takes precedence.
// This field is beta-level and may be disabled with the WindowsRunAsUserName feature flag.
// +optional
RunAsUserName *string
}

View File

@ -41,10 +41,11 @@ go_test(
embed = [":go_default_library"],
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/api/testapi:go_default_library",
"//pkg/apis/apps:go_default_library",
"//pkg/apis/apps/install:go_default_library",
"//pkg/apis/core:go_default_library",
"//pkg/apis/core/fuzzer:go_default_library",
"//pkg/apis/core/install:go_default_library",
"//pkg/features:go_default_library",
"//staging/src/k8s.io/api/apps/v1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",

View File

@ -253,9 +253,13 @@ func Convert_v1_PodStatus_To_core_PodStatus(in *v1.PodStatus, out *core.PodStatu
return err
}
// If both fields (v1.PodIPs and v1.PodIP) are provided, then test v1.PodIP == v1.PodIPs[0]
// If both fields (v1.PodIPs and v1.PodIP) are provided and differ, then PodIP is authoritative for compatibility with older kubelets
if (len(in.PodIP) > 0 && len(in.PodIPs) > 0) && (in.PodIP != in.PodIPs[0].IP) {
return fmt.Errorf("conversion Error: v1.PodIP(%v) != v1.PodIPs[0](%v)", in.PodIP, in.PodIPs[0].IP)
out.PodIPs = []core.PodIP{
{
IP: in.PodIP,
},
}
}
// at this point, autoConvert copied v1.PodIPs -> core.PodIPs
// if v1.PodIPs was empty but v1.PodIP is not, then set core.PodIPs[0] with v1.PodIP
@ -321,9 +325,9 @@ func Convert_v1_NodeSpec_To_core_NodeSpec(in *v1.NodeSpec, out *core.NodeSpec, s
if err := autoConvert_v1_NodeSpec_To_core_NodeSpec(in, out, s); err != nil {
return err
}
// If both fields (v1.PodCIDRs and v1.PodCIDR) are provided, then test v1.PodCIDR == v1.PodCIDRs[0]
// If both fields (v1.PodCIDRs and v1.PodCIDR) are provided and differ, then PodCIDR is authoritative for compatibility with older clients
if (len(in.PodCIDR) > 0 && len(in.PodCIDRs) > 0) && (in.PodCIDR != in.PodCIDRs[0]) {
return fmt.Errorf("conversion Error: v1.PodCIDR(%v) != v1.CIDRs[0](%v)", in.PodCIDR, in.PodCIDRs[0])
out.PodCIDRs = []string{in.PodCIDR}
}
// at this point, autoConvert copied v1.PodCIDRs -> core.PodCIDRs
@ -506,6 +510,26 @@ func dropInitContainerAnnotations(oldAnnotations map[string]string) map[string]s
return newAnnotations
}
// Convert_core_LoadBalancerStatus_To_v1_LoadBalancerStatus is defined outside the autogenerated file for use by other API packages
func Convert_core_LoadBalancerStatus_To_v1_LoadBalancerStatus(in *core.LoadBalancerStatus, out *v1.LoadBalancerStatus, s conversion.Scope) error {
return autoConvert_core_LoadBalancerStatus_To_v1_LoadBalancerStatus(in, out, s)
}
// Convert_v1_LoadBalancerStatus_To_core_LoadBalancerStatus is defined outside the autogenerated file for use by other API packages
func Convert_v1_LoadBalancerStatus_To_core_LoadBalancerStatus(in *v1.LoadBalancerStatus, out *core.LoadBalancerStatus, s conversion.Scope) error {
return autoConvert_v1_LoadBalancerStatus_To_core_LoadBalancerStatus(in, out, s)
}
// Convert_core_Volume_To_v1_Volume is defined outside the autogenerated file for use by other API packages
func Convert_core_Volume_To_v1_Volume(in *core.Volume, out *v1.Volume, s conversion.Scope) error {
return autoConvert_core_Volume_To_v1_Volume(in, out, s)
}
// Convert_v1_Volume_To_core_Volume is defined outside the autogenerated file for use by other API packages
func Convert_v1_Volume_To_core_Volume(in *v1.Volume, out *core.Volume, s conversion.Scope) error {
return autoConvert_v1_Volume_To_core_Volume(in, out, s)
}
// Convert_core_PersistentVolumeSpec_To_v1_PersistentVolumeSpec is defined outside the autogenerated file for use by other API packages
func Convert_core_PersistentVolumeSpec_To_v1_PersistentVolumeSpec(in *core.PersistentVolumeSpec, out *v1.PersistentVolumeSpec, s conversion.Scope) error {
return autoConvert_core_PersistentVolumeSpec_To_v1_PersistentVolumeSpec(in, out, s)

View File

@ -85,6 +85,45 @@ func HugePageSizeFromResourceName(name v1.ResourceName) (resource.Quantity, erro
return resource.ParseQuantity(pageSize)
}
// HugePageUnitSizeFromByteSize returns the hugepage size in the format
// <size><unit-prefix>B (1024 = "1KB", 1048576 = "1MB", etc).
// `size` must divide evenly into the largest unit that can be expressed.
func HugePageUnitSizeFromByteSize(size int64) (string, error) {
// hugePageSizeUnitList is borrowed from opencontainers/runc/libcontainer/cgroups/utils.go
var hugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
idx := 0
len := len(hugePageSizeUnitList) - 1
for size%1024 == 0 && idx < len {
size /= 1024
idx++
}
if size > 1024 && idx < len {
return "", fmt.Errorf("size: %d%s must be guaranteed to divisible into the largest units", size, hugePageSizeUnitList[idx])
}
return fmt.Sprintf("%d%s", size, hugePageSizeUnitList[idx]), nil
}
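For example, 2 MiB collapses by two factors of 1024 to "2MB". A hypothetical call site (the helper package path is assumed from the hunk context above):

package main

import (
    "fmt"

    v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
)

func main() {
    // 2097152 bytes divides by 1024 twice: 2097152 -> 2048 (KB) -> 2 (MB).
    s, err := v1helper.HugePageUnitSizeFromByteSize(2 * 1024 * 1024)
    if err != nil {
        panic(err)
    }
    fmt.Println(s) // 2MB
}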
// IsHugePageMedium returns true if the volume medium is in 'HugePages[-size]' format
func IsHugePageMedium(medium v1.StorageMedium) bool {
if medium == v1.StorageMediumHugePages {
return true
}
return strings.HasPrefix(string(medium), string(v1.StorageMediumHugePagesPrefix))
}
// HugePageSizeFromMedium returns the page size for the specified huge page medium.
// If the specified input is not a valid huge page medium an error is returned.
func HugePageSizeFromMedium(medium v1.StorageMedium) (resource.Quantity, error) {
if !IsHugePageMedium(medium) {
return resource.Quantity{}, fmt.Errorf("medium: %s is not a hugepage medium", medium)
}
if medium == v1.StorageMediumHugePages {
return resource.Quantity{}, fmt.Errorf("medium: %s doesn't have size information", medium)
}
pageSize := strings.TrimPrefix(string(medium), string(v1.StorageMediumHugePagesPrefix))
return resource.ParseQuantity(pageSize)
}
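Combined, these two helpers parse the new full medium notation introduced by StorageMediumHugePagesPrefix. A hypothetical call site (helper package path assumed):

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
)

func main() {
    medium := v1.StorageMedium("HugePages-2Mi")
    if v1helper.IsHugePageMedium(medium) {
        q, err := v1helper.HugePageSizeFromMedium(medium)
        if err != nil {
            panic(err)
        }
        fmt.Println(q.String()) // 2Mi
    }
}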
// IsOvercommitAllowed returns true if the resource is in the default
// namespace and is not hugepages.
func IsOvercommitAllowed(name v1.ResourceName) bool {
@ -395,22 +434,37 @@ type taintsFilterFunc func(*v1.Taint) bool
// TolerationsTolerateTaintsWithFilter checks if given tolerations tolerates
// all the taints that apply to the filter in given taint list.
// DEPRECATED: Please use FindMatchingUntoleratedTaint instead.
func TolerationsTolerateTaintsWithFilter(tolerations []v1.Toleration, taints []v1.Taint, applyFilter taintsFilterFunc) bool {
if len(taints) == 0 {
return true
}
_, isUntolerated := FindMatchingUntoleratedTaint(taints, tolerations, applyFilter)
return !isUntolerated
}
for i := range taints {
if applyFilter != nil && !applyFilter(&taints[i]) {
// FindMatchingUntoleratedTaint checks if the given tolerations tolerates
// all the filtered taints, and returns the first taint without a toleration
func FindMatchingUntoleratedTaint(taints []v1.Taint, tolerations []v1.Toleration, inclusionFilter taintsFilterFunc) (v1.Taint, bool) {
filteredTaints := getFilteredTaints(taints, inclusionFilter)
for _, taint := range filteredTaints {
if !TolerationsTolerateTaint(tolerations, &taint) {
return taint, true
}
}
return v1.Taint{}, false
}
// getFilteredTaints returns a list of taints satisfying the filter predicate
func getFilteredTaints(taints []v1.Taint, inclusionFilter taintsFilterFunc) []v1.Taint {
if inclusionFilter == nil {
return taints
}
filteredTaints := []v1.Taint{}
for _, taint := range taints {
if !inclusionFilter(&taint) {
continue
}
if !TolerationsTolerateTaint(tolerations, &taints[i]) {
return false
}
filteredTaints = append(filteredTaints, taint)
}
return true
return filteredTaints
}
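A sketch of the refactored lookup in use (hypothetical program; the function names come from this diff, and a nil filter means every taint is considered):

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
)

func main() {
    taints := []v1.Taint{{Key: "dedicated", Value: "gpu", Effect: v1.TaintEffectNoSchedule}}
    tolerations := []v1.Toleration{} // tolerates nothing
    taint, untolerated := v1helper.FindMatchingUntoleratedTaint(taints, tolerations, nil)
    if untolerated {
        fmt.Println("first untolerated taint:", taint.Key) // dedicated
    }
}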
// Returns true and list of Tolerations matching all Taints if all are tolerated, or false otherwise.

View File

@ -33,7 +33,7 @@ func init() {
localSchemeBuilder.Register(addDefaultingFuncs, addConversionFuncs)
}
// TODO: remove these global varialbes
// TODO: remove these global variables
// GroupName is the group name use in this package
const GroupName = ""

View File

@ -821,16 +821,6 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*v1.LoadBalancerStatus)(nil), (*core.LoadBalancerStatus)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1_LoadBalancerStatus_To_core_LoadBalancerStatus(a.(*v1.LoadBalancerStatus), b.(*core.LoadBalancerStatus), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*core.LoadBalancerStatus)(nil), (*v1.LoadBalancerStatus)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_core_LoadBalancerStatus_To_v1_LoadBalancerStatus(a.(*core.LoadBalancerStatus), b.(*v1.LoadBalancerStatus), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*v1.LocalObjectReference)(nil), (*core.LocalObjectReference)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1_LocalObjectReference_To_core_LocalObjectReference(a.(*v1.LocalObjectReference), b.(*core.LocalObjectReference), scope)
}); err != nil {
@ -1916,16 +1906,6 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*v1.Volume)(nil), (*core.Volume)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1_Volume_To_core_Volume(a.(*v1.Volume), b.(*core.Volume), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*core.Volume)(nil), (*v1.Volume)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_core_Volume_To_v1_Volume(a.(*core.Volume), b.(*v1.Volume), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*v1.VolumeDevice)(nil), (*core.VolumeDevice)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1_VolumeDevice_To_core_VolumeDevice(a.(*v1.VolumeDevice), b.(*core.VolumeDevice), scope)
}); err != nil {
@ -2056,6 +2036,11 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddConversionFunc((*core.LoadBalancerStatus)(nil), (*v1.LoadBalancerStatus)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_core_LoadBalancerStatus_To_v1_LoadBalancerStatus(a.(*core.LoadBalancerStatus), b.(*v1.LoadBalancerStatus), scope)
}); err != nil {
return err
}
if err := s.AddConversionFunc((*core.NodeSpec)(nil), (*v1.NodeSpec)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_core_NodeSpec_To_v1_NodeSpec(a.(*core.NodeSpec), b.(*v1.NodeSpec), scope)
}); err != nil {
@ -2091,6 +2076,16 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddConversionFunc((*core.Volume)(nil), (*v1.Volume)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_core_Volume_To_v1_Volume(a.(*core.Volume), b.(*v1.Volume), scope)
}); err != nil {
return err
}
if err := s.AddConversionFunc((*v1.LoadBalancerStatus)(nil), (*core.LoadBalancerStatus)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1_LoadBalancerStatus_To_core_LoadBalancerStatus(a.(*v1.LoadBalancerStatus), b.(*core.LoadBalancerStatus), scope)
}); err != nil {
return err
}
if err := s.AddConversionFunc((*v1.NodeSpec)(nil), (*core.NodeSpec)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1_NodeSpec_To_core_NodeSpec(a.(*v1.NodeSpec), b.(*core.NodeSpec), scope)
}); err != nil {
@ -2151,6 +2146,11 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddConversionFunc((*v1.Volume)(nil), (*core.Volume)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1_Volume_To_core_Volume(a.(*v1.Volume), b.(*core.Volume), scope)
}); err != nil {
return err
}
return nil
}
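Moving these wrappers from AddGeneratedConversionFunc to AddConversionFunc registers them as hand-written overrides rather than generated defaults; either way the scheme dispatches to them. A hypothetical sketch (the internal conversion package path and its exported RegisterConversions are assumptions based on the hunks above):

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/runtime"
    core "k8s.io/kubernetes/pkg/apis/core"
    corev1 "k8s.io/kubernetes/pkg/apis/core/v1"
)

func main() {
    scheme := runtime.NewScheme()
    if err := corev1.RegisterConversions(scheme); err != nil {
        panic(err)
    }
    in := &v1.Volume{Name: "data"}
    out := &core.Volume{}
    // Dispatches to the Convert_v1_Volume_To_core_Volume func registered above.
    if err := scheme.Convert(in, out, nil); err != nil {
        panic(err)
    }
    fmt.Println(out.Name) // data
}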
@ -2642,6 +2642,7 @@ func Convert_core_ComponentStatusList_To_v1_ComponentStatusList(in *core.Compone
func autoConvert_v1_ConfigMap_To_core_ConfigMap(in *v1.ConfigMap, out *core.ConfigMap, s conversion.Scope) error {
out.ObjectMeta = in.ObjectMeta
out.Immutable = (*bool)(unsafe.Pointer(in.Immutable))
out.Data = *(*map[string]string)(unsafe.Pointer(&in.Data))
out.BinaryData = *(*map[string][]byte)(unsafe.Pointer(&in.BinaryData))
return nil
@ -2654,6 +2655,7 @@ func Convert_v1_ConfigMap_To_core_ConfigMap(in *v1.ConfigMap, out *core.ConfigMa
func autoConvert_core_ConfigMap_To_v1_ConfigMap(in *core.ConfigMap, out *v1.ConfigMap, s conversion.Scope) error {
out.ObjectMeta = in.ObjectMeta
out.Immutable = (*bool)(unsafe.Pointer(in.Immutable))
out.Data = *(*map[string]string)(unsafe.Pointer(&in.Data))
out.BinaryData = *(*map[string][]byte)(unsafe.Pointer(&in.BinaryData))
return nil
@ -3224,6 +3226,7 @@ func autoConvert_v1_EndpointPort_To_core_EndpointPort(in *v1.EndpointPort, out *
out.Name = in.Name
out.Port = in.Port
out.Protocol = core.Protocol(in.Protocol)
out.AppProtocol = (*string)(unsafe.Pointer(in.AppProtocol))
return nil
}
@ -3236,6 +3239,7 @@ func autoConvert_core_EndpointPort_To_v1_EndpointPort(in *core.EndpointPort, out
out.Name = in.Name
out.Port = in.Port
out.Protocol = v1.Protocol(in.Protocol)
out.AppProtocol = (*string)(unsafe.Pointer(in.AppProtocol))
return nil
}
@ -4261,21 +4265,11 @@ func autoConvert_v1_LoadBalancerStatus_To_core_LoadBalancerStatus(in *v1.LoadBal
return nil
}
// Convert_v1_LoadBalancerStatus_To_core_LoadBalancerStatus is an autogenerated conversion function.
func Convert_v1_LoadBalancerStatus_To_core_LoadBalancerStatus(in *v1.LoadBalancerStatus, out *core.LoadBalancerStatus, s conversion.Scope) error {
return autoConvert_v1_LoadBalancerStatus_To_core_LoadBalancerStatus(in, out, s)
}
func autoConvert_core_LoadBalancerStatus_To_v1_LoadBalancerStatus(in *core.LoadBalancerStatus, out *v1.LoadBalancerStatus, s conversion.Scope) error {
out.Ingress = *(*[]v1.LoadBalancerIngress)(unsafe.Pointer(&in.Ingress))
return nil
}
// Convert_core_LoadBalancerStatus_To_v1_LoadBalancerStatus is an autogenerated conversion function.
func Convert_core_LoadBalancerStatus_To_v1_LoadBalancerStatus(in *core.LoadBalancerStatus, out *v1.LoadBalancerStatus, s conversion.Scope) error {
return autoConvert_core_LoadBalancerStatus_To_v1_LoadBalancerStatus(in, out, s)
}
func autoConvert_v1_LocalObjectReference_To_core_LocalObjectReference(in *v1.LocalObjectReference, out *core.LocalObjectReference, s conversion.Scope) error {
out.Name = in.Name
return nil
@ -5947,6 +5941,7 @@ func autoConvert_v1_PodSecurityContext_To_core_PodSecurityContext(in *v1.PodSecu
out.SupplementalGroups = *(*[]int64)(unsafe.Pointer(&in.SupplementalGroups))
out.FSGroup = (*int64)(unsafe.Pointer(in.FSGroup))
out.Sysctls = *(*[]core.Sysctl)(unsafe.Pointer(&in.Sysctls))
out.FSGroupChangePolicy = (*core.PodFSGroupChangePolicy)(unsafe.Pointer(in.FSGroupChangePolicy))
return nil
}
@ -5967,6 +5962,7 @@ func autoConvert_core_PodSecurityContext_To_v1_PodSecurityContext(in *core.PodSe
out.RunAsNonRoot = (*bool)(unsafe.Pointer(in.RunAsNonRoot))
out.SupplementalGroups = *(*[]int64)(unsafe.Pointer(&in.SupplementalGroups))
out.FSGroup = (*int64)(unsafe.Pointer(in.FSGroup))
out.FSGroupChangePolicy = (*v1.PodFSGroupChangePolicy)(unsafe.Pointer(in.FSGroupChangePolicy))
out.Sysctls = *(*[]v1.Sysctl)(unsafe.Pointer(&in.Sysctls))
return nil
}
@ -7006,6 +7002,7 @@ func Convert_core_ScopedResourceSelectorRequirement_To_v1_ScopedResourceSelector
func autoConvert_v1_Secret_To_core_Secret(in *v1.Secret, out *core.Secret, s conversion.Scope) error {
out.ObjectMeta = in.ObjectMeta
out.Immutable = (*bool)(unsafe.Pointer(in.Immutable))
out.Data = *(*map[string][]byte)(unsafe.Pointer(&in.Data))
// INFO: in.StringData opted out of conversion generation
out.Type = core.SecretType(in.Type)
@ -7014,6 +7011,7 @@ func autoConvert_v1_Secret_To_core_Secret(in *v1.Secret, out *core.Secret, s con
func autoConvert_core_Secret_To_v1_Secret(in *core.Secret, out *v1.Secret, s conversion.Scope) error {
out.ObjectMeta = in.ObjectMeta
out.Immutable = (*bool)(unsafe.Pointer(in.Immutable))
out.Data = *(*map[string][]byte)(unsafe.Pointer(&in.Data))
out.Type = v1.SecretType(in.Type)
return nil
@ -7411,6 +7409,7 @@ func Convert_core_ServiceList_To_v1_ServiceList(in *core.ServiceList, out *v1.Se
func autoConvert_v1_ServicePort_To_core_ServicePort(in *v1.ServicePort, out *core.ServicePort, s conversion.Scope) error {
out.Name = in.Name
out.Protocol = core.Protocol(in.Protocol)
out.AppProtocol = (*string)(unsafe.Pointer(in.AppProtocol))
out.Port = in.Port
out.TargetPort = in.TargetPort
out.NodePort = in.NodePort
@ -7425,6 +7424,7 @@ func Convert_v1_ServicePort_To_core_ServicePort(in *v1.ServicePort, out *core.Se
func autoConvert_core_ServicePort_To_v1_ServicePort(in *core.ServicePort, out *v1.ServicePort, s conversion.Scope) error {
out.Name = in.Name
out.Protocol = v1.Protocol(in.Protocol)
out.AppProtocol = (*string)(unsafe.Pointer(in.AppProtocol))
out.Port = in.Port
out.TargetPort = in.TargetPort
out.NodePort = in.NodePort
@ -7820,11 +7820,6 @@ func autoConvert_v1_Volume_To_core_Volume(in *v1.Volume, out *core.Volume, s con
return nil
}
// Convert_v1_Volume_To_core_Volume is an autogenerated conversion function.
func Convert_v1_Volume_To_core_Volume(in *v1.Volume, out *core.Volume, s conversion.Scope) error {
return autoConvert_v1_Volume_To_core_Volume(in, out, s)
}
func autoConvert_core_Volume_To_v1_Volume(in *core.Volume, out *v1.Volume, s conversion.Scope) error {
out.Name = in.Name
if err := Convert_core_VolumeSource_To_v1_VolumeSource(&in.VolumeSource, &out.VolumeSource, s); err != nil {
@ -7833,11 +7828,6 @@ func autoConvert_core_Volume_To_v1_Volume(in *core.Volume, out *v1.Volume, s con
return nil
}
// Convert_core_Volume_To_v1_Volume is an autogenerated conversion function.
func Convert_core_Volume_To_v1_Volume(in *core.Volume, out *v1.Volume, s conversion.Scope) error {
return autoConvert_core_Volume_To_v1_Volume(in, out, s)
}
func autoConvert_v1_VolumeDevice_To_core_VolumeDevice(in *v1.VolumeDevice, out *core.VolumeDevice, s conversion.Scope) error {
out.Name = in.Name
out.DevicePath = in.DevicePath

View File

@ -33,7 +33,6 @@ go_library(
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1/validation:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
@ -54,7 +53,6 @@ go_test(
],
embed = [":go_default_library"],
deps = [
"//pkg/api/testapi:go_default_library",
"//pkg/apis/core:go_default_library",
"//pkg/capabilities:go_default_library",
"//pkg/features:go_default_library",

View File

@ -31,14 +31,13 @@ import (
"k8s.io/klog"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/resource"
apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
unversionedvalidation "k8s.io/apimachinery/pkg/apis/meta/v1/validation"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/diff"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/sets"
@ -1563,11 +1562,6 @@ var supportedReclaimPolicy = sets.NewString(string(core.PersistentVolumeReclaimD
var supportedVolumeModes = sets.NewString(string(core.PersistentVolumeBlock), string(core.PersistentVolumeFilesystem))
var supportedDataSourceAPIGroupKinds = map[schema.GroupKind]bool{
{Group: "snapshot.storage.k8s.io", Kind: "VolumeSnapshot"}: true,
{Group: "", Kind: "PersistentVolumeClaim"}: true,
}
func ValidatePersistentVolumeSpec(pvSpec *core.PersistentVolumeSpec, pvName string, validateInlinePersistentVolumeSpec bool, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
@ -1929,17 +1923,15 @@ func ValidatePersistentVolumeClaimSpec(spec *core.PersistentVolumeClaimSpec, fld
if len(spec.DataSource.Name) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Child("dataSource", "name"), ""))
}
groupKind := schema.GroupKind{Group: "", Kind: spec.DataSource.Kind}
if len(spec.DataSource.Kind) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Child("dataSource", "kind"), ""))
}
apiGroup := ""
if spec.DataSource.APIGroup != nil {
groupKind.Group = string(*spec.DataSource.APIGroup)
apiGroup = *spec.DataSource.APIGroup
}
groupKindList := make([]string, 0, len(supportedDataSourceAPIGroupKinds))
for grp := range supportedDataSourceAPIGroupKinds {
groupKindList = append(groupKindList, grp.String())
}
if !supportedDataSourceAPIGroupKinds[groupKind] {
allErrs = append(allErrs, field.NotSupported(fldPath.Child("dataSource"), groupKind.String(), groupKindList))
if len(apiGroup) == 0 && spec.DataSource.Kind != "PersistentVolumeClaim" {
allErrs = append(allErrs, field.Invalid(fldPath.Child("dataSource"), spec.DataSource.Kind, ""))
}
}
@ -2824,6 +2816,16 @@ func validateDNSPolicy(dnsPolicy *core.DNSPolicy, fldPath *field.Path) field.Err
return allErrors
}
var validFSGroupChangePolicies = sets.NewString(string(core.FSGroupChangeOnRootMismatch), string(core.FSGroupChangeAlways))
func validateFSGroupChangePolicy(fsGroupPolicy *core.PodFSGroupChangePolicy, fldPath *field.Path) field.ErrorList {
allErrors := field.ErrorList{}
if !validFSGroupChangePolicies.Has(string(*fsGroupPolicy)) {
allErrors = append(allErrors, field.NotSupported(fldPath, fsGroupPolicy, validFSGroupChangePolicies.List()))
}
return allErrors
}
const (
// Limits on various DNS parameters. These are derived from
// restrictions in Linux libc name resolution handling.
@ -3083,8 +3085,33 @@ func validateContainersOnlyForPod(containers []core.Container, fldPath *field.Pa
return allErrs
}
// PodValidationOptions contains the different settings for pod validation
type PodValidationOptions struct {
// Allow pod spec to have more than one huge page resource (with different sizes)
AllowMultipleHugePageResources bool
}
// ValidatePodSingleHugePageResources checks if there are multiple huge page
// resources in the pod object.
func ValidatePodSingleHugePageResources(pod *core.Pod, specPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
hugePageResources := sets.NewString()
for i := range pod.Spec.Containers {
resourceSet := toContainerResourcesSet(&pod.Spec.Containers[i])
for resourceStr := range resourceSet {
if v1helper.IsHugePageResourceName(v1.ResourceName(resourceStr)) {
hugePageResources.Insert(resourceStr)
}
}
}
if len(hugePageResources) > 1 {
allErrs = append(allErrs, field.Invalid(specPath, hugePageResources.List(), "must use a single hugepage size in a pod spec"))
}
return allErrs
}
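A sketch of the extracted check rejecting mixed hugepage sizes (hypothetical snippet; the function and packages are as named in this diff):

package main

import (
    "fmt"

    "k8s.io/apimachinery/pkg/api/resource"
    "k8s.io/apimachinery/pkg/util/validation/field"
    core "k8s.io/kubernetes/pkg/apis/core"
    "k8s.io/kubernetes/pkg/apis/core/validation"
)

func main() {
    pod := &core.Pod{Spec: core.PodSpec{Containers: []core.Container{{
        Name:  "app",
        Image: "busybox",
        Resources: core.ResourceRequirements{Limits: core.ResourceList{
            "hugepages-2Mi": resource.MustParse("2Mi"),
            "hugepages-1Gi": resource.MustParse("1Gi"),
        }},
    }}}}
    errs := validation.ValidatePodSingleHugePageResources(pod, field.NewPath("spec"))
    fmt.Println(len(errs) > 0) // true: two hugepage sizes in one pod spec
}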
// ValidatePod tests if required fields in the pod are set.
func ValidatePod(pod *core.Pod) field.ErrorList {
func ValidatePod(pod *core.Pod, opts PodValidationOptions) field.ErrorList {
fldPath := field.NewPath("metadata")
allErrs := ValidateObjectMeta(&pod.ObjectMeta, true, ValidatePodName, fldPath)
allErrs = append(allErrs, ValidatePodSpecificAnnotations(pod.ObjectMeta.Annotations, &pod.Spec, fldPath.Child("annotations"))...)
@ -3111,17 +3138,8 @@ func ValidatePod(pod *core.Pod) field.ErrorList {
allErrs = append(allErrs, validateContainersOnlyForPod(pod.Spec.Containers, specPath.Child("containers"))...)
allErrs = append(allErrs, validateContainersOnlyForPod(pod.Spec.InitContainers, specPath.Child("initContainers"))...)
hugePageResources := sets.NewString()
for i := range pod.Spec.Containers {
resourceSet := toContainerResourcesSet(&pod.Spec.Containers[i])
for resourceStr := range resourceSet {
if v1helper.IsHugePageResourceName(v1.ResourceName(resourceStr)) {
hugePageResources.Insert(resourceStr)
}
}
}
if len(hugePageResources) > 1 {
allErrs = append(allErrs, field.Invalid(specPath, hugePageResources, "must use a single hugepage size in a pod spec"))
if !opts.AllowMultipleHugePageResources {
allErrs = append(allErrs, ValidatePodSingleHugePageResources(pod, specPath)...)
}
podIPsField := field.NewPath("status", "podIPs")
@ -3651,6 +3669,10 @@ func ValidatePodSecurityContext(securityContext *core.PodSecurityContext, spec *
allErrs = append(allErrs, validateSysctls(securityContext.Sysctls, fldPath.Child("sysctls"))...)
}
if securityContext.FSGroupChangePolicy != nil {
allErrs = append(allErrs, validateFSGroupChangePolicy(securityContext.FSGroupChangePolicy, fldPath.Child("fsGroupChangePolicy"))...)
}
allErrs = append(allErrs, validateWindowsSecurityContextOptions(securityContext.WindowsOptions, fldPath.Child("windowsOptions"))...)
}
@ -3679,8 +3701,8 @@ func ValidateContainerUpdates(newContainers, oldContainers []core.Container, fld
}
// ValidatePodCreate validates a pod in the context of its initial create
func ValidatePodCreate(pod *core.Pod) field.ErrorList {
allErrs := ValidatePod(pod)
func ValidatePodCreate(pod *core.Pod, opts PodValidationOptions) field.ErrorList {
allErrs := ValidatePod(pod, opts)
fldPath := field.NewPath("spec")
// EphemeralContainers can only be set on update using the ephemeralcontainers subresource
@ -3693,12 +3715,16 @@ func ValidatePodCreate(pod *core.Pod) field.ErrorList {
// ValidatePodUpdate tests to see if the update is legal for an end user to make. newPod is updated with fields
// that cannot be changed.
func ValidatePodUpdate(newPod, oldPod *core.Pod) field.ErrorList {
func ValidatePodUpdate(newPod, oldPod *core.Pod, opts PodValidationOptions) field.ErrorList {
fldPath := field.NewPath("metadata")
allErrs := ValidateObjectMetaUpdate(&newPod.ObjectMeta, &oldPod.ObjectMeta, fldPath)
allErrs = append(allErrs, ValidatePodSpecificAnnotationUpdates(newPod, oldPod, fldPath.Child("annotations"))...)
specPath := field.NewPath("spec")
if !opts.AllowMultipleHugePageResources {
allErrs = append(allErrs, ValidatePodSingleHugePageResources(newPod, specPath)...)
}
// validate updateable fields:
// 1. spec.containers[*].image
// 2. spec.initContainers[*].image
@ -3908,7 +3934,7 @@ var supportedServiceType = sets.NewString(string(core.ServiceTypeClusterIP), str
var supportedServiceIPFamily = sets.NewString(string(core.IPv4Protocol), string(core.IPv6Protocol))
// ValidateService tests if required fields/annotations of a Service are valid.
func ValidateService(service *core.Service) field.ErrorList {
func ValidateService(service *core.Service, allowAppProtocol bool) field.ErrorList {
allErrs := ValidateObjectMeta(&service.ObjectMeta, true, ValidateServiceName, field.NewPath("metadata"))
specPath := field.NewPath("spec")
@ -3953,7 +3979,7 @@ func ValidateService(service *core.Service) field.ErrorList {
portsPath := specPath.Child("ports")
for i := range service.Spec.Ports {
portPath := portsPath.Index(i)
allErrs = append(allErrs, validateServicePort(&service.Spec.Ports[i], len(service.Spec.Ports) > 1, isHeadlessService, &allPortNames, portPath)...)
allErrs = append(allErrs, validateServicePort(&service.Spec.Ports[i], len(service.Spec.Ports) > 1, isHeadlessService, allowAppProtocol, &allPortNames, portPath)...)
}
if service.Spec.Selector != nil {
@ -4125,7 +4151,7 @@ func ValidateService(service *core.Service) field.ErrorList {
return allErrs
}
func validateServicePort(sp *core.ServicePort, requireName, isHeadlessService bool, allNames *sets.String, fldPath *field.Path) field.ErrorList {
func validateServicePort(sp *core.ServicePort, requireName, isHeadlessService, allowAppProtocol bool, allNames *sets.String, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
if requireName && len(sp.Name) == 0 {
@ -4151,6 +4177,16 @@ func validateServicePort(sp *core.ServicePort, requireName, isHeadlessService bo
allErrs = append(allErrs, ValidatePortNumOrName(sp.TargetPort, fldPath.Child("targetPort"))...)
if sp.AppProtocol != nil {
if allowAppProtocol {
for _, msg := range validation.IsQualifiedName(*sp.AppProtocol) {
allErrs = append(allErrs, field.Invalid(fldPath.Child("appProtocol"), sp.AppProtocol, msg))
}
} else {
allErrs = append(allErrs, field.Forbidden(fldPath.Child("appProtocol"), "This field can be enabled with the ServiceAppProtocol feature gate"))
}
}
// in the v1 API, targetPorts on headless services were tolerated.
// once we have version-specific validation, we can reject this on newer API versions, but until then, we have to tolerate it for compatibility.
//
@ -4207,6 +4243,14 @@ func ValidateServiceExternalTrafficFieldsCombination(service *core.Service) fiel
return allErrs
}
// ValidateServiceCreate validates Services as they are created.
func ValidateServiceCreate(service *core.Service) field.ErrorList {
// allow AppProtocol value if the feature gate is set.
allowAppProtocol := utilfeature.DefaultFeatureGate.Enabled(features.ServiceAppProtocol)
return ValidateService(service, allowAppProtocol)
}
// ValidateServiceUpdate tests if required fields in the service are set during an update
func ValidateServiceUpdate(service, oldService *core.Service) field.ErrorList {
allErrs := ValidateObjectMetaUpdate(&service.ObjectMeta, &oldService.ObjectMeta, field.NewPath("metadata"))
@ -4226,8 +4270,19 @@ func ValidateServiceUpdate(service, oldService *core.Service) field.ErrorList {
}
}
allErrs = append(allErrs, ValidateService(service)...)
return allErrs
// allow AppProtocol value if the feature gate is set or the field is
// already set on the resource.
allowAppProtocol := utilfeature.DefaultFeatureGate.Enabled(features.ServiceAppProtocol)
if !allowAppProtocol {
for _, port := range oldService.Spec.Ports {
if port.AppProtocol != nil {
allowAppProtocol = true
break
}
}
}
return append(allErrs, ValidateService(service, allowAppProtocol)...)
}
// ValidateServiceStatusUpdate tests if required fields in the Service are set when updating status.
@ -4424,8 +4479,15 @@ func ValidateNodeSpecificAnnotations(annotations map[string]string, fldPath *fie
return allErrs
}
// NodeValidationOptions contains the different settings for node validation
type NodeValidationOptions struct {
// Should a node spec containing more than one huge page resource (with different sizes)
// with pre-allocated memory trigger validation errors
ValidateSingleHugePageResource bool
}
// ValidateNode tests if required fields in the node are set.
func ValidateNode(node *core.Node) field.ErrorList {
func ValidateNode(node *core.Node, opts NodeValidationOptions) field.ErrorList {
fldPath := field.NewPath("metadata")
allErrs := ValidateObjectMeta(&node.ObjectMeta, false, ValidateNodeName, fldPath)
allErrs = append(allErrs, ValidateNodeSpecificAnnotations(node.ObjectMeta.Annotations, fldPath.Child("annotations"))...)
@ -4436,7 +4498,7 @@ func ValidateNode(node *core.Node) field.ErrorList {
// Only validate spec.
// All status fields are optional and can be updated later.
// That said, if specified, we need to ensure they are valid.
allErrs = append(allErrs, ValidateNodeResources(node)...)
allErrs = append(allErrs, ValidateNodeResources(node, opts)...)
// validate PodCIDRS only if we need to
if len(node.Spec.PodCIDRs) > 0 {
@ -4476,13 +4538,33 @@ func ValidateNode(node *core.Node) field.ErrorList {
}
// ValidateNodeResources is used to make sure a node has valid capacity and allocatable values.
func ValidateNodeResources(node *core.Node) field.ErrorList {
func ValidateNodeResources(node *core.Node, opts NodeValidationOptions) field.ErrorList {
allErrs := field.ErrorList{}
if opts.ValidateSingleHugePageResource {
allErrs = append(allErrs, ValidateNodeSingleHugePageResources(node)...)
}
// Validate resource quantities in capacity.
for k, v := range node.Status.Capacity {
resPath := field.NewPath("status", "capacity", string(k))
allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...)
}
// Validate resource quantities in allocatable.
for k, v := range node.Status.Allocatable {
resPath := field.NewPath("status", "allocatable", string(k))
allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...)
}
return allErrs
}
// ValidateNodeSingleHugePageResources is used to make sure a node has valid capacity and allocatable values for the huge page resources.
func ValidateNodeSingleHugePageResources(node *core.Node) field.ErrorList {
allErrs := field.ErrorList{}
// Validate resource quantities in capacity.
hugePageSizes := sets.NewString()
for k, v := range node.Status.Capacity {
resPath := field.NewPath("status", "capacity", string(k))
allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...)
// track any huge page size that has a positive value
if helper.IsHugePageResourceName(k) && v.Value() > int64(0) {
hugePageSizes.Insert(string(k))
@ -4495,7 +4577,6 @@ func ValidateNodeResources(node *core.Node) field.ErrorList {
hugePageSizes = sets.NewString()
for k, v := range node.Status.Allocatable {
resPath := field.NewPath("status", "allocatable", string(k))
allErrs = append(allErrs, ValidateResourceQuantityValue(string(k), v, resPath)...)
// track any huge page size that has a positive value
if helper.IsHugePageResourceName(k) && v.Value() > int64(0) {
hugePageSizes.Insert(string(k))
@ -4508,7 +4589,7 @@ func ValidateNodeResources(node *core.Node) field.ErrorList {
}
// ValidateNodeUpdate tests to make sure a node update can be applied. Modifies oldNode.
func ValidateNodeUpdate(node, oldNode *core.Node) field.ErrorList {
func ValidateNodeUpdate(node, oldNode *core.Node, opts NodeValidationOptions) field.ErrorList {
fldPath := field.NewPath("metadata")
allErrs := ValidateObjectMetaUpdate(&node.ObjectMeta, &oldNode.ObjectMeta, fldPath)
allErrs = append(allErrs, ValidateNodeSpecificAnnotations(node.ObjectMeta.Annotations, fldPath.Child("annotations"))...)
@ -4519,7 +4600,7 @@ func ValidateNodeUpdate(node, oldNode *core.Node) field.ErrorList {
// allErrs = append(allErrs, field.Invalid("status", node.Status, "must be empty"))
// }
allErrs = append(allErrs, ValidateNodeResources(node)...)
allErrs = append(allErrs, ValidateNodeResources(node, opts)...)
// Validate no duplicate addresses in node status.
addresses := make(map[core.NodeAddress]bool)
@ -5005,6 +5086,16 @@ func ValidateSecretUpdate(newSecret, oldSecret *core.Secret) field.ErrorList {
}
allErrs = append(allErrs, ValidateImmutableField(newSecret.Type, oldSecret.Type, field.NewPath("type"))...)
if oldSecret.Immutable != nil && *oldSecret.Immutable {
if newSecret.Immutable == nil || !*newSecret.Immutable {
allErrs = append(allErrs, field.Forbidden(field.NewPath("immutable"), "field is immutable when `immutable` is set"))
}
if !reflect.DeepEqual(newSecret.Data, oldSecret.Data) {
allErrs = append(allErrs, field.Forbidden(field.NewPath("data"), "field is immutable when `immutable` is set"))
}
// We don't validate StringData, as it was already converted back to Data
// before validation is happening.
}
allErrs = append(allErrs, ValidateSecret(newSecret)...)
return allErrs
@ -5051,8 +5142,20 @@ func ValidateConfigMap(cfg *core.ConfigMap) field.ErrorList {
func ValidateConfigMapUpdate(newCfg, oldCfg *core.ConfigMap) field.ErrorList {
allErrs := field.ErrorList{}
allErrs = append(allErrs, ValidateObjectMetaUpdate(&newCfg.ObjectMeta, &oldCfg.ObjectMeta, field.NewPath("metadata"))...)
allErrs = append(allErrs, ValidateConfigMap(newCfg)...)
if oldCfg.Immutable != nil && *oldCfg.Immutable {
if newCfg.Immutable == nil || !*newCfg.Immutable {
allErrs = append(allErrs, field.Forbidden(field.NewPath("immutable"), "field is immutable when `immutable` is set"))
}
if !reflect.DeepEqual(newCfg.Data, oldCfg.Data) {
allErrs = append(allErrs, field.Forbidden(field.NewPath("data"), "field is immutable when `immutable` is set"))
}
if !reflect.DeepEqual(newCfg.BinaryData, oldCfg.BinaryData) {
allErrs = append(allErrs, field.Forbidden(field.NewPath("binaryData"), "field is immutable when `immutable` is set"))
}
}
allErrs = append(allErrs, ValidateConfigMap(newCfg)...)
return allErrs
}
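A sketch of the new immutability check in action (hypothetical snippet; the validation entry point and Immutable field are as in the hunks above):

package main

import (
    "fmt"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    core "k8s.io/kubernetes/pkg/apis/core"
    "k8s.io/kubernetes/pkg/apis/core/validation"
)

func main() {
    immutable := true
    old := &core.ConfigMap{
        ObjectMeta: metav1.ObjectMeta{Name: "settings", Namespace: "default", ResourceVersion: "1"},
        Immutable:  &immutable,
        Data:       map[string]string{"mode": "fast"},
    }
    updated := old.DeepCopy()
    updated.Data["mode"] = "slow"
    errs := validation.ValidateConfigMapUpdate(updated, old)
    fmt.Println(len(errs) > 0) // true: data is frozen once immutable is set
}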
@ -5395,15 +5498,42 @@ func ValidateNamespaceFinalizeUpdate(newNamespace, oldNamespace *core.Namespace)
return allErrs
}
// ValidateEndpoints tests if required fields are set.
func ValidateEndpoints(endpoints *core.Endpoints) field.ErrorList {
// ValidateEndpoints validates Endpoints on create and update.
func ValidateEndpoints(endpoints *core.Endpoints, allowAppProtocol bool) field.ErrorList {
allErrs := ValidateObjectMeta(&endpoints.ObjectMeta, true, ValidateEndpointsName, field.NewPath("metadata"))
allErrs = append(allErrs, ValidateEndpointsSpecificAnnotations(endpoints.Annotations, field.NewPath("annotations"))...)
allErrs = append(allErrs, validateEndpointSubsets(endpoints.Subsets, field.NewPath("subsets"))...)
allErrs = append(allErrs, validateEndpointSubsets(endpoints.Subsets, allowAppProtocol, field.NewPath("subsets"))...)
return allErrs
}
func validateEndpointSubsets(subsets []core.EndpointSubset, fldPath *field.Path) field.ErrorList {
// ValidateEndpointsCreate validates Endpoints on create.
func ValidateEndpointsCreate(endpoints *core.Endpoints) field.ErrorList {
allowAppProtocol := utilfeature.DefaultFeatureGate.Enabled(features.ServiceAppProtocol)
return ValidateEndpoints(endpoints, allowAppProtocol)
}
// ValidateEndpointsUpdate validates Endpoints on update. NodeName changes are
// allowed during update to accommodate the case where nodeIP or PodCIDR is
// reused. An existing endpoint ip will have a different nodeName if this
// happens.
func ValidateEndpointsUpdate(newEndpoints, oldEndpoints *core.Endpoints) field.ErrorList {
allErrs := ValidateObjectMetaUpdate(&newEndpoints.ObjectMeta, &oldEndpoints.ObjectMeta, field.NewPath("metadata"))
allowAppProtocol := utilfeature.DefaultFeatureGate.Enabled(features.ServiceAppProtocol)
if !allowAppProtocol {
for _, oldSubset := range oldEndpoints.Subsets {
for _, port := range oldSubset.Ports {
if port.AppProtocol != nil {
allowAppProtocol = true
break
}
}
}
}
allErrs = append(allErrs, ValidateEndpoints(newEndpoints, allowAppProtocol)...)
return allErrs
}
func validateEndpointSubsets(subsets []core.EndpointSubset, allowAppProtocol bool, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
for i := range subsets {
ss := &subsets[i]
@ -5421,7 +5551,7 @@ func validateEndpointSubsets(subsets []core.EndpointSubset, fldPath *field.Path)
allErrs = append(allErrs, validateEndpointAddress(&ss.NotReadyAddresses[addr], idxPath.Child("notReadyAddresses").Index(addr))...)
}
for port := range ss.Ports {
allErrs = append(allErrs, validateEndpointPort(&ss.Ports[port], len(ss.Ports) > 1, idxPath.Child("ports").Index(port))...)
allErrs = append(allErrs, validateEndpointPort(&ss.Ports[port], len(ss.Ports) > 1, allowAppProtocol, idxPath.Child("ports").Index(port))...)
}
}
@ -5472,7 +5602,7 @@ func validateNonSpecialIP(ipAddress string, fldPath *field.Path) field.ErrorList
return allErrs
}
func validateEndpointPort(port *core.EndpointPort, requireName bool, fldPath *field.Path) field.ErrorList {
func validateEndpointPort(port *core.EndpointPort, requireName, allowAppProtocol bool, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
if requireName && len(port.Name) == 0 {
allErrs = append(allErrs, field.Required(fldPath.Child("name"), ""))
@ -5487,16 +5617,15 @@ func validateEndpointPort(port *core.EndpointPort, requireName bool, fldPath *fi
} else if !supportedPortProtocols.Has(string(port.Protocol)) {
allErrs = append(allErrs, field.NotSupported(fldPath.Child("protocol"), port.Protocol, supportedPortProtocols.List()))
}
return allErrs
}
// ValidateEndpointsUpdate tests to make sure an endpoints update can be applied.
// NodeName changes are allowed during update to accommodate the case where nodeIP or PodCIDR is reused.
// An existing endpoint ip will have a different nodeName if this happens.
func ValidateEndpointsUpdate(newEndpoints, oldEndpoints *core.Endpoints) field.ErrorList {
allErrs := ValidateObjectMetaUpdate(&newEndpoints.ObjectMeta, &oldEndpoints.ObjectMeta, field.NewPath("metadata"))
allErrs = append(allErrs, validateEndpointSubsets(newEndpoints.Subsets, field.NewPath("subsets"))...)
allErrs = append(allErrs, ValidateEndpointsSpecificAnnotations(newEndpoints.Annotations, field.NewPath("annotations"))...)
if port.AppProtocol != nil {
if allowAppProtocol {
for _, msg := range validation.IsQualifiedName(*port.AppProtocol) {
allErrs = append(allErrs, field.Invalid(fldPath.Child("appProtocol"), port.AppProtocol, msg))
}
} else {
allErrs = append(allErrs, field.Forbidden(fldPath.Child("appProtocol"), "This field can be enabled with the ServiceAppProtocol feature gate"))
}
}
return allErrs
}
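For reference, the qualified-name rule enforced on appProtocol above can be exercised standalone. A small sketch against k8s.io/apimachinery; the sample values are illustrative:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/validation"
)

func main() {
	// appProtocol values must be Kubernetes qualified names. "-bad-" fails
	// because each segment must start and end with an alphanumeric character.
	for _, v := range []string{"http", "example.com/grpc-web", "-bad-"} {
		fmt.Printf("%q -> %v\n", v, validation.IsQualifiedName(v))
	}
}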

View File

@ -519,6 +519,11 @@ func (in *ConfigMap) DeepCopyInto(out *ConfigMap) {
*out = *in
out.TypeMeta = in.TypeMeta
in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
if in.Immutable != nil {
in, out := &in.Immutable, &out.Immutable
*out = new(bool)
**out = **in
}
if in.Data != nil {
in, out := &in.Data, &out.Data
*out = make(map[string]string, len(*in))
@ -1091,6 +1096,11 @@ func (in *EndpointAddress) DeepCopy() *EndpointAddress {
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *EndpointPort) DeepCopyInto(out *EndpointPort) {
*out = *in
if in.AppProtocol != nil {
in, out := &in.AppProtocol, &out.AppProtocol
*out = new(string)
**out = **in
}
return
}
@ -1124,7 +1134,9 @@ func (in *EndpointSubset) DeepCopyInto(out *EndpointSubset) {
if in.Ports != nil {
in, out := &in.Ports, &out.Ports
*out = make([]EndpointPort, len(*in))
copy(*out, *in)
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}
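The switch from copy() to a per-element DeepCopyInto loop is forced by the new AppProtocol pointer field: a shallow copy would alias the pointed-to string across objects. A self-contained sketch with hypothetical stand-in types:

package main

import "fmt"

type Port struct {
	Name        string
	AppProtocol *string
}

func main() {
	http := "http"
	src := []Port{{Name: "web", AppProtocol: &http}}

	shallow := make([]Port, len(src))
	copy(shallow, src) // copies the pointer, not the string it points at

	deep := make([]Port, len(src))
	for i := range src { // what the regenerated DeepCopyInto loop does
		deep[i] = src[i]
		if src[i].AppProtocol != nil {
			v := *src[i].AppProtocol
			deep[i].AppProtocol = &v
		}
	}

	*src[0].AppProtocol = "grpc"
	fmt.Println(*shallow[0].AppProtocol) // "grpc" – aliased to the source
	fmt.Println(*deep[0].AppProtocol)    // "http" – an independent copy
}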
@ -3679,6 +3691,11 @@ func (in *PodSecurityContext) DeepCopyInto(out *PodSecurityContext) {
*out = new(int64)
**out = **in
}
if in.FSGroupChangePolicy != nil {
in, out := &in.FSGroupChangePolicy, &out.FSGroupChangePolicy
*out = new(PodFSGroupChangePolicy)
**out = **in
}
if in.Sysctls != nil {
in, out := &in.Sysctls, &out.Sysctls
*out = make([]Sysctl, len(*in))
@ -4660,6 +4677,11 @@ func (in *Secret) DeepCopyInto(out *Secret) {
*out = *in
out.TypeMeta = in.TypeMeta
in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
if in.Immutable != nil {
in, out := &in.Immutable, &out.Immutable
*out = new(bool)
**out = **in
}
if in.Data != nil {
in, out := &in.Data, &out.Data
*out = make(map[string][]byte, len(*in))
@ -5097,6 +5119,11 @@ func (in *ServiceList) DeepCopyObject() runtime.Object {
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServicePort) DeepCopyInto(out *ServicePort) {
*out = *in
if in.AppProtocol != nil {
in, out := &in.AppProtocol, &out.AppProtocol
*out = new(string)
**out = **in
}
out.TargetPort = in.TargetPort
return
}
@ -5142,7 +5169,9 @@ func (in *ServiceSpec) DeepCopyInto(out *ServiceSpec) {
if in.Ports != nil {
in, out := &in.Ports, &out.Ports
*out = make([]ServicePort, len(*in))
copy(*out, *in)
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Selector != nil {
in, out := &in.Selector, &out.Selector

View File

@ -19,7 +19,6 @@ go_library(
"//pkg/apis/autoscaling:go_default_library",
"//pkg/apis/networking:go_default_library",
"//pkg/apis/policy:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
],

View File

@ -54,7 +54,6 @@ func addKnownTypes(scheme *runtime.Scheme) error {
&apps.Deployment{},
&apps.DeploymentList{},
&apps.DeploymentRollback{},
&ReplicationControllerDummy{},
&apps.DaemonSetList{},
&apps.DaemonSet{},
&networking.Ingress{},

View File

@ -27,14 +27,3 @@ support is experimental.
*/
package extensions
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// ReplicationControllerDummy : Dummy definition
type ReplicationControllerDummy struct {
metav1.TypeMeta
}

View File

@ -19,32 +19,3 @@ limitations under the License.
// Code generated by deepcopy-gen. DO NOT EDIT.
package extensions
import (
runtime "k8s.io/apimachinery/pkg/runtime"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ReplicationControllerDummy) DeepCopyInto(out *ReplicationControllerDummy) {
*out = *in
out.TypeMeta = in.TypeMeta
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReplicationControllerDummy.
func (in *ReplicationControllerDummy) DeepCopy() *ReplicationControllerDummy {
if in == nil {
return nil
}
out := new(ReplicationControllerDummy)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *ReplicationControllerDummy) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}

View File

@ -50,6 +50,8 @@ func addKnownTypes(scheme *runtime.Scheme) error {
&NetworkPolicyList{},
&Ingress{},
&IngressList{},
&IngressClass{},
&IngressClassList{},
)
return nil
}

View File

@ -144,15 +144,15 @@ type NetworkPolicyPort struct {
Port *intstr.IntOrString
}
// IPBlock describes a particular CIDR (Ex. "192.168.1.1/24") that is allowed to the pods
// matched by a NetworkPolicySpec's podSelector. The except entry describes CIDRs that should
// not be included within this rule.
// IPBlock describes a particular CIDR (Ex. "192.168.1.1/24","2001:db9::/64") that is allowed
// to the pods matched by a NetworkPolicySpec's podSelector. The except entry describes CIDRs
// that should not be included within this rule.
type IPBlock struct {
// CIDR is a string representing the IP Block
// Valid examples are "192.168.1.1/24"
// Valid examples are "192.168.1.1/24" or "2001:db9::/64"
CIDR string
// Except is a slice of CIDRs that should not be included within an IP Block
// Valid examples are "192.168.1.1/24"
// Valid examples are "192.168.1.1/24" or "2001:db9::/64"
// Except values will be rejected if they are outside the CIDR range
// +optional
Except []string
@ -235,10 +235,23 @@ type IngressList struct {
// IngressSpec describes the Ingress the user wishes to exist.
type IngressSpec struct {
// A default backend capable of servicing requests that don't match any
// rule. At least one of 'backend' or 'rules' must be specified. This field
// is optional to allow the loadbalancer controller or defaulting logic to
// specify a global default.
// IngressClassName is the name of the IngressClass cluster resource. The
// associated IngressClass defines which controller will implement the
// resource. This replaces the deprecated `kubernetes.io/ingress.class`
// annotation. For backwards compatibility, when that annotation is set, it
// must be given precedence over this field. The controller may emit a
// warning if the field and annotation have different values.
// Implementations of this API should ignore Ingresses without a class
// specified. An IngressClass resource may be marked as default, which can
// be used to set a default value for this field. For more information,
// refer to the IngressClass documentation.
// +optional
IngressClassName *string
// Backend is a default backend capable of servicing requests that don't
// match any rule. At least one of 'backend' or 'rules' must be specified.
// This field is optional to allow the loadbalancer controller or defaulting
// logic to specify a global default.
// +optional
Backend *IngressBackend
@ -257,6 +270,54 @@ type IngressSpec struct {
// TODO: Add the ability to specify load-balancer IP through claims
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// IngressClass represents the class of the Ingress, referenced by the Ingress
// Spec. The `ingressclass.kubernetes.io/is-default-class` annotation can be
// used to indicate that an IngressClass should be considered default. When a
// single IngressClass resource has this annotation set to true, new Ingress
// resources without a class specified will be assigned this default class.
type IngressClass struct {
metav1.TypeMeta
metav1.ObjectMeta
// Spec is the desired state of the IngressClass.
// More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status
// +optional
Spec IngressClassSpec
}
// IngressClassSpec provides information about the class of an Ingress.
type IngressClassSpec struct {
// Controller refers to the name of the controller that should handle this
// class. This allows for different "flavors" that are controlled by the
// same controller. For example, you may have different Parameters for the
// same implementing controller. This should be specified as a
// domain-prefixed path no more than 250 characters in length, e.g.
// "acme.io/ingress-controller". This field is immutable.
Controller string
// Parameters is a link to a custom resource containing additional
// configuration for the controller. This is optional if the controller does
// not require extra parameters.
// +optional
Parameters *api.TypedLocalObjectReference
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// IngressClassList is a collection of IngressClasses.
type IngressClassList struct {
metav1.TypeMeta
// Standard object's metadata.
// +optional
metav1.ListMeta
// Items is the list of IngressClasses.
// +listType=set
Items []IngressClass
}
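A hedged sketch of how these internal types surface through the published networking.k8s.io/v1beta1 API in Kubernetes 1.18; the controller name and object names below are hypothetical:

package main

import (
	"fmt"

	networkingv1beta1 "k8s.io/api/networking/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	class := networkingv1beta1.IngressClass{
		ObjectMeta: metav1.ObjectMeta{
			Name: "external-lb",
			Annotations: map[string]string{
				// Marks this class as the cluster default for new Ingresses.
				"ingressclass.kubernetes.io/is-default-class": "true",
			},
		},
		Spec: networkingv1beta1.IngressClassSpec{
			Controller: "example.com/ingress-controller", // hypothetical controller name
		},
	}

	className := class.Name
	ing := networkingv1beta1.Ingress{
		ObjectMeta: metav1.ObjectMeta{Name: "demo"},
		Spec: networkingv1beta1.IngressSpec{
			// Replaces the deprecated kubernetes.io/ingress.class annotation.
			IngressClassName: &className,
		},
	}
	fmt.Println(*ing.Spec.IngressClassName)
}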
// IngressTLS describes the transport layer security associated with an Ingress.
type IngressTLS struct {
// Hosts are a list of hosts included in the TLS certificate. The values in
@ -265,11 +326,11 @@ type IngressTLS struct {
// Ingress, if left unspecified.
// +optional
Hosts []string
// SecretName is the name of the secret used to terminate SSL traffic on 443.
// Field is left optional to allow SSL routing based on SNI hostname alone.
// If the SNI host in a listener conflicts with the "Host" header field used
// by an IngressRule, the SNI host is used for termination and value of the
// Host header is used for routing.
// SecretName is the name of the secret used to terminate TLS traffic on
// port 443. Field is left optional to allow TLS routing based on SNI
// hostname alone. If the SNI host in a listener conflicts with the "Host"
// header field used by an IngressRule, the SNI host is used for termination
// and value of the Host header is used for routing.
// +optional
SecretName string
// TODO: Consider specifying different modes of termination, protocols etc.
@ -283,28 +344,40 @@ type IngressStatus struct {
}
// IngressRule represents the rules mapping the paths under a specified host to
// the related backend services. Incoming requests are first evaluated for a host
// match, then routed to the backend associated with the matching IngressRuleValue.
// the related backend services. Incoming requests are first evaluated for a
// host match, then routed to the backend associated with the matching
// IngressRuleValue.
type IngressRule struct {
// Host is the fully qualified domain name of a network host, as defined
// by RFC 3986. Note the following deviations from the "host" part of the
// URI as defined in the RFC:
// 1. IPs are not allowed. Currently an IngressRuleValue can only apply to the
// IP in the Spec of the parent Ingress.
// Host is the fully qualified domain name of a network host, as defined by RFC 3986.
// Note the following deviations from the "host" part of the
// URI as defined in RFC 3986:
// 1. IPs are not allowed. Currently an IngressRuleValue can only apply to
// the IP in the Spec of the parent Ingress.
// 2. The `:` delimiter is not respected because ports are not allowed.
// Currently the port of an Ingress is implicitly :80 for http and
// :443 for https.
// Both these may change in the future.
// Incoming requests are matched against the host before the IngressRuleValue.
// If the host is unspecified, the Ingress routes all traffic based on the
// specified IngressRuleValue.
// Incoming requests are matched against the host before the
// IngressRuleValue. If the host is unspecified, the Ingress routes all
// traffic based on the specified IngressRuleValue.
//
// Host can be "precise" which is a domain name without the terminating dot of
// a network host (e.g. "foo.bar.com") or "wildcard", which is a domain name
// prefixed with a single wildcard label (e.g. "*.foo.com").
// The wildcard character '*' must appear by itself as the first DNS label and
// matches only a single label. You cannot have a wildcard label by itself (e.g. Host == "*").
// Requests will be matched against the Host field in the following way:
// 1. If Host is precise, the request matches this rule if the http host header is equal to Host.
// 2. If Host is a wildcard, then the request matches this rule if the http host header
// is equal to the suffix (removing the first label) of the wildcard rule.
// +optional
Host string
// IngressRuleValue represents a rule to route requests for this IngressRule.
// If unspecified, the rule defaults to a http catch-all. Whether that sends
// just traffic matching the host to the default backend or all traffic to the
// default backend, is left to the controller fulfilling the Ingress. Http is
// currently the only supported IngressRuleValue.
// IngressRuleValue represents a rule to route requests for this
// IngressRule. If unspecified, the rule defaults to a http catch-all.
// Whether that sends just traffic matching the host to the default backend
// or all traffic to the default backend, is left to the controller
// fulfilling the Ingress. Http is currently the only supported
// IngressRuleValue.
// +optional
IngressRuleValue
}
@ -336,19 +409,52 @@ type HTTPIngressRuleValue struct {
// options usable by a loadbalancer, like http keep-alive.
}
// HTTPIngressPath associates a path regex with a backend. Incoming urls matching
// the path are forwarded to the backend.
// PathType represents the type of path referred to by a HTTPIngressPath.
type PathType string
const (
// PathTypeExact matches the URL path exactly and with case sensitivity.
PathTypeExact = PathType("Exact")
// PathTypePrefix matches based on a URL path prefix split by '/'. Matching
// is case sensitive and done on a path element by element basis. A path
// element refers to the list of labels in the path split by the '/'
// separator. A request matches path p if p is an element-wise prefix of
// the request path. Note that if the last element of the
// path is a substring of the last element in request path, it is not a
// match (e.g. /foo/bar matches /foo/bar/baz, but does not match
// /foo/barbaz). If multiple matching paths exist in an Ingress spec, the
// longest matching path is given priority.
// Examples:
// - /foo/bar does not match requests to /foo/barbaz
// - /foo/bar matches request to /foo/bar and /foo/bar/baz
// - /foo and /foo/ both match requests to /foo and /foo/. If both paths are
// present in an Ingress spec, the longest matching path (/foo/) is given
// priority.
PathTypePrefix = PathType("Prefix")
// PathTypeImplementationSpecific matching is up to the IngressClass.
// Implementations can treat this as a separate PathType or treat it
// identically to Prefix or Exact path types.
PathTypeImplementationSpecific = PathType("ImplementationSpecific")
)
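The element-wise Prefix semantics described above can be sketched in a few lines. This is an illustration only, not the matcher any controller ships; root-path ("/") handling is omitted for brevity:

package main

import (
	"fmt"
	"strings"
)

func prefixMatch(rulePath, reqPath string) bool {
	rule := strings.Split(strings.Trim(rulePath, "/"), "/")
	req := strings.Split(strings.Trim(reqPath, "/"), "/")
	if len(rule) > len(req) {
		return false
	}
	for i := range rule { // whole path labels must match; substrings do not
		if rule[i] != req[i] {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println(prefixMatch("/foo/bar", "/foo/bar/baz")) // true
	fmt.Println(prefixMatch("/foo/bar", "/foo/barbaz"))  // false
	fmt.Println(prefixMatch("/foo/", "/foo"))            // true
}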
// HTTPIngressPath associates a path with a backend. Incoming urls matching the
// path are forwarded to the backend.
type HTTPIngressPath struct {
// Path is an extended POSIX regex as defined by IEEE Std 1003.1,
// (i.e this follows the egrep/unix syntax, not the perl syntax)
// matched against the path of an incoming request. Currently it can
// contain characters disallowed from the conventional "path"
// part of a URL as defined by RFC 3986. Paths must begin with
// a '/'. If unspecified, the path defaults to a catch all sending
// traffic to the backend.
// Path is matched against the path of an incoming request. Currently it can
// contain characters disallowed from the conventional "path" part of a URL
// as defined by RFC 3986. Paths must begin with a '/'. When unspecified,
// all paths from incoming requests are matched.
// +optional
Path string
// PathType determines the interpretation of the Path matching. PathType can
// be one of Exact, Prefix, or ImplementationSpecific. Implementations are
// required to support all path types.
// +optional
PathType *PathType
// Backend defines the referenced service endpoint to which the traffic
// will be forwarded to.
Backend IngressBackend
@ -357,8 +463,16 @@ type HTTPIngressPath struct {
// IngressBackend describes all endpoints for a given service and port.
type IngressBackend struct {
// Specifies the name of the referenced service.
// +optional
ServiceName string
// Specifies the port of the referenced service.
// +optional
ServicePort intstr.IntOrString
// Resource is an ObjectRef to another Kubernetes resource in the namespace
// of the Ingress object. If resource is specified, serviceName and servicePort
// must not be specified.
// +optional
Resource *api.TypedLocalObjectReference
}
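A short hedged sketch of the new resource backend through the published v1beta1 API; the APIGroup, Kind, and names are invented for illustration:

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	networkingv1beta1 "k8s.io/api/networking/v1beta1"
)

func main() {
	group := "example.com"
	backend := networkingv1beta1.IngressBackend{
		// ServiceName and ServicePort stay empty: resource and service
		// backends are mutually exclusive.
		Resource: &corev1.TypedLocalObjectReference{
			APIGroup: &group,
			Kind:     "StorageBucket",
			Name:     "static-assets",
		},
	}
	fmt.Println(backend.Resource.Kind)
}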

View File

@ -30,7 +30,12 @@ import (
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HTTPIngressPath) DeepCopyInto(out *HTTPIngressPath) {
*out = *in
out.Backend = in.Backend
if in.PathType != nil {
in, out := &in.PathType, &out.PathType
*out = new(PathType)
**out = **in
}
in.Backend.DeepCopyInto(&out.Backend)
return
}
@ -50,7 +55,9 @@ func (in *HTTPIngressRuleValue) DeepCopyInto(out *HTTPIngressRuleValue) {
if in.Paths != nil {
in, out := &in.Paths, &out.Paths
*out = make([]HTTPIngressPath, len(*in))
copy(*out, *in)
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}
@ -118,6 +125,11 @@ func (in *Ingress) DeepCopyObject() runtime.Object {
func (in *IngressBackend) DeepCopyInto(out *IngressBackend) {
*out = *in
out.ServicePort = in.ServicePort
if in.Resource != nil {
in, out := &in.Resource, &out.Resource
*out = new(core.TypedLocalObjectReference)
(*in).DeepCopyInto(*out)
}
return
}
@ -131,6 +143,87 @@ func (in *IngressBackend) DeepCopy() *IngressBackend {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *IngressClass) DeepCopyInto(out *IngressClass) {
*out = *in
out.TypeMeta = in.TypeMeta
in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
in.Spec.DeepCopyInto(&out.Spec)
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IngressClass.
func (in *IngressClass) DeepCopy() *IngressClass {
if in == nil {
return nil
}
out := new(IngressClass)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *IngressClass) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *IngressClassList) DeepCopyInto(out *IngressClassList) {
*out = *in
out.TypeMeta = in.TypeMeta
in.ListMeta.DeepCopyInto(&out.ListMeta)
if in.Items != nil {
in, out := &in.Items, &out.Items
*out = make([]IngressClass, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IngressClassList.
func (in *IngressClassList) DeepCopy() *IngressClassList {
if in == nil {
return nil
}
out := new(IngressClassList)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *IngressClassList) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *IngressClassSpec) DeepCopyInto(out *IngressClassSpec) {
*out = *in
if in.Parameters != nil {
in, out := &in.Parameters, &out.Parameters
*out = new(core.TypedLocalObjectReference)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IngressClassSpec.
func (in *IngressClassSpec) DeepCopy() *IngressClassSpec {
if in == nil {
return nil
}
out := new(IngressClassSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *IngressList) DeepCopyInto(out *IngressList) {
*out = *in
@ -205,10 +298,15 @@ func (in *IngressRuleValue) DeepCopy() *IngressRuleValue {
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *IngressSpec) DeepCopyInto(out *IngressSpec) {
*out = *in
if in.IngressClassName != nil {
in, out := &in.IngressClassName, &out.IngressClassName
*out = new(string)
**out = **in
}
if in.Backend != nil {
in, out := &in.Backend, &out.Backend
*out = new(IngressBackend)
**out = **in
(*in).DeepCopyInto(*out)
}
if in.TLS != nil {
in, out := &in.TLS, &out.TLS

View File

@ -47,7 +47,7 @@ type PodDisruptionBudgetSpec struct {
// PodDisruptionBudgetStatus represents information about the status of a
// PodDisruptionBudget. Status may trail the actual state of a system.
type PodDisruptionBudgetStatus struct {
// Most recent generation observed when updating this PDB status. PodDisruptionsAllowed and other
// Most recent generation observed when updating this PDB status. DisruptionsAllowed and other
// status information is valid only if observedGeneration equals to PDB's object generation.
// +optional
ObservedGeneration int64
@ -67,7 +67,7 @@ type PodDisruptionBudgetStatus struct {
DisruptedPods map[string]metav1.Time
// Number of pod disruptions that are currently allowed.
PodDisruptionsAllowed int32
DisruptionsAllowed int32
// current number of healthy pods
CurrentHealthy int32

View File

@ -71,7 +71,7 @@ func ValidatePodDisruptionBudgetSpec(spec policy.PodDisruptionBudgetSpec, fldPat
// with any errors.
func ValidatePodDisruptionBudgetStatus(status policy.PodDisruptionBudgetStatus, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(status.PodDisruptionsAllowed), fldPath.Child("podDisruptionsAllowed"))...)
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(status.DisruptionsAllowed), fldPath.Child("disruptionsAllowed"))...)
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(status.CurrentHealthy), fldPath.Child("currentHealthy"))...)
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(status.DesiredHealthy), fldPath.Child("desiredHealthy"))...)
allErrs = append(allErrs, apivalidation.ValidateNonnegativeField(int64(status.ExpectedPods), fldPath.Child("expectedPods"))...)

View File

@ -11,308 +11,6 @@
"ForbiddenPrefixes": [
"k8s.io/kubernetes/pkg/client/unversioned/testclient"
]
},
{
"SelectorRegexp": "k8s[.]io/(api/|apimachinery/|apiextensions-apiserver/|apiserver/)",
"AllowedPrefixes": [
"k8s.io/api/apps/v1",
"k8s.io/api/apps/v1beta1",
"k8s.io/api/authentication/v1",
"k8s.io/api/authorization/v1beta1",
"k8s.io/api/autoscaling/v1",
"k8s.io/api/autoscaling/v2beta1",
"k8s.io/api/autoscaling/v2beta2",
"k8s.io/api/batch/v1",
"k8s.io/api/batch/v1beta1",
"k8s.io/api/certificates/v1beta1",
"k8s.io/api/core/v1",
"k8s.io/api/coordination/v1",
"k8s.io/api/discovery/v1beta1",
"k8s.io/api/extensions/v1beta1",
"k8s.io/api/policy/v1beta1",
"k8s.io/api/rbac/v1",
"k8s.io/api/storage/v1",
"k8s.io/apimachinery/pkg/api/equality",
"k8s.io/apimachinery/pkg/api/errors",
"k8s.io/apimachinery/pkg/api/meta",
"k8s.io/apimachinery/pkg/api/resource",
"k8s.io/apimachinery/pkg/apis/meta/v1",
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured",
"k8s.io/apimachinery/pkg/conversion",
"k8s.io/apimachinery/pkg/fields",
"k8s.io/apimachinery/pkg/labels",
"k8s.io/apimachinery/pkg/runtime",
"k8s.io/apimachinery/pkg/runtime/schema",
"k8s.io/apimachinery/pkg/runtime/serializer",
"k8s.io/apimachinery/pkg/types",
"k8s.io/apimachinery/pkg/util/clock",
"k8s.io/apimachinery/pkg/util/diff",
"k8s.io/apimachinery/pkg/util/errors",
"k8s.io/apimachinery/pkg/util/intstr",
"k8s.io/apimachinery/pkg/util/json",
"k8s.io/apimachinery/pkg/util/rand",
"k8s.io/apimachinery/pkg/util/runtime",
"k8s.io/apimachinery/pkg/util/sets",
"k8s.io/apimachinery/pkg/util/strategicpatch",
"k8s.io/apimachinery/pkg/util/uuid",
"k8s.io/apimachinery/pkg/util/wait",
"k8s.io/apimachinery/pkg/util/version",
"k8s.io/apimachinery/pkg/watch",
"k8s.io/apiserver/pkg/authentication/serviceaccount",
"k8s.io/apiserver/pkg/storage/names",
"k8s.io/apiserver/pkg/util/feature",
"k8s.io/apiextensions-apiserver/pkg/features",
"k8s.io/apimachinery/pkg/api/validation",
"k8s.io/apimachinery/pkg/apis/meta/internalversion",
"k8s.io/apimachinery/pkg/selection",
"k8s.io/apimachinery/pkg/util/validation",
"k8s.io/apimachinery/pkg/util/validation/field",
"k8s.io/apiserver/pkg/authentication/authenticator",
"k8s.io/apiserver/pkg/authentication/user",
"k8s.io/apiserver/pkg/features",
"k8s.io/apiserver/pkg/registry/generic",
"k8s.io/apimachinery/pkg/version",
"k8s.io/api/imagepolicy/v1alpha1",
"k8s.io/apiserver/pkg/admission",
"k8s.io/apiserver/pkg/storage",
"k8s.io/api/batch/v2alpha1",
"k8s.io/apiserver/pkg/registry/rest",
"k8s.io/api/scheduling/v1alpha1",
"k8s.io/api/admissionregistration/v1",
"k8s.io/api/admissionregistration/v1beta1",
"k8s.io/api/authorization/v1",
"k8s.io/api/settings/v1alpha1",
"k8s.io/api/admission/v1beta1",
"k8s.io/api/admission/v1",
"k8s.io/api/networking/v1",
"k8s.io/component-base/config",
"k8s.io/component-base/config/v1alpha1",
"k8s.io/api/scheduling/v1"
]
},
{
"SelectorRegexp": "github[.]com/",
"AllowedPrefixes": [
"github.com/cloudflare/cfssl/config",
"github.com/cloudflare/cfssl/helpers",
"github.com/cloudflare/cfssl/signer",
"github.com/cloudflare/cfssl/signer/local",
"github.com/davecgh/go-spew/spew",
"github.com/docker/distribution/reference",
"github.com/evanphx/json-patch",
"github.com/golang/groupcache/lru",
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud",
"github.com/google/gofuzz",
"github.com/prometheus/client_golang/prometheus",
"github.com/robfig/cron",
"github.com/spf13/pflag",
"github.com/stretchr/testify/assert",
"github.com/stretchr/testify/mock",
"github.com/stretchr/testify/require",
"github.com/google/gofuzz",
"github.com/golang/protobuf/ptypes/wrappers",
"github.com/golang/protobuf/proto",
"github.com/container-storage-interface/spec/lib/go/csi"
]
},
{
"SelectorRegexp": "k8s[.]io/client-go/",
"AllowedPrefixes": [
"k8s.io/client-go/util/keyutil",
"k8s.io/client-go/discovery",
"k8s.io/client-go/dynamic",
"k8s.io/client-go/informers",
"k8s.io/client-go/informers/apps/v1",
"k8s.io/client-go/informers/apps/v1beta1",
"k8s.io/client-go/informers/autoscaling/v1",
"k8s.io/client-go/informers/batch/v1",
"k8s.io/client-go/informers/certificates/v1beta1",
"k8s.io/client-go/informers/core/v1",
"k8s.io/client-go/informers/extensions/v1beta1",
"k8s.io/client-go/informers/policy/v1beta1",
"k8s.io/client-go/informers/rbac/v1",
"k8s.io/client-go/informers/storage/v1",
"k8s.io/client-go/kubernetes",
"k8s.io/client-go/kubernetes/fake",
"k8s.io/client-go/kubernetes/scheme",
"k8s.io/client-go/kubernetes/typed/apps/v1",
"k8s.io/client-go/kubernetes/typed/authentication/v1",
"k8s.io/client-go/kubernetes/typed/autoscaling/v1",
"k8s.io/client-go/kubernetes/typed/certificates/v1beta1",
"k8s.io/client-go/kubernetes/typed/core/v1",
"k8s.io/client-go/kubernetes/typed/policy/v1beta1",
"k8s.io/client-go/kubernetes/typed/rbac/v1",
"k8s.io/client-go/listers/apps/v1",
"k8s.io/client-go/listers/apps/v1beta1",
"k8s.io/client-go/listers/autoscaling/v1",
"k8s.io/client-go/listers/batch/v1",
"k8s.io/client-go/listers/certificates/v1beta1",
"k8s.io/client-go/listers/coordination/v1",
"k8s.io/client-go/listers/core/v1",
"k8s.io/client-go/listers/discovery/v1alpha1",
"k8s.io/client-go/listers/discovery/v1beta1",
"k8s.io/client-go/listers/coordination/v1",
"k8s.io/client-go/listers/extensions/v1beta1",
"k8s.io/client-go/listers/policy/v1beta1",
"k8s.io/client-go/listers/rbac/v1",
"k8s.io/client-go/listers/storage/v1",
"k8s.io/client-go/metadata",
"k8s.io/client-go/pkg/version",
"k8s.io/client-go/rest",
"k8s.io/client-go/scale",
"k8s.io/client-go/testing",
"k8s.io/client-go/tools/cache",
"k8s.io/client-go/tools/leaderelection/resourcelock",
"k8s.io/client-go/tools/pager",
"k8s.io/client-go/tools/record",
"k8s.io/client-go/tools/reference",
"k8s.io/client-go/tools/watch",
"k8s.io/client-go/transport",
"k8s.io/client-go/util/cert",
"k8s.io/client-go/util/flowcontrol",
"k8s.io/client-go/util/retry",
"k8s.io/client-go/util/testing",
"k8s.io/client-go/util/workqueue"
]
},
{
"SelectorRegexp": "k8s[.]io/kubernetes/pkg",
"AllowedPrefixes": [
"k8s.io/kubernetes/pkg/api/legacyscheme",
"k8s.io/kubernetes/pkg/api/v1/endpoints",
"k8s.io/kubernetes/pkg/api/v1/node",
"k8s.io/kubernetes/pkg/api/v1/pod",
"k8s.io/kubernetes/pkg/apis/apps/v1",
"k8s.io/kubernetes/pkg/apis/autoscaling",
"k8s.io/kubernetes/pkg/apis/certificates/v1beta1",
"k8s.io/kubernetes/pkg/apis/core",
"k8s.io/kubernetes/pkg/apis/core/helper",
"k8s.io/kubernetes/pkg/apis/core/install",
"k8s.io/kubernetes/pkg/apis/core/v1",
"k8s.io/kubernetes/pkg/apis/core/v1/helper",
"k8s.io/kubernetes/pkg/apis/core/validation",
"k8s.io/kubernetes/pkg/apis/discovery",
"k8s.io/kubernetes/pkg/apis/discovery/validation",
"k8s.io/kubernetes/pkg/cloudprovider",
"k8s.io/kubernetes/pkg/cloudprovider/providers/gce",
"k8s.io/kubernetes/pkg/controller",
"k8s.io/kubernetes/pkg/controller/apis/config",
"k8s.io/kubernetes/pkg/controller/apis/config/v1alpha1",
"k8s.io/kubernetes/pkg/controller/certificates",
"k8s.io/kubernetes/pkg/controller/daemon",
"k8s.io/kubernetes/pkg/controller/daemon/util",
"k8s.io/kubernetes/pkg/controller/deployment",
"k8s.io/kubernetes/pkg/controller/deployment/util",
"k8s.io/kubernetes/pkg/controller/garbagecollector",
"k8s.io/kubernetes/pkg/controller/garbagecollector/metaonly",
"k8s.io/kubernetes/pkg/controller/history",
"k8s.io/kubernetes/pkg/controller/job",
"k8s.io/kubernetes/pkg/controller/namespace",
"k8s.io/kubernetes/pkg/controller/namespace/deletion",
"k8s.io/kubernetes/pkg/controller/nodeipam",
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam",
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/cidrset",
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/sync",
"k8s.io/kubernetes/pkg/controller/nodelifecycle",
"k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler",
"k8s.io/kubernetes/pkg/controller/podautoscaler",
"k8s.io/kubernetes/pkg/controller/podautoscaler/metrics",
"k8s.io/kubernetes/pkg/controller/replicaset",
"k8s.io/kubernetes/pkg/controller/util/node",
"k8s.io/kubernetes/pkg/controller/volume/attachdetach",
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/cache",
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/metrics",
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/populator",
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/reconciler",
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/statusupdater",
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/util",
"k8s.io/kubernetes/pkg/controller/volume/events",
"k8s.io/kubernetes/pkg/controller/volume/expand",
"k8s.io/kubernetes/pkg/controller/volume/expand/cache",
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume",
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume/metrics",
"k8s.io/kubernetes/pkg/features",
"k8s.io/kubernetes/pkg/kubectl/scheme",
"k8s.io/kubernetes/pkg/kubelet/apis",
"k8s.io/kubernetes/pkg/kubelet/events",
"k8s.io/kubernetes/pkg/kubelet/types",
"k8s.io/kubernetes/pkg/kubelet/util/format",
"k8s.io/kubernetes/pkg/quota",
"k8s.io/kubernetes/pkg/registry/core/secret",
"k8s.io/kubernetes/pkg/scheduler/algorithm",
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates",
"k8s.io/kubernetes/pkg/scheduler/nodeinfo",
"k8s.io/kubernetes/pkg/serviceaccount",
"k8s.io/kubernetes/pkg/util/goroutinemap",
"k8s.io/kubernetes/pkg/util/goroutinemap/exponentialbackoff",
"k8s.io/kubernetes/pkg/util/hash",
"k8s.io/kubernetes/pkg/util/labels",
"k8s.io/kubernetes/pkg/util/node",
"k8s.io/kubernetes/pkg/util/slice",
"k8s.io/kubernetes/pkg/util/taints",
"k8s.io/kubernetes/pkg/volume",
"k8s.io/kubernetes/pkg/volume/util",
"k8s.io/kubernetes/pkg/volume/util/operationexecutor",
"k8s.io/kubernetes/pkg/volume/util/recyclerclient",
"k8s.io/kubernetes/pkg/volume/util/subpath",
"k8s.io/kubernetes/pkg/volume/util/types",
"k8s.io/kubernetes/pkg/volume/util/volumepathhandler",
"k8s.io/kubernetes/pkg/api/service",
"k8s.io/kubernetes/pkg/apis/scheduling",
"k8s.io/kubernetes/pkg/capabilities",
"k8s.io/kubernetes/pkg/master/ports",
"k8s.io/kubernetes/pkg/scheduler/api",
"k8s.io/kubernetes/pkg/scheduler/util",
"k8s.io/kubernetes/pkg/scheduler/listers",
"k8s.io/kubernetes/pkg/security/apparmor",
"k8s.io/kubernetes/pkg/util/parsers",
"k8s.io/kubernetes/pkg/fieldpath",
"k8s.io/kubernetes/pkg/scheduler/volumebinder",
"k8s.io/kubernetes/pkg/util/resizefs",
"k8s.io/kubernetes/pkg/apis/apps",
"k8s.io/kubernetes/pkg/scheduler/metrics"
]
},
{
"SelectorRegexp": "k8s[.]io/(metrics/|utils/|heapster/|kube-controller-manager/)",
"AllowedPrefixes": [
"k8s.io/heapster/metrics/api/v1/types",
"k8s.io/kube-controller-manager/config/v1alpha1",
"k8s.io/metrics/pkg/apis/custom_metrics/v1beta2",
"k8s.io/metrics/pkg/apis/external_metrics/v1beta1",
"k8s.io/metrics/pkg/apis/metrics/v1alpha1",
"k8s.io/metrics/pkg/apis/metrics/v1beta1",
"k8s.io/metrics/pkg/client/clientset/versioned/scheme",
"k8s.io/metrics/pkg/client/clientset/versioned/typed/metrics/v1beta1",
"k8s.io/metrics/pkg/client/custom_metrics",
"k8s.io/metrics/pkg/client/external_metrics",
"k8s.io/utils/exec",
"k8s.io/utils/integer",
"k8s.io/utils/io",
"k8s.io/utils/mount",
"k8s.io/utils/net",
"k8s.io/utils/nsenter",
"k8s.io/utils/path",
"k8s.io/utils/pointer",
"k8s.io/utils/strings"
]
},
{
"SelectorRegexp": "golang[.]org/",
"AllowedPrefixes": [
"golang.org/x/time/rate",
"golang.org/x/sys/unix",
"golang.org/x/oauth2",
"golang.org/x/net/context",
"google.golang.org/api/compute/v1",
"google.golang.org/api/googleapi",
"google.golang.org/api/compute/v0.alpha",
"google.golang.org/api/container/v1",
"google.golang.org/api/compute/v0.beta",
"google.golang.org/api/tpu/v1",
"golang.org/x/net/context",
"google.golang.org/grpc"
]
}
]
}

View File

@ -14,7 +14,6 @@ go_test(
],
embed = [":go_default_library"],
deps = [
"//pkg/api/testapi:go_default_library",
"//pkg/apis/core/install:go_default_library",
"//pkg/controller/testutil:go_default_library",
"//pkg/securitycontext:go_default_library",
@ -31,6 +30,7 @@ go_test(
"//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes/scheme:go_default_library",
"//staging/src/k8s.io/client-go/rest:go_default_library",
"//staging/src/k8s.io/client-go/tools/cache:go_default_library",
"//staging/src/k8s.io/client-go/tools/record:go_default_library",

View File

@ -22,7 +22,7 @@ import (
"time"
v1authenticationapi "k8s.io/api/authentication/v1"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
@ -120,11 +120,11 @@ func (b SAControllerClientBuilder) Config(name string) (*restclient.Config, erro
lw := &cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
options.FieldSelector = fieldSelector
return b.CoreClient.Secrets(b.Namespace).List(options)
return b.CoreClient.Secrets(b.Namespace).List(context.TODO(), options)
},
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
options.FieldSelector = fieldSelector
return b.CoreClient.Secrets(b.Namespace).Watch(options)
return b.CoreClient.Secrets(b.Namespace).Watch(context.TODO(), options)
},
}
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
@ -157,7 +157,7 @@ func (b SAControllerClientBuilder) Config(name string) (*restclient.Config, erro
if !valid {
klog.Warningf("secret %s contained an invalid API token for %s/%s", secret.Name, sa.Namespace, sa.Name)
// try to delete the secret containing the invalid token
if err := b.CoreClient.Secrets(secret.Namespace).Delete(secret.Name, &metav1.DeleteOptions{}); err != nil && !apierrors.IsNotFound(err) {
if err := b.CoreClient.Secrets(secret.Namespace).Delete(context.TODO(), secret.Name, metav1.DeleteOptions{}); err != nil && !apierrors.IsNotFound(err) {
klog.Warningf("error deleting secret %s containing invalid API token for %s/%s: %v", secret.Name, sa.Namespace, sa.Name, err)
}
// continue watching for good tokens
@ -186,7 +186,7 @@ func (b SAControllerClientBuilder) getAuthenticatedConfig(sa *v1.ServiceAccount,
// Try token review first
tokenReview := &v1authenticationapi.TokenReview{Spec: v1authenticationapi.TokenReviewSpec{Token: token}}
if tokenResult, err := b.AuthenticationClient.TokenReviews().Create(tokenReview); err == nil {
if tokenResult, err := b.AuthenticationClient.TokenReviews().Create(context.TODO(), tokenReview, metav1.CreateOptions{}); err == nil {
if !tokenResult.Status.Authenticated {
klog.Warningf("Token for %s/%s did not authenticate correctly", sa.Namespace, sa.Name)
return nil, false, nil
@ -207,7 +207,7 @@ func (b SAControllerClientBuilder) getAuthenticatedConfig(sa *v1.ServiceAccount,
if err != nil {
return nil, false, err
}
err = client.Get().AbsPath("/apis").Do().Error()
err = client.Get().AbsPath("/apis").Do(context.TODO()).Error()
if apierrors.IsUnauthorized(err) {
klog.Warningf("Token for %s/%s did not authenticate correctly: %v", sa.Namespace, sa.Name, err)
return nil, false, nil
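These call-site changes follow the client-go v0.18 signature migration: every typed client method now takes a context.Context as its first argument, and the options structs are passed by value rather than by pointer. A minimal sketch of the new shape (the helper name is hypothetical):

package example

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

func deleteSecret(cs kubernetes.Interface, ns, name string) error {
	// Pre-0.18: Delete(name, &metav1.DeleteOptions{})
	// 0.18+:    Delete(ctx, name, metav1.DeleteOptions{})
	return cs.CoreV1().Secrets(ns).Delete(context.TODO(), name, metav1.DeleteOptions{})
}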

View File

@ -17,6 +17,7 @@ limitations under the License.
package controller
import (
"context"
"fmt"
"net/http"
"sync"
@ -25,6 +26,7 @@ import (
"golang.org/x/oauth2"
v1authenticationapi "k8s.io/api/authentication/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/clock"
"k8s.io/apimachinery/pkg/util/wait"
apiserverserviceaccount "k8s.io/apiserver/pkg/authentication/serviceaccount"
@ -174,11 +176,11 @@ func (ts *tokenSourceImpl) Token() (*oauth2.Token, error) {
return false, nil
}
tr, inErr := ts.coreClient.ServiceAccounts(ts.namespace).CreateToken(ts.serviceAccountName, &v1authenticationapi.TokenRequest{
tr, inErr := ts.coreClient.ServiceAccounts(ts.namespace).CreateToken(context.TODO(), ts.serviceAccountName, &v1authenticationapi.TokenRequest{
Spec: v1authenticationapi.TokenRequestSpec{
ExpirationSeconds: utilpointer.Int64Ptr(ts.expirationSeconds),
},
})
}, metav1.CreateOptions{})
if inErr != nil {
klog.Warningf("get token failed: %v", inErr)
return false, nil

View File

@ -17,6 +17,7 @@ limitations under the License.
package controller
import (
"context"
"encoding/binary"
"encoding/json"
"fmt"
@ -419,7 +420,7 @@ type RealRSControl struct {
var _ RSControlInterface = &RealRSControl{}
func (r RealRSControl) PatchReplicaSet(namespace, name string, data []byte) error {
_, err := r.KubeClient.AppsV1().ReplicaSets(namespace).Patch(name, types.StrategicMergePatchType, data)
_, err := r.KubeClient.AppsV1().ReplicaSets(namespace).Patch(context.TODO(), name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
return err
}
@ -439,7 +440,7 @@ type RealControllerRevisionControl struct {
var _ ControllerRevisionControlInterface = &RealControllerRevisionControl{}
func (r RealControllerRevisionControl) PatchControllerRevision(namespace, name string, data []byte) error {
_, err := r.KubeClient.AppsV1().ControllerRevisions(namespace).Patch(name, types.StrategicMergePatchType, data)
_, err := r.KubeClient.AppsV1().ControllerRevisions(namespace).Patch(context.TODO(), name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
return err
}
@ -536,7 +537,7 @@ func (r RealPodControl) CreatePodsOnNode(nodeName, namespace string, template *v
}
func (r RealPodControl) PatchPod(namespace, name string, data []byte) error {
_, err := r.KubeClient.CoreV1().Pods(namespace).Patch(name, types.StrategicMergePatchType, data)
_, err := r.KubeClient.CoreV1().Pods(namespace).Patch(context.TODO(), name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
return err
}
@ -576,7 +577,7 @@ func (r RealPodControl) createPods(nodeName, namespace string, template *v1.PodT
if len(labels.Set(pod.Labels)) == 0 {
return fmt.Errorf("unable to create pods, no labels")
}
newPod, err := r.KubeClient.CoreV1().Pods(namespace).Create(pod)
newPod, err := r.KubeClient.CoreV1().Pods(namespace).Create(context.TODO(), pod, metav1.CreateOptions{})
if err != nil {
// only send an event if the namespace isn't terminating
if !apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
@ -601,7 +602,7 @@ func (r RealPodControl) DeletePod(namespace string, podID string, object runtime
return fmt.Errorf("object does not have ObjectMeta, %v", err)
}
klog.V(2).Infof("Controller %v deleting pod %v/%v", accessor.GetName(), namespace, podID)
if err := r.KubeClient.CoreV1().Pods(namespace).Delete(podID, nil); err != nil && !apierrors.IsNotFound(err) {
if err := r.KubeClient.CoreV1().Pods(namespace).Delete(context.TODO(), podID, metav1.DeleteOptions{}); err != nil && !apierrors.IsNotFound(err) {
r.Recorder.Eventf(object, v1.EventTypeWarning, FailedDeletePodReason, "Error deleting: %v", err)
return fmt.Errorf("unable to delete pods: %v", err)
}
@ -1013,10 +1014,10 @@ func AddOrUpdateTaintOnNode(c clientset.Interface, nodeName string, taints ...*v
// First we try getting node from the API server cache, as it's cheaper. If it fails
// we get it from etcd to be sure to have fresh data.
if firstTry {
oldNode, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{ResourceVersion: "0"})
oldNode, err = c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{ResourceVersion: "0"})
firstTry = false
} else {
oldNode, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
oldNode, err = c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
}
if err != nil {
return err
@ -1070,10 +1071,10 @@ func RemoveTaintOffNode(c clientset.Interface, nodeName string, node *v1.Node, t
// First we try getting node from the API server cache, as it's cheaper. If it fails
// we get it from etcd to be sure to have fresh data.
if firstTry {
oldNode, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{ResourceVersion: "0"})
oldNode, err = c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{ResourceVersion: "0"})
firstTry = false
} else {
oldNode, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
oldNode, err = c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
}
if err != nil {
return err
@ -1118,7 +1119,7 @@ func PatchNodeTaints(c clientset.Interface, nodeName string, oldNode *v1.Node, n
return fmt.Errorf("failed to create patch for node %q: %v", nodeName, err)
}
_, err = c.CoreV1().Nodes().Patch(nodeName, types.StrategicMergePatchType, patchBytes)
_, err = c.CoreV1().Nodes().Patch(context.TODO(), nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{})
return err
}
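The firstTry branch above is a cache-first read: ResourceVersion "0" lets the apiserver answer from its watch cache, which is cheap but possibly stale, while the retry falls back to a quorum read against etcd. A sketch of the pattern as a standalone helper (package and helper name are hypothetical):

package nodeutil

import (
	"context"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
)

// getNodeCacheFirst mirrors the retry pattern above.
func getNodeCacheFirst(c clientset.Interface, name string, firstTry bool) (*v1.Node, error) {
	if firstTry {
		// Served from the apiserver watch cache: cheap, possibly stale.
		return c.CoreV1().Nodes().Get(context.TODO(), name, metav1.GetOptions{ResourceVersion: "0"})
	}
	// Retries do a quorum read for fresh data.
	return c.CoreV1().Nodes().Get(context.TODO(), name, metav1.GetOptions{})
}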
@ -1147,10 +1148,10 @@ func AddOrUpdateLabelsOnNode(kubeClient clientset.Interface, nodeName string, la
// First we try getting node from the API server cache, as it's cheaper. If it fails
// we get it from etcd to be sure to have fresh data.
if firstTry {
node, err = kubeClient.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{ResourceVersion: "0"})
node, err = kubeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{ResourceVersion: "0"})
firstTry = false
} else {
node, err = kubeClient.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
node, err = kubeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
}
if err != nil {
return err
@ -1177,7 +1178,7 @@ func AddOrUpdateLabelsOnNode(kubeClient clientset.Interface, nodeName string, la
if err != nil {
return fmt.Errorf("failed to create a two-way merge patch: %v", err)
}
if _, err := kubeClient.CoreV1().Nodes().Patch(node.Name, types.StrategicMergePatchType, patchBytes); err != nil {
if _, err := kubeClient.CoreV1().Nodes().Patch(context.TODO(), node.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}); err != nil {
return fmt.Errorf("failed to patch the node: %v", err)
}
return nil
@ -1185,7 +1186,7 @@ func AddOrUpdateLabelsOnNode(kubeClient clientset.Interface, nodeName string, la
}
func getOrCreateServiceAccount(coreClient v1core.CoreV1Interface, namespace, name string) (*v1.ServiceAccount, error) {
sa, err := coreClient.ServiceAccounts(namespace).Get(name, metav1.GetOptions{})
sa, err := coreClient.ServiceAccounts(namespace).Get(context.TODO(), name, metav1.GetOptions{})
if err == nil {
return sa, nil
}
@ -1195,17 +1196,17 @@ func getOrCreateServiceAccount(coreClient v1core.CoreV1Interface, namespace, nam
// Create the namespace if we can't verify it exists.
// Tolerate errors, since we don't know whether this component has namespace creation permissions.
if _, err := coreClient.Namespaces().Get(namespace, metav1.GetOptions{}); apierrors.IsNotFound(err) {
if _, err = coreClient.Namespaces().Create(&v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}}); err != nil && !apierrors.IsAlreadyExists(err) {
if _, err := coreClient.Namespaces().Get(context.TODO(), namespace, metav1.GetOptions{}); apierrors.IsNotFound(err) {
if _, err = coreClient.Namespaces().Create(context.TODO(), &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}}, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) {
klog.Warningf("create non-exist namespace %s failed:%v", namespace, err)
}
}
// Create the service account
sa, err = coreClient.ServiceAccounts(namespace).Create(&v1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name}})
sa, err = coreClient.ServiceAccounts(namespace).Create(context.TODO(), &v1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name}}, metav1.CreateOptions{})
if apierrors.IsAlreadyExists(err) {
// If we're racing to init and someone else already created it, re-fetch
return coreClient.ServiceAccounts(namespace).Get(name, metav1.GetOptions{})
return coreClient.ServiceAccounts(namespace).Get(context.TODO(), name, metav1.GetOptions{})
}
return sa, err
}

View File

@ -18,11 +18,13 @@ go_library(
"//staging/src/k8s.io/apimachinery/pkg/api/equality:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/meta:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes/typed/apps/v1:go_default_library",
"//staging/src/k8s.io/client-go/listers/apps/v1:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/utils/integer:go_default_library",
],
@ -46,6 +48,7 @@ go_test(
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/storage/names:go_default_library",
"//staging/src/k8s.io/client-go/informers:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",
"//staging/src/k8s.io/client-go/testing:go_default_library",
],

View File

@ -17,6 +17,7 @@ limitations under the License.
package util
import (
"context"
"fmt"
"math"
"sort"
@ -24,18 +25,19 @@ import (
"strings"
"time"
"k8s.io/klog"
apps "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
intstrutil "k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
appsclient "k8s.io/client-go/kubernetes/typed/apps/v1"
appslisters "k8s.io/client-go/listers/apps/v1"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/controller"
labelsutil "k8s.io/kubernetes/pkg/util/labels"
"k8s.io/utils/integer"
@ -544,7 +546,7 @@ func GetNewReplicaSet(deployment *apps.Deployment, c appsclient.AppsV1Interface)
// RsListFromClient returns an rsListFunc that wraps the given client.
func RsListFromClient(c appsclient.AppsV1Interface) RsListFunc {
return func(namespace string, options metav1.ListOptions) ([]*apps.ReplicaSet, error) {
rsList, err := c.ReplicaSets(namespace).List(options)
rsList, err := c.ReplicaSets(namespace).List(context.TODO(), options)
if err != nil {
return nil, err
}
@ -912,3 +914,38 @@ func HasProgressDeadline(d *apps.Deployment) bool {
func HasRevisionHistoryLimit(d *apps.Deployment) bool {
return d.Spec.RevisionHistoryLimit != nil && *d.Spec.RevisionHistoryLimit != math.MaxInt32
}
// GetDeploymentsForReplicaSet returns a list of Deployments that potentially
// match a ReplicaSet. Only the one specified in the ReplicaSet's ControllerRef
// will actually manage it.
// Returns an error only if no matching Deployments are found.
func GetDeploymentsForReplicaSet(deploymentLister appslisters.DeploymentLister, rs *apps.ReplicaSet) ([]*apps.Deployment, error) {
if len(rs.Labels) == 0 {
return nil, fmt.Errorf("no deployments found for ReplicaSet %v because it has no labels", rs.Name)
}
// TODO: MODIFY THIS METHOD so that it checks for the podTemplateSpecHash label
dList, err := deploymentLister.Deployments(rs.Namespace).List(labels.Everything())
if err != nil {
return nil, err
}
var deployments []*apps.Deployment
for _, d := range dList {
selector, err := metav1.LabelSelectorAsSelector(d.Spec.Selector)
if err != nil {
return nil, fmt.Errorf("invalid label selector: %v", err)
}
// If a deployment with a nil or empty selector creeps in, it should match nothing, not everything.
if selector.Empty() || !selector.Matches(labels.Set(rs.Labels)) {
continue
}
deployments = append(deployments, d)
}
if len(deployments) == 0 {
return nil, fmt.Errorf("could not find deployments set for ReplicaSet %s in namespace %s with labels: %v", rs.Name, rs.Namespace, rs.Labels)
}
return deployments, nil
}
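GetDeploymentsForReplicaSet returns every label-matching Deployment; callers still have to pick the one named by the ReplicaSet's ControllerRef. A hedged sketch of that follow-up step, written against the published apps/v1 types (the helper name is hypothetical):

package example

import (
	apps "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// managingDeployment picks the single candidate that actually owns rs.
func managingDeployment(rs *apps.ReplicaSet, candidates []*apps.Deployment) *apps.Deployment {
	ref := metav1.GetControllerOf(rs)
	if ref == nil || ref.Kind != "Deployment" {
		return nil
	}
	for _, d := range candidates {
		if d.Name == ref.Name && d.UID == ref.UID {
			return d
		}
	}
	return nil
}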

View File

@ -22,6 +22,7 @@ limitations under the License.
package nodelifecycle
import (
"context"
"fmt"
"strings"
"sync"
@ -128,7 +129,7 @@ const (
retrySleepTime = 20 * time.Millisecond
nodeNameKeyIndex = "spec.nodeName"
// podUpdateWorkerSizes assumes that in most cases pod will be handled by monitorNodeHealth pass.
// Pod update workes will only handle lagging cache pods. 4 workes should be enough.
// Pod update workers will only handle lagging cache pods. 4 workers should be enough.
podUpdateWorkerSize = 4
)
@ -350,10 +351,6 @@ type Controller struct {
// tainted nodes, if they're not tolerated.
runTaintManager bool
// if set to true Controller will taint Nodes with 'TaintNodeNotReady' and 'TaintNodeUnreachable'
// taints instead of evicting Pods itself.
useTaintBasedEvictions bool
nodeUpdateQueue workqueue.Interface
podUpdateQueue workqueue.RateLimitingInterface
}
@ -374,7 +371,6 @@ func NewNodeLifecycleController(
largeClusterThreshold int32,
unhealthyZoneThreshold float32,
runTaintManager bool,
useTaintBasedEvictions bool,
) (*Controller, error) {
if kubeClient == nil {
@ -415,13 +411,9 @@ func NewNodeLifecycleController(
largeClusterThreshold: largeClusterThreshold,
unhealthyZoneThreshold: unhealthyZoneThreshold,
runTaintManager: runTaintManager,
useTaintBasedEvictions: useTaintBasedEvictions && runTaintManager,
nodeUpdateQueue: workqueue.NewNamed("node_lifecycle_controller"),
podUpdateQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "node_lifecycle_controller_pods"),
}
if useTaintBasedEvictions {
klog.Infof("Controller is using taint based evictions.")
}
nc.enterPartialDisruptionFunc = nc.ReducedQPSFunc
nc.enterFullDisruptionFunc = nc.HealthyQPSFunc
@ -579,7 +571,7 @@ func (nc *Controller) Run(stopCh <-chan struct{}) {
go wait.Until(nc.doPodProcessingWorker, time.Second, stopCh)
}
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
// Handling taint based evictions. Because we don't want dedicated logic in TaintManager for NC-originated
// taints and we normally don't rate limit evictions caused by taints, we need to rate limit adding taints.
go wait.Until(nc.doNoExecuteTaintingPass, scheduler.NodeEvictionPeriod, stopCh)
@ -767,9 +759,7 @@ func (nc *Controller) doEvictionPass() {
// monitorNodeHealth verifies node health are constantly updated by kubelet, and
// if not, post "NodeReady==ConditionUnknown".
// For nodes who are not ready or not reachable for a long period of time.
// This function will taint them if TaintBasedEvictions feature was enabled.
// Otherwise, it would evict it directly.
// This function will taint nodes who are not ready or not reachable for a long period of time.
func (nc *Controller) monitorNodeHealth() error {
// We are listing nodes from local cache as we can tolerate some small delays
// comparing to state from etcd and there is eventual consistency anyway.
@ -788,7 +778,7 @@ func (nc *Controller) monitorNodeHealth() error {
nodeutil.RecordNodeEvent(nc.recorder, added[i].Name, string(added[i].UID), v1.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in Controller", added[i].Name))
nc.knownNodeSet[added[i].Name] = added[i]
nc.addPodEvictorForNewZone(added[i])
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
nc.markNodeAsReachable(added[i])
} else {
nc.cancelPodEviction(added[i])
@ -813,7 +803,7 @@ func (nc *Controller) monitorNodeHealth() error {
return true, nil
}
name := node.Name
node, err = nc.kubeClient.CoreV1().Nodes().Get(name, metav1.GetOptions{})
node, err = nc.kubeClient.CoreV1().Nodes().Get(context.TODO(), name, metav1.GetOptions{})
if err != nil {
klog.Errorf("Failed while getting a Node to retry updating node health. Probably Node %s was deleted.", name)
return false, err
@ -842,7 +832,7 @@ func (nc *Controller) monitorNodeHealth() error {
}
continue
}
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
nc.processTaintBaseEviction(node, &observedReadyCondition)
} else {
if err := nc.processNoTaintBaseEviction(node, &observedReadyCondition, gracePeriod, pods); err != nil {
@ -893,7 +883,7 @@ func (nc *Controller) processTaintBaseEviction(node *v1.Node, observedReadyCondi
if taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) {
taintToAdd := *UnreachableTaintTemplate
if !nodeutil.SwapNodeControllerTaint(nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{NotReadyTaintTemplate}, node) {
klog.Errorf("Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle.")
klog.Errorf("Failed to instantly swap NotReadyTaint to UnreachableTaint. Will try again in the next cycle.")
}
} else if nc.markNodeForTainting(node) {
klog.V(2).Infof("Node %v is unresponsive as of %v. Adding it to the Taint queue.",
@ -1148,7 +1138,7 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
_, currentReadyCondition = nodeutil.GetNodeCondition(&node.Status, v1.NodeReady)
if !apiequality.Semantic.DeepEqual(currentReadyCondition, &observedReadyCondition) {
if _, err := nc.kubeClient.CoreV1().Nodes().UpdateStatus(node); err != nil {
if _, err := nc.kubeClient.CoreV1().Nodes().UpdateStatus(context.TODO(), node, metav1.UpdateOptions{}); err != nil {
klog.Errorf("Error updating node %s: %v", node.Name, err)
return gracePeriod, observedReadyCondition, currentReadyCondition, err
}
@ -1208,7 +1198,7 @@ func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.Nod
if allAreFullyDisrupted {
klog.V(0).Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode.")
for i := range nodes {
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
_, err := nc.markNodeAsReachable(nodes[i])
if err != nil {
klog.Errorf("Failed to remove taints from Node %v", nodes[i].Name)
@ -1219,7 +1209,7 @@ func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.Nod
}
// We stop all evictions.
for k := range nc.zoneStates {
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
nc.zoneNoExecuteTainter[k].SwapLimiter(0)
} else {
nc.zonePodEvictor[k].SwapLimiter(0)
@ -1331,7 +1321,7 @@ func (nc *Controller) processPod(podItem podUpdateItem) {
pods := []*v1.Pod{pod}
// In taint-based eviction mode, only node updates are processed by NodeLifecycleController.
// Pods are processed by TaintManager.
if !nc.useTaintBasedEvictions {
if !nc.runTaintManager {
if err := nc.processNoTaintBaseEviction(node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil {
klog.Warningf("Unable to process pod %+v eviction from node %v: %v.", podItem, nodeName, err)
nc.podUpdateQueue.AddRateLimited(podItem)
@ -1350,13 +1340,13 @@ func (nc *Controller) processPod(podItem podUpdateItem) {
func (nc *Controller) setLimiterInZone(zone string, zoneSize int, state ZoneState) {
switch state {
case stateNormal:
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
nc.zoneNoExecuteTainter[zone].SwapLimiter(nc.evictionLimiterQPS)
} else {
nc.zonePodEvictor[zone].SwapLimiter(nc.evictionLimiterQPS)
}
case statePartialDisruption:
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
nc.zoneNoExecuteTainter[zone].SwapLimiter(
nc.enterPartialDisruptionFunc(zoneSize))
} else {
@ -1364,7 +1354,7 @@ func (nc *Controller) setLimiterInZone(zone string, zoneSize int, state ZoneStat
nc.enterPartialDisruptionFunc(zoneSize))
}
case stateFullDisruption:
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
nc.zoneNoExecuteTainter[zone].SwapLimiter(
nc.enterFullDisruptionFunc(zoneSize))
} else {
@ -1430,7 +1420,7 @@ func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
zone := utilnode.GetZoneKey(node)
if _, found := nc.zoneStates[zone]; !found {
nc.zoneStates[zone] = stateInitial
if !nc.useTaintBasedEvictions {
if !nc.runTaintManager {
nc.zonePodEvictor[zone] =
scheduler.NewRateLimitedTimedQueue(
flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, scheduler.EvictionRateLimiterBurst))
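Each zone queue is throttled by a token-bucket limiter from client-go's flowcontrol package, and SwapLimiter is what the disruption handling above uses to retune the eviction rate per zone. A small sketch of the underlying limiter, with illustrative qps/burst values rather than the controller's defaults:

package example

import "k8s.io/client-go/util/flowcontrol"

// tryEvict sketches the token-bucket gating used for evictions: TryAccept
// consumes a token if one is available and returns false otherwise.
func tryEvict() bool {
	limiter := flowcontrol.NewTokenBucketRateLimiter(0.1, 1) // ~1 token per 10s, burst of 1
	return limiter.TryAccept()
}

Swapping a zone's limiter to QPS 0, as handleDisruption does in full-disruption mode, effectively pauses evictions for that zone without tearing down the queue.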

View File

@ -17,6 +17,7 @@ limitations under the License.
package scheduler
import (
"context"
"fmt"
"hash/fnv"
"io"
@ -108,7 +109,7 @@ func deletePodHandler(c clientset.Interface, emitEventFunc func(types.Namespaced
}
var err error
for i := 0; i < retries; i++ {
err = c.CoreV1().Pods(ns).Delete(name, &metav1.DeleteOptions{})
err = c.CoreV1().Pods(ns).Delete(context.TODO(), name, metav1.DeleteOptions{})
if err == nil {
break
}

View File

@ -137,7 +137,7 @@ func (q *TimedWorkerQueue) CancelWork(key string) bool {
}
// GetWorkerUnsafe returns a TimedWorker corresponding to the given key.
// Unsafe method - workers have attached goroutines which can fire afater this function is called.
// Unsafe method - workers have attached goroutines which can fire after this function is called.
func (q *TimedWorkerQueue) GetWorkerUnsafe(key string) *TimedWorker {
q.Lock()
defer q.Unlock()

View File

@ -17,6 +17,7 @@ limitations under the License.
package node
import (
"context"
"fmt"
"strings"
@ -79,7 +80,7 @@ func DeletePods(kubeClient clientset.Interface, pods []*v1.Pod, recorder record.
klog.V(2).Infof("Starting deletion of pod %v/%v", pod.Namespace, pod.Name)
recorder.Eventf(pod, v1.EventTypeNormal, "NodeControllerEviction", "Marking for deletion Pod %s from Node %s", pod.Name, nodeName)
if err := kubeClient.CoreV1().Pods(pod.Namespace).Delete(pod.Name, nil); err != nil {
if err := kubeClient.CoreV1().Pods(pod.Namespace).Delete(context.TODO(), pod.Name, metav1.DeleteOptions{}); err != nil {
if apierrors.IsNotFound(err) {
// NotFound error means that pod was already deleted.
// There is nothing left to do with this pod.
@ -109,7 +110,7 @@ func SetPodTerminationReason(kubeClient clientset.Interface, pod *v1.Pod, nodeNa
var updatedPod *v1.Pod
var err error
if updatedPod, err = kubeClient.CoreV1().Pods(pod.Namespace).UpdateStatus(pod); err != nil {
if updatedPod, err = kubeClient.CoreV1().Pods(pod.Namespace).UpdateStatus(context.TODO(), pod, metav1.UpdateOptions{}); err != nil {
return nil, err
}
return updatedPod, nil
@ -136,7 +137,7 @@ func MarkPodsNotReady(kubeClient clientset.Interface, pods []*v1.Pod, nodeName s
break
}
klog.V(2).Infof("Updating ready status of pod %v to false", pod.Name)
_, err := kubeClient.CoreV1().Pods(pod.Namespace).UpdateStatus(pod)
_, err := kubeClient.CoreV1().Pods(pod.Namespace).UpdateStatus(context.TODO(), pod, metav1.UpdateOptions{})
if err != nil {
if apierrors.IsNotFound(err) {
// NotFound error means that pod was already deleted.
@ -159,10 +160,11 @@ func MarkPodsNotReady(kubeClient clientset.Interface, pods []*v1.Pod, nodeName s
// RecordNodeEvent records a event related to a node.
func RecordNodeEvent(recorder record.EventRecorder, nodeName, nodeUID, eventtype, reason, event string) {
ref := &v1.ObjectReference{
Kind: "Node",
Name: nodeName,
UID: types.UID(nodeUID),
Namespace: "",
APIVersion: "v1",
Kind: "Node",
Name: nodeName,
UID: types.UID(nodeUID),
Namespace: "",
}
klog.V(2).Infof("Recording %s event message for node %s", event, nodeName)
recorder.Eventf(ref, eventtype, reason, "Node %s event: %s", nodeName, event)
@ -171,10 +173,11 @@ func RecordNodeEvent(recorder record.EventRecorder, nodeName, nodeUID, eventtype
// RecordNodeStatusChange records a event related to a node status change. (Common to lifecycle and ipam)
func RecordNodeStatusChange(recorder record.EventRecorder, node *v1.Node, newStatus string) {
ref := &v1.ObjectReference{
Kind: "Node",
Name: node.Name,
UID: node.UID,
Namespace: "",
APIVersion: "v1",
Kind: "Node",
Name: node.Name,
UID: node.UID,
Namespace: "",
}
klog.V(2).Infof("Recording status change %s event message for node %s", newStatus, node.Name)
// TODO: This requires a transaction, either both node status is updated

View File

@ -64,6 +64,13 @@ const (
// recognize dynamically provisioned PVs in its decisions).
AnnDynamicallyProvisioned = "pv.kubernetes.io/provisioned-by"
// AnnMigratedTo annotation is added to a PVC and PV that is supposed to be
// dynamically provisioned/deleted by its corresponding CSI driver
// through the CSIMigration feature flags. When this annotation is set the
// Kubernetes components will "stand-down" and the external-provisioner will
// act on the objects
AnnMigratedTo = "pv.kubernetes.io/migrated-to"
// AnnStorageProvisioner annotation is added to a PVC that is supposed to be dynamically
// provisioned. Its value is name of volume plugin that is supposed to provision
// a volume for this PVC.
@ -204,9 +211,14 @@ func FindMatchingVolume(
// Skip volumes in the excluded list
continue
}
if volume.Spec.ClaimRef != nil && !IsVolumeBoundToClaim(volume, claim) {
continue
}
volumeQty := volume.Spec.Capacity[v1.ResourceStorage]
if volumeQty.Cmp(requestedQty) < 0 {
continue
}
// filter out mismatching volumeModes
if CheckVolumeModeMismatches(&claim.Spec, &volume.Spec) {
continue
@ -223,6 +235,8 @@ func FindMatchingVolume(
if node != nil {
// Scheduler path, check that the PV NodeAffinity
// is satisfied by the node
// volumeutil.CheckNodeAffinity is the most expensive call in this loop.
// We should check cheaper conditions first or consider optimizing this function.
err := volumeutil.CheckNodeAffinity(volume, node.Labels)
if err != nil {
nodeAffinityValid = false
@ -230,13 +244,6 @@ func FindMatchingVolume(
}
if IsVolumeBoundToClaim(volume, claim) {
// this claim and volume are pre-bound; return
// the volume if the size request is satisfied,
// otherwise continue searching for a match
if volumeQty.Cmp(requestedQty) < 0 {
continue
}
// If PV node affinity is invalid, return no match.
// This means the prebound PV (and therefore PVC)
// is not suitable for this node.
@ -256,7 +263,6 @@ func FindMatchingVolume(
// filter out:
// - volumes in non-available phase
// - volumes bound to another claim
// - volumes whose labels don't match the claim's selector, if specified
// - volumes in Class that is not requested
// - volumes whose NodeAffinity does not match the node
@ -266,8 +272,6 @@ func FindMatchingVolume(
// them now has high chance of encountering unnecessary failures
// due to API conflicts.
continue
} else if volume.Spec.ClaimRef != nil {
continue
} else if selector != nil && !selector.Matches(labels.Set(volume.Labels)) {
continue
}
@ -286,11 +290,9 @@ func FindMatchingVolume(
}
}
if volumeQty.Cmp(requestedQty) >= 0 {
if smallestVolume == nil || smallestVolumeQty.Cmp(volumeQty) > 0 {
smallestVolume = volume
smallestVolumeQty = volumeQty
}
if smallestVolume == nil || smallestVolumeQty.Cmp(volumeQty) > 0 {
smallestVolume = volume
smallestVolumeQty = volumeQty
}
}
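The net effect of this reshuffle is that the two cheap filters, the ClaimRef mismatch and the capacity comparison, now run before anything expensive. A condensed sketch of the per-volume filter order after this change (names abbreviated; not the exact controller code):

// inside FindMatchingVolume's loop, per candidate volume:
if volume.Spec.ClaimRef != nil && !IsVolumeBoundToClaim(volume, claim) {
	continue // bound to some other claim: cheap field check, now first
}
volumeQty := volume.Spec.Capacity[v1.ResourceStorage]
if volumeQty.Cmp(requestedQty) < 0 {
	continue // too small: also cheap, hoisted above node affinity
}
if CheckVolumeModeMismatches(&claim.Spec, &volume.Spec) {
	continue // block vs. filesystem mismatch
}
// only then pay for volumeutil.CheckNodeAffinity(volume, node.Labels),
// the most expensive call in the loop per the comment above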
@ -305,23 +307,6 @@ func FindMatchingVolume(
// CheckVolumeModeMismatches is a convenience method that checks volumeMode for PersistentVolume
// and PersistentVolumeClaims
func CheckVolumeModeMismatches(pvcSpec *v1.PersistentVolumeClaimSpec, pvSpec *v1.PersistentVolumeSpec) bool {
if !utilfeature.DefaultFeatureGate.Enabled(features.BlockVolume) {
if pvcSpec.VolumeMode != nil && *pvcSpec.VolumeMode == v1.PersistentVolumeBlock {
// Block PVC does not match anything when the feature is off. We explicitly want
// to prevent binding block PVC to filesystem PV.
// The PVC should be ignored by PV controller.
return true
}
if pvSpec.VolumeMode != nil && *pvSpec.VolumeMode == v1.PersistentVolumeBlock {
// Block PV does not match anything when the feature is off. We explicitly want
// to prevent binding block PV to filesystem PVC.
// The PV should be ignored by PV controller.
return true
}
// Both PV + PVC are not block.
return false
}
// In HA upgrades, we cannot guarantee that the apiserver is on a version >= controller-manager.
// So we default a nil volumeMode to filesystem
requestedVolumeMode := v1.PersistentVolumeFilesystem

View File

@ -45,7 +45,6 @@ go_test(
],
embed = [":go_default_library"],
deps = [
"//pkg/api/testapi:go_default_library",
"//pkg/controller:go_default_library",
"//pkg/controller/volume/persistentvolume/testing:go_default_library",
"//pkg/controller/volume/persistentvolume/util:go_default_library",

View File

@ -127,7 +127,7 @@ func (c *assumeCache) objInfoIndexFunc(obj interface{}) ([]string, error) {
return c.indexFunc(objInfo.latestObj)
}
// NewAssumeCache creates an assume cache for genernal objects.
// NewAssumeCache creates an assume cache for general objects.
func NewAssumeCache(informer cache.SharedIndexInformer, description, indexName string, indexFunc cache.IndexFunc) AssumeCache {
c := &assumeCache{
description: description,

View File

@ -17,6 +17,7 @@ limitations under the License.
package scheduling
import (
"context"
"fmt"
"sort"
"strings"
@ -44,6 +45,24 @@ import (
volumeutil "k8s.io/kubernetes/pkg/volume/util"
)
// ConflictReason is used for the special strings which explain why
// volume binding is impossible for a node.
type ConflictReason string
// ConflictReasons contains all reasons that explain why volume binding is impossible for a node.
type ConflictReasons []ConflictReason
func (reasons ConflictReasons) Len() int { return len(reasons) }
func (reasons ConflictReasons) Less(i, j int) bool { return reasons[i] < reasons[j] }
func (reasons ConflictReasons) Swap(i, j int) { reasons[i], reasons[j] = reasons[j], reasons[i] }
const (
// ErrReasonBindConflict is used for VolumeBindingNoMatch predicate error.
ErrReasonBindConflict ConflictReason = "node(s) didn't find available persistent volumes to bind"
// ErrReasonNodeConflict is used for VolumeNodeAffinityConflict predicate error.
ErrReasonNodeConflict ConflictReason = "node(s) had volume node affinity conflict"
)
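ConflictReasons implementing sort.Interface lets callers render the reasons in a deterministic order, which matters for stable scheduler status messages and test assertions. A small illustrative helper (not from the source):

package example

import (
	"sort"
	"strings"
)

type ConflictReason string

type ConflictReasons []ConflictReason

func (r ConflictReasons) Len() int           { return len(r) }
func (r ConflictReasons) Less(i, j int) bool { return r[i] < r[j] }
func (r ConflictReasons) Swap(i, j int)      { r[i], r[j] = r[j], r[i] }

// joinReasons sorts and joins the reasons into one stable message string.
func joinReasons(reasons ConflictReasons) string {
	sort.Sort(reasons)
	parts := make([]string, 0, len(reasons))
	for _, reason := range reasons {
		parts = append(parts, string(reason))
	}
	return strings.Join(parts, "; ")
}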
// InTreeToCSITranslator contains methods required to check migratable status
// and perform translations from InTree PVs to CSI
type InTreeToCSITranslator interface {
@ -82,11 +101,11 @@ type SchedulerVolumeBinder interface {
// If a PVC is bound, it checks if the PV's NodeAffinity matches the Node.
// Otherwise, it tries to find an available PV to bind to the PVC.
//
// It returns true if all of the Pod's PVCs have matching PVs or can be dynamic provisioned,
// and returns true if bound volumes satisfy the PV NodeAffinity.
// It returns an error when something went wrong or a list of reasons why the node is
// (currently) not usable for the pod.
//
// This function is called by the volume binding scheduler predicate and can be called in parallel
FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolumesSatisified, boundVolumesSatisfied bool, err error)
FindPodVolumes(pod *v1.Pod, node *v1.Node) (reasons ConflictReasons, err error)
// AssumePodVolumes will:
// 1. Take the PV matches for unbound PVCs and update the PV cache assuming
@ -112,6 +131,9 @@ type SchedulerVolumeBinder interface {
// GetBindingsCache returns the cache used (if any) to store volume binding decisions.
GetBindingsCache() PodBindingCache
// DeletePodBindings will delete pod's bindingDecisions in podBindingCache.
DeletePodBindings(pod *v1.Pod)
}
type volumeBinder struct {
@ -162,18 +184,40 @@ func (b *volumeBinder) GetBindingsCache() PodBindingCache {
return b.podBindingCache
}
// DeletePodBindings will delete pod's bindingDecisions in podBindingCache.
func (b *volumeBinder) DeletePodBindings(pod *v1.Pod) {
cache := b.podBindingCache
if pod != nil {
cache.DeleteBindings(pod)
}
}
// FindPodVolumes caches the matching PVs and PVCs to provision per node in podBindingCache.
// This method intentionally takes in a *v1.Node object instead of using volumebinder.nodeInformer.
// That's necessary because some operations need to pass fake node objects to the predicate.
func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolumesSatisfied, boundVolumesSatisfied bool, err error) {
func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (reasons ConflictReasons, err error) {
podName := getPodName(pod)
// Warning: Below log needs high verbosity as it can be printed several times (#60933).
klog.V(5).Infof("FindPodVolumes for pod %q, node %q", podName, node.Name)
// Initialize to true for pods that don't have volumes
unboundVolumesSatisfied = true
boundVolumesSatisfied = true
// Initialize to true for pods that don't have volumes. These
// booleans get translated into reason strings when the function
// returns without an error.
unboundVolumesSatisfied := true
boundVolumesSatisfied := true
defer func() {
if err != nil {
return
}
if !boundVolumesSatisfied {
reasons = append(reasons, ErrReasonNodeConflict)
}
if !unboundVolumesSatisfied {
reasons = append(reasons, ErrReasonBindConflict)
}
}()
start := time.Now()
defer func() {
metrics.VolumeSchedulingStageLatency.WithLabelValues("predicate").Observe(time.Since(start).Seconds())
@ -209,19 +253,19 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolume
// volumes can get bound/provisioned in between calls.
boundClaims, claimsToBind, unboundClaimsImmediate, err := b.getPodVolumes(pod)
if err != nil {
return false, false, err
return nil, err
}
// Immediate claims should be bound
if len(unboundClaimsImmediate) > 0 {
return false, false, fmt.Errorf("pod has unbound immediate PersistentVolumeClaims")
return nil, fmt.Errorf("pod has unbound immediate PersistentVolumeClaims")
}
// Check PV node affinity on bound volumes
if len(boundClaims) > 0 {
boundVolumesSatisfied, err = b.checkBoundClaims(boundClaims, node, podName)
if err != nil {
return false, false, err
return nil, err
}
}
@ -236,8 +280,9 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolume
for _, claim := range claimsToBind {
if selectedNode, ok := claim.Annotations[pvutil.AnnSelectedNode]; ok {
if selectedNode != node.Name {
// Fast path, skip unmatched node
return false, boundVolumesSatisfied, nil
// Fast path, skip unmatched node.
unboundVolumesSatisfied = false
return
}
claimsToProvision = append(claimsToProvision, claim)
} else {
@ -250,7 +295,7 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolume
var unboundClaims []*v1.PersistentVolumeClaim
unboundVolumesSatisfied, matchedBindings, unboundClaims, err = b.findMatchingVolumes(pod, claimsToFindMatching, node)
if err != nil {
return false, false, err
return nil, err
}
claimsToProvision = append(claimsToProvision, unboundClaims...)
}
@ -259,12 +304,12 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolume
if len(claimsToProvision) > 0 {
unboundVolumesSatisfied, provisionedClaims, err = b.checkVolumeProvisions(pod, claimsToProvision, node)
if err != nil {
return false, false, err
return nil, err
}
}
}
return unboundVolumesSatisfied, boundVolumesSatisfied, nil
return
}
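With the two booleans folded into ConflictReasons, a caller's feasibility check collapses to "no error and no reasons". A hypothetical in-package consumer of the new signature (the helper name is illustrative):

// nodeIsFeasible adapts FindPodVolumes for a yes/no decision plus messages.
func nodeIsFeasible(b SchedulerVolumeBinder, pod *v1.Pod, node *v1.Node) (bool, []string, error) {
	reasons, err := b.FindPodVolumes(pod, node)
	if err != nil {
		// infrastructure failure, not a scheduling verdict
		return false, nil, err
	}
	msgs := make([]string, 0, len(reasons))
	for _, r := range reasons {
		msgs = append(msgs, string(r))
	}
	return len(reasons) == 0, msgs, nil
}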
// AssumePodVolumes will take the cached matching PVs and PVCs to provision
@ -423,7 +468,7 @@ func (b *volumeBinder) bindAPIUpdate(podName string, bindings []*bindingInfo, cl
// TODO: does it hurt if we make an api call and nothing needs to be updated?
claimKey := claimToClaimKey(binding.pvc)
klog.V(2).Infof("claim %q bound to volume %q", claimKey, binding.pv.Name)
newPV, err := b.kubeClient.CoreV1().PersistentVolumes().Update(binding.pv)
newPV, err := b.kubeClient.CoreV1().PersistentVolumes().Update(context.TODO(), binding.pv, metav1.UpdateOptions{})
if err != nil {
klog.V(4).Infof("updating PersistentVolume[%s]: binding to %q failed: %v", binding.pv.Name, claimKey, err)
return err
@ -438,7 +483,7 @@ func (b *volumeBinder) bindAPIUpdate(podName string, bindings []*bindingInfo, cl
// PV controller is expected to signal back by removing related annotations if actual provisioning fails
for i, claim = range claimsToProvision {
klog.V(5).Infof("bindAPIUpdate: Pod %q, PVC %q", podName, getPVCName(claim))
newClaim, err := b.kubeClient.CoreV1().PersistentVolumeClaims(claim.Namespace).Update(claim)
newClaim, err := b.kubeClient.CoreV1().PersistentVolumeClaims(claim.Namespace).Update(context.TODO(), claim, metav1.UpdateOptions{})
if err != nil {
return err
}

View File

@ -20,12 +20,11 @@ import "k8s.io/api/core/v1"
// FakeVolumeBinderConfig holds configurations for fake volume binder.
type FakeVolumeBinderConfig struct {
AllBound bool
FindUnboundSatsified bool
FindBoundSatsified bool
FindErr error
AssumeErr error
BindErr error
AllBound bool
FindReasons ConflictReasons
FindErr error
AssumeErr error
BindErr error
}
// NewFakeVolumeBinder sets up all the caches needed for the scheduler to make
@ -44,8 +43,8 @@ type FakeVolumeBinder struct {
}
// FindPodVolumes implements SchedulerVolumeBinder.FindPodVolumes.
func (b *FakeVolumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (unboundVolumesSatisfied, boundVolumesSatsified bool, err error) {
return b.config.FindUnboundSatsified, b.config.FindBoundSatsified, b.config.FindErr
func (b *FakeVolumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (reasons ConflictReasons, err error) {
return b.config.FindReasons, b.config.FindErr
}
// AssumePodVolumes implements SchedulerVolumeBinder.AssumePodVolumes.
@ -64,3 +63,6 @@ func (b *FakeVolumeBinder) BindPodVolumes(assumedPod *v1.Pod) error {
func (b *FakeVolumeBinder) GetBindingsCache() PodBindingCache {
return nil
}
// DeletePodBindings implements SchedulerVolumeBinder.DeletePodBindings.
func (b *FakeVolumeBinder) DeletePodBindings(pod *v1.Pod) {}
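For tests, the fake binder now expresses expected outcomes directly as reasons instead of two easily-swapped booleans (the old FindUnboundSatsified/FindBoundSatsified fields, typos and all, are gone). An illustrative in-package setup, assuming NewFakeVolumeBinder takes the config struct:

binder := NewFakeVolumeBinder(&FakeVolumeBinderConfig{
	FindReasons: ConflictReasons{ErrReasonBindConflict},
})
reasons, err := binder.FindPodVolumes(pod, node)
// reasons == ConflictReasons{ErrReasonBindConflict}, err == nil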

View File

@ -10,7 +10,6 @@ go_library(
srcs = ["kube_features.go"],
importpath = "k8s.io/kubernetes/pkg/features",
deps = [
"//staging/src/k8s.io/apiextensions-apiserver/pkg/features:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/features:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",

View File

@ -17,7 +17,6 @@ limitations under the License.
package features
import (
apiextensionsfeatures "k8s.io/apiextensions-apiserver/pkg/features"
"k8s.io/apimachinery/pkg/util/runtime"
genericfeatures "k8s.io/apiserver/pkg/features"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@ -62,6 +61,7 @@ const (
// owner: @Huang-Wei
// beta: v1.13
// ga: v1.18
//
// Changes the logic behind evicting Pods from not ready Nodes
// to take advantage of NoExecute Taints and Tolerations.
@ -119,22 +119,6 @@ const (
// Allows all containers in a pod to share a process namespace.
PodShareProcessNamespace featuregate.Feature = "PodShareProcessNamespace"
// owner: @bsalamat
// alpha: v1.8
// beta: v1.11
// GA: v1.14
//
// Add priority to pods. Priority affects scheduling and preemption of pods.
PodPriority featuregate.Feature = "PodPriority"
// owner: @k82cn
// beta: v1.12
// GA: v1.17
//
// Taint nodes based on their condition status for 'NetworkUnavailable',
// 'MemoryPressure', 'PIDPressure' and 'DiskPressure'.
TaintNodesByCondition featuregate.Feature = "TaintNodesByCondition"
// owner: @sjenning
// alpha: v1.11
//
@ -157,6 +141,7 @@ const (
// owner: @lmdaly
// alpha: v1.16
// beta: v1.18
//
// Enable resource managers to make NUMA aligned decisions
TopologyManager featuregate.Feature = "TopologyManager"
@ -188,6 +173,7 @@ const (
// owner: @saad-ali
// alpha: v1.12
// beta: v1.14
// GA: v1.18
// Enable all logic related to the CSIDriver API object in storage.k8s.io
CSIDriverRegistry featuregate.Feature = "CSIDriverRegistry"
@ -200,7 +186,8 @@ const (
// owner: @screeley44
// alpha: v1.9
// beta: v1.13
// beta: v1.13
// ga: v1.18
//
// Enable Block volume support in containers.
BlockVolume featuregate.Feature = "BlockVolume"
@ -236,13 +223,6 @@ const (
// Enable Hyper-V containers on Windows
HyperVContainer featuregate.Feature = "HyperVContainer"
// owner: @k82cn
// beta: v1.12
// GA: v1.17
//
// Schedule DaemonSet Pods by default scheduler instead of DaemonSet controller
ScheduleDaemonSetPods featuregate.Feature = "ScheduleDaemonSetPods"
// owner: @mikedanese
// beta: v1.12
//
@ -263,6 +243,15 @@ const (
// to the API server.
BoundServiceAccountTokenVolume featuregate.Feature = "BoundServiceAccountTokenVolume"
// owner: @mtaufen
// alpha: v1.18
//
// Enable OIDC discovery endpoints (issuer and JWKS URLs) for the service
// account issuer in the API server.
// Note these endpoints serve minimally-compliant discovery docs that are
// intended to be used for service account token verification.
ServiceAccountIssuerDiscovery featuregate.Feature = "ServiceAccountIssuerDiscovery"
// owner: @Random-Liu
// beta: v1.11
//
@ -308,16 +297,10 @@ const (
// Only applicable if the VolumeSubpath feature is also enabled
VolumeSubpathEnvExpansion featuregate.Feature = "VolumeSubpathEnvExpansion"
// owner: @vikaschoudhary16
// beta: v1.12
// ga: v1.17
//
// Enable resource quota scope selectors
ResourceQuotaScopeSelectors featuregate.Feature = "ResourceQuotaScopeSelectors"
// owner: @vladimirvivien
// alpha: v1.11
// beta: v1.14
// beta: v1.14
// ga: v1.18
//
// Enables CSI to use raw block storage volumes
CSIBlockVolume featuregate.Feature = "CSIBlockVolume"
@ -438,6 +421,12 @@ const (
// Expects Azure File CSI Driver to be installed and configured on all nodes.
CSIMigrationAzureFileComplete featuregate.Feature = "CSIMigrationAzureFileComplete"
// owner: @gnufied
// alpha: v1.18
// Allows users to configure volume permission change policy for fsGroups when mounting
// a volume in a Pod.
ConfigurableFSGroupPolicy featuregate.Feature = "ConfigurableFSGroupPolicy"
// owner: @RobertKrawitz
// beta: v1.15
//
@ -454,12 +443,14 @@ const (
// owner: @bclau
// alpha: v1.16
// beta: v1.17
// GA: v1.18
//
// Enables support for running container entrypoints as different usernames than their default ones.
WindowsRunAsUserName featuregate.Feature = "WindowsRunAsUserName"
// owner: @adisky
// alpha: v1.14
// beta: v1.18
//
// Enables the OpenStack Cinder in-tree driver to OpenStack Cinder CSI Driver migration feature.
CSIMigrationOpenStack featuregate.Feature = "CSIMigrationOpenStack"
@ -495,12 +486,14 @@ const (
// owner: @j-griffith
// alpha: v1.15
// beta: v1.16
// GA: v1.18
//
// Enable support for specifying an existing PVC as a DataSource
VolumePVCDataSource featuregate.Feature = "VolumePVCDataSource"
// owner: @egernst
// alpha: v1.16
// beta: v1.18
//
// Enables PodOverhead, for accounting pod overheads which are specific to a given RuntimeClass
PodOverhead featuregate.Feature = "PodOverhead"
@ -517,14 +510,21 @@ const (
// Enable Endpoint Slices for more scalable Service endpoints.
EndpointSlice featuregate.Feature = "EndpointSlice"
// owner: @robscott @freehan
// alpha: v1.18
//
// Enable Endpoint Slice consumption by kube-proxy for improved scalability.
EndpointSliceProxying featuregate.Feature = "EndpointSliceProxying"
// owner: @Huang-Wei
// alpha: v1.16
// beta: v1.18
//
// Schedule pods evenly across available topology domains.
EvenPodsSpread featuregate.Feature = "EvenPodsSpread"
// owner: @matthyx
// alpha: v1.16
// beta: v1.18
//
// Enables the startupProbe in kubelet worker.
StartupProbe featuregate.Feature = "StartupProbe"
@ -547,6 +547,45 @@ const (
//
// Enables topology aware service routing
ServiceTopology featuregate.Feature = "ServiceTopology"
// owner: @robscott
// alpha: v1.18
//
// Enables AppProtocol field for Services and Endpoints.
ServiceAppProtocol featuregate.Feature = "ServiceAppProtocol"
// owner: @wojtek-t
// alpha: v1.18
//
// Enables a feature to make secrets and configmaps data immutable.
ImmutableEphemeralVolumes featuregate.Feature = "ImmutableEphemeralVolumes"
// owner: @robscott
// beta: v1.18
//
// Enables DefaultIngressClass admission controller.
DefaultIngressClass featuregate.Feature = "DefaultIngressClass"
// owner: @bart0sh
// alpha: v1.18
//
// Enables usage of HugePages-<size> in a volume medium,
// e.g. emptyDir:
// medium: HugePages-1Gi
HugePageStorageMediumSize featuregate.Feature = "HugePageStorageMediumSize"
// owner: @freehan
// GA: v1.18
//
// Enable ExternalTrafficPolicy for Service ExternalIPs.
// This is for bug fix #69811
ExternalPolicyForExternalIP featuregate.Feature = "ExternalPolicyForExternalIP"
// owner: @bswartz
// alpha: v1.18
//
// Enables usage of any object for volume data source in PVCs
AnyVolumeDataSource featuregate.Feature = "AnyVolumeDataSource"
)
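All of these gates are consulted at runtime through the shared DefaultFeatureGate. A minimal sketch of gating a code path on one of the new v1.18 gates:

package example

import (
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/kubernetes/pkg/features"
)

// immutableVolumesEnabled reports whether the ImmutableEphemeralVolumes gate
// (alpha in v1.18, off by default per the table below) is switched on.
func immutableVolumesEnabled() bool {
	return utilfeature.DefaultFeatureGate.Enabled(features.ImmutableEphemeralVolumes)
}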
func init() {
@ -561,15 +600,13 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
DynamicKubeletConfig: {Default: true, PreRelease: featuregate.Beta},
ExperimentalHostUserNamespaceDefaultingGate: {Default: false, PreRelease: featuregate.Beta},
DevicePlugins: {Default: true, PreRelease: featuregate.Beta},
TaintBasedEvictions: {Default: true, PreRelease: featuregate.Beta},
TaintBasedEvictions: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.19
RotateKubeletServerCertificate: {Default: true, PreRelease: featuregate.Beta},
RotateKubeletClientCertificate: {Default: true, PreRelease: featuregate.Beta},
LocalStorageCapacityIsolation: {Default: true, PreRelease: featuregate.Beta},
Sysctls: {Default: true, PreRelease: featuregate.Beta},
EphemeralContainers: {Default: false, PreRelease: featuregate.Alpha},
PodShareProcessNamespace: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.19
PodPriority: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.18
TaintNodesByCondition: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.18
QOSReserved: {Default: false, PreRelease: featuregate.Alpha},
ExpandPersistentVolumes: {Default: true, PreRelease: featuregate.Beta},
ExpandInUsePersistentVolumes: {Default: true, PreRelease: featuregate.Beta},
@ -577,22 +614,22 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
AttachVolumeLimit: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.19
CPUManager: {Default: true, PreRelease: featuregate.Beta},
CPUCFSQuotaPeriod: {Default: false, PreRelease: featuregate.Alpha},
TopologyManager: {Default: false, PreRelease: featuregate.Alpha},
TopologyManager: {Default: true, PreRelease: featuregate.Beta},
ServiceNodeExclusion: {Default: false, PreRelease: featuregate.Alpha},
NodeDisruptionExclusion: {Default: false, PreRelease: featuregate.Alpha},
CSIDriverRegistry: {Default: true, PreRelease: featuregate.Beta},
CSIDriverRegistry: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.20
CSINodeInfo: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.19
BlockVolume: {Default: true, PreRelease: featuregate.Beta},
BlockVolume: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.20
StorageObjectInUseProtection: {Default: true, PreRelease: featuregate.GA},
ResourceLimitsPriorityFunction: {Default: false, PreRelease: featuregate.Alpha},
SupportIPVSProxyMode: {Default: true, PreRelease: featuregate.GA},
SupportPodPidsLimit: {Default: true, PreRelease: featuregate.Beta},
SupportNodePidsLimit: {Default: true, PreRelease: featuregate.Beta},
HyperVContainer: {Default: false, PreRelease: featuregate.Alpha},
ScheduleDaemonSetPods: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.18
TokenRequest: {Default: true, PreRelease: featuregate.Beta},
TokenRequestProjection: {Default: true, PreRelease: featuregate.Beta},
BoundServiceAccountTokenVolume: {Default: false, PreRelease: featuregate.Alpha},
ServiceAccountIssuerDiscovery: {Default: false, PreRelease: featuregate.Alpha},
CRIContainerLogRotation: {Default: true, PreRelease: featuregate.Beta},
CSIMigration: {Default: true, PreRelease: featuregate.Beta},
CSIMigrationGCE: {Default: false, PreRelease: featuregate.Beta}, // Off by default (requires GCE PD CSI Driver)
@ -604,13 +641,13 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
CSIMigrationAzureFile: {Default: false, PreRelease: featuregate.Alpha},
CSIMigrationAzureFileComplete: {Default: false, PreRelease: featuregate.Alpha},
RunAsGroup: {Default: true, PreRelease: featuregate.Beta},
CSIMigrationOpenStack: {Default: false, PreRelease: featuregate.Alpha},
CSIMigrationOpenStack: {Default: false, PreRelease: featuregate.Beta}, // Off by default (requires OpenStack Cinder CSI driver)
CSIMigrationOpenStackComplete: {Default: false, PreRelease: featuregate.Alpha},
VolumeSubpath: {Default: true, PreRelease: featuregate.GA},
ConfigurableFSGroupPolicy: {Default: false, PreRelease: featuregate.Alpha},
BalanceAttachedNodeVolumes: {Default: false, PreRelease: featuregate.Alpha},
VolumeSubpathEnvExpansion: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.19,
ResourceQuotaScopeSelectors: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.18
CSIBlockVolume: {Default: true, PreRelease: featuregate.Beta},
CSIBlockVolume: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.20
CSIInlineVolume: {Default: true, PreRelease: featuregate.Beta},
RuntimeClass: {Default: true, PreRelease: featuregate.Beta},
NodeLease: {Default: true, PreRelease: featuregate.GA, LockToDefault: true},
@ -619,24 +656,31 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
ProcMountType: {Default: false, PreRelease: featuregate.Alpha},
TTLAfterFinished: {Default: false, PreRelease: featuregate.Alpha},
KubeletPodResources: {Default: true, PreRelease: featuregate.Beta},
WindowsGMSA: {Default: true, PreRelease: featuregate.Beta},
WindowsRunAsUserName: {Default: true, PreRelease: featuregate.Beta},
WindowsGMSA: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.20
WindowsRunAsUserName: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.20
ServiceLoadBalancerFinalizer: {Default: true, PreRelease: featuregate.GA, LockToDefault: true},
LocalStorageCapacityIsolationFSQuotaMonitoring: {Default: false, PreRelease: featuregate.Alpha},
NonPreemptingPriority: {Default: false, PreRelease: featuregate.Alpha},
VolumePVCDataSource: {Default: true, PreRelease: featuregate.Beta},
PodOverhead: {Default: false, PreRelease: featuregate.Alpha},
VolumePVCDataSource: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.20
PodOverhead: {Default: true, PreRelease: featuregate.Beta},
IPv6DualStack: {Default: false, PreRelease: featuregate.Alpha},
EndpointSlice: {Default: false, PreRelease: featuregate.Beta},
EvenPodsSpread: {Default: false, PreRelease: featuregate.Alpha},
StartupProbe: {Default: false, PreRelease: featuregate.Alpha},
EndpointSlice: {Default: true, PreRelease: featuregate.Beta},
EndpointSliceProxying: {Default: false, PreRelease: featuregate.Alpha},
EvenPodsSpread: {Default: true, PreRelease: featuregate.Beta},
StartupProbe: {Default: true, PreRelease: featuregate.Beta},
AllowInsecureBackendProxy: {Default: true, PreRelease: featuregate.Beta},
PodDisruptionBudget: {Default: true, PreRelease: featuregate.Beta},
ServiceTopology: {Default: false, PreRelease: featuregate.Alpha},
ServiceAppProtocol: {Default: false, PreRelease: featuregate.Alpha},
ImmutableEphemeralVolumes: {Default: false, PreRelease: featuregate.Alpha},
DefaultIngressClass: {Default: true, PreRelease: featuregate.Beta},
HugePageStorageMediumSize: {Default: false, PreRelease: featuregate.Alpha},
ExternalPolicyForExternalIP: {Default: false, PreRelease: featuregate.GA}, // remove in 1.19
AnyVolumeDataSource: {Default: false, PreRelease: featuregate.Alpha},
// inherited features from generic apiserver, relisted here to get a conflict if it is changed
// unintentionally on either side:
genericfeatures.StreamingProxyRedirects: {Default: true, PreRelease: featuregate.Beta},
genericfeatures.StreamingProxyRedirects: {Default: true, PreRelease: featuregate.Deprecated},
genericfeatures.ValidateProxyRedirects: {Default: true, PreRelease: featuregate.Beta},
genericfeatures.AdvancedAuditing: {Default: true, PreRelease: featuregate.GA},
genericfeatures.DynamicAuditing: {Default: false, PreRelease: featuregate.Alpha},
@ -646,14 +690,6 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
genericfeatures.ServerSideApply: {Default: true, PreRelease: featuregate.Beta},
genericfeatures.APIPriorityAndFairness: {Default: false, PreRelease: featuregate.Alpha},
// inherited features from apiextensions-apiserver, relisted here to get a conflict if it is changed
// unintentionally on either side:
apiextensionsfeatures.CustomResourceValidation: {Default: true, PreRelease: featuregate.GA, LockToDefault: true},
apiextensionsfeatures.CustomResourceSubresources: {Default: true, PreRelease: featuregate.GA, LockToDefault: true},
apiextensionsfeatures.CustomResourceWebhookConversion: {Default: true, PreRelease: featuregate.GA, LockToDefault: true},
apiextensionsfeatures.CustomResourcePublishOpenAPI: {Default: true, PreRelease: featuregate.GA, LockToDefault: true},
apiextensionsfeatures.CustomResourceDefaulting: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // TODO: remove in 1.18
// features that enable backwards compatibility but are scheduled to be removed
// ...
HPAScaleToZero: {Default: false, PreRelease: featuregate.Alpha},

View File

@ -335,8 +335,15 @@ type KubeletConfiguration struct {
// This provides a "static" CPU list rather than the "dynamic" list computed from system-reserved and kube-reserved.
// This option overwrites CPUs provided by system-reserved and kube-reserved.
ReservedSystemCPUs string
// The previous version for which you want to show hidden metrics.
// Only the previous minor version is meaningful, other values will not be allowed.
// The format is <major>.<minor>, e.g.: '1.16'.
// The purpose of this format is to make sure you have the opportunity to notice if the next release hides additional metrics,
// rather than being surprised when they are permanently removed in the release after that.
ShowHiddenMetricsForVersion string
}
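In practice this field is set through the kubelet configuration file (or the corresponding --show-hidden-metrics-for-version flag). Shown here as a Go sketch against the internal config type, with the import path as an assumption:

package example

import kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"

// withHiddenMetrics opts back in to metrics hidden in the current release.
// Only the previous minor version is accepted, e.g. "1.16" on a 1.17 kubelet.
func withHiddenMetrics(cfg *kubeletconfig.KubeletConfiguration) {
	cfg.ShowHiddenMetricsForVersion = "1.16"
}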
// KubeletAuthorizationMode denotes the authorization mode for the kubelet
type KubeletAuthorizationMode string
const (
@ -346,6 +353,7 @@ const (
KubeletAuthorizationModeWebhook KubeletAuthorizationMode = "Webhook"
)
// KubeletAuthorization holds the state related to the authorization in the kubelet.
type KubeletAuthorization struct {
// mode is the authorization mode to apply to requests to the kubelet server.
// Valid values are AlwaysAllow and Webhook.
@ -356,6 +364,8 @@ type KubeletAuthorization struct {
Webhook KubeletWebhookAuthorization
}
// KubeletWebhookAuthorization holds the state related to the Webhook
// Authorization in the Kubelet.
type KubeletWebhookAuthorization struct {
// cacheAuthorizedTTL is the duration to cache 'authorized' responses from the webhook authorizer.
CacheAuthorizedTTL metav1.Duration
@ -363,6 +373,7 @@ type KubeletWebhookAuthorization struct {
CacheUnauthorizedTTL metav1.Duration
}
// KubeletAuthentication holds the Kubelet Authentication settings.
type KubeletAuthentication struct {
// x509 contains settings related to x509 client certificate authentication
X509 KubeletX509Authentication
@ -372,6 +383,7 @@ type KubeletAuthentication struct {
Anonymous KubeletAnonymousAuthentication
}
// KubeletX509Authentication contains settings related to x509 client certificate authentication
type KubeletX509Authentication struct {
// clientCAFile is the path to a PEM-encoded certificate bundle. If set, any request presenting a client certificate
// signed by one of the authorities in the bundle is authenticated with a username corresponding to the CommonName,
@ -379,6 +391,7 @@ type KubeletX509Authentication struct {
ClientCAFile string
}
// KubeletWebhookAuthentication contains settings related to webhook authentication
type KubeletWebhookAuthentication struct {
// enabled allows bearer token authentication backed by the tokenreviews.authentication.k8s.io API
Enabled bool
@ -386,6 +399,7 @@ type KubeletWebhookAuthentication struct {
CacheTTL metav1.Duration
}
// KubeletAnonymousAuthentication enables anonymous requests to the kubelet server.
type KubeletAnonymousAuthentication struct {
// enabled allows anonymous requests to the kubelet server.
// Requests that are not rejected by another authentication method are treated as anonymous requests.

View File

@ -25,7 +25,6 @@ go_library(
"//pkg/util/hash:go_default_library",
"//pkg/volume:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library",

View File

@ -25,7 +25,6 @@ import (
"k8s.io/klog"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
@ -62,6 +61,10 @@ type RuntimeHelper interface {
// ShouldContainerBeRestarted checks whether a container needs to be restarted.
// TODO(yifan): Think about how to refactor this.
func ShouldContainerBeRestarted(container *v1.Container, pod *v1.Pod, podStatus *PodStatus) bool {
// Once a pod has been marked deleted, it should not be restarted
if pod.DeletionTimestamp != nil {
return false
}
// Get latest container status.
status := podStatus.FindContainerStatusByName(container.Name)
// If the container was never started before, we should start it.
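The new DeletionTimestamp guard means a terminating pod can never have its containers restarted, regardless of restart policy or container state. An illustrative caller-side check (podStatus assumed to come from the runtime cache; the caller imports metav1):

// Once a pod is marked for deletion, the answer is always "no restart".
now := metav1.Now()
pod.DeletionTimestamp = &now
restart := ShouldContainerBeRestarted(&pod.Spec.Containers[0], pod, podStatus)
// restart == false, even with RestartPolicyAlways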
@ -210,12 +213,6 @@ func (irecorder *innerEventRecorder) Eventf(object runtime.Object, eventtype, re
}
func (irecorder *innerEventRecorder) PastEventf(object runtime.Object, timestamp metav1.Time, eventtype, reason, messageFmt string, args ...interface{}) {
if ref, ok := irecorder.shouldRecordEvent(object); ok {
irecorder.recorder.PastEventf(ref, timestamp, eventtype, reason, messageFmt, args...)
}
}
func (irecorder *innerEventRecorder) AnnotatedEventf(object runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) {
if ref, ok := irecorder.shouldRecordEvent(object); ok {
irecorder.recorder.AnnotatedEventf(ref, annotations, eventtype, reason, messageFmt, args...)

View File

@ -34,15 +34,6 @@ const (
// DockerOperationsTimeoutKey is the key for the operation timeout metrics.
DockerOperationsTimeoutKey = "docker_operations_timeout_total"
// DeprecatedDockerOperationsKey is the deprecated key for docker operation metrics.
DeprecatedDockerOperationsKey = "docker_operations"
// DeprecatedDockerOperationsLatencyKey is the deprecated key for the operation latency metrics.
DeprecatedDockerOperationsLatencyKey = "docker_operations_latency_microseconds"
// DeprecatedDockerOperationsErrorsKey is the deprecated key for the operation error metrics.
DeprecatedDockerOperationsErrorsKey = "docker_operations_errors"
// DeprecatedDockerOperationsTimeoutKey is the deprecated key for the operation timeout metrics.
DeprecatedDockerOperationsTimeoutKey = "docker_operations_timeout"
// Keep the "kubelet" subsystem for backward compatibility.
kubeletSubsystem = "kubelet"
)
@ -91,49 +82,6 @@ var (
},
[]string{"operation_type"},
)
// DeprecatedDockerOperationsLatency collects operation latency numbers by operation
// type.
DeprecatedDockerOperationsLatency = metrics.NewSummaryVec(
&metrics.SummaryOpts{
Subsystem: kubeletSubsystem,
Name: DeprecatedDockerOperationsLatencyKey,
Help: "(Deprecated) Latency in microseconds of Docker operations. Broken down by operation type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
// DeprecatedDockerOperations collects operation counts by operation type.
DeprecatedDockerOperations = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: kubeletSubsystem,
Name: DeprecatedDockerOperationsKey,
Help: "(Deprecated) Cumulative number of Docker operations by operation type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
// DeprecatedDockerOperationsErrors collects operation errors by operation
// type.
DeprecatedDockerOperationsErrors = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: kubeletSubsystem,
Name: DeprecatedDockerOperationsErrorsKey,
Help: "(Deprecated) Cumulative number of Docker operation errors by operation type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
// DeprecatedDockerOperationsTimeout collects operation timeouts by operation type.
DeprecatedDockerOperationsTimeout = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: kubeletSubsystem,
Name: DeprecatedDockerOperationsTimeoutKey,
Help: "(Deprecated) Cumulative number of Docker operation timeout by operation type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
)
var registerMetrics sync.Once
@ -145,18 +93,9 @@ func Register() {
legacyregistry.MustRegister(DockerOperations)
legacyregistry.MustRegister(DockerOperationsErrors)
legacyregistry.MustRegister(DockerOperationsTimeout)
legacyregistry.MustRegister(DeprecatedDockerOperationsLatency)
legacyregistry.MustRegister(DeprecatedDockerOperations)
legacyregistry.MustRegister(DeprecatedDockerOperationsErrors)
legacyregistry.MustRegister(DeprecatedDockerOperationsTimeout)
})
}
// SinceInMicroseconds gets the time since the specified start in microseconds.
func SinceInMicroseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
}
// SinceInSeconds gets the time since the specified start in seconds.
func SinceInSeconds(start time.Time) float64 {
return time.Since(start).Seconds()

View File

@ -16,8 +16,8 @@ limitations under the License.
package events
// Container event reason list
const (
// Container event reason list
CreatedContainer = "Created"
StartedContainer = "Started"
FailedToCreateContainer = "Failed"
@ -26,22 +26,28 @@ const (
PreemptContainer = "Preempting"
BackOffStartContainer = "BackOff"
ExceededGracePeriod = "ExceededGracePeriod"
)
// Pod event reason list
// Pod event reason list
const (
FailedToKillPod = "FailedKillPod"
FailedToCreatePodContainer = "FailedCreatePodContainer"
FailedToMakePodDataDirectories = "Failed"
NetworkNotReady = "NetworkNotReady"
)
// Image event reason list
// Image event reason list
const (
PullingImage = "Pulling"
PulledImage = "Pulled"
FailedToPullImage = "Failed"
FailedToInspectImage = "InspectFailed"
ErrImageNeverPullPolicy = "ErrImageNeverPull"
BackOffPullImage = "BackOff"
)
// kubelet event reason list
// kubelet event reason list
const (
NodeReady = "NodeReady"
NodeNotReady = "NodeNotReady"
NodeSchedulable = "NodeSchedulable"
@ -66,22 +72,33 @@ const (
SandboxChanged = "SandboxChanged"
FailedCreatePodSandBox = "FailedCreatePodSandBox"
FailedStatusPodSandBox = "FailedPodSandBoxStatus"
FailedMountOnFilesystemMismatch = "FailedMountOnFilesystemMismatch"
)
// Image manager event reason list
// Image manager event reason list
const (
InvalidDiskCapacity = "InvalidDiskCapacity"
FreeDiskSpaceFailed = "FreeDiskSpaceFailed"
)
// Probe event reason list
// Probe event reason list
const (
ContainerUnhealthy = "Unhealthy"
ContainerProbeWarning = "ProbeWarning"
)
// Pod worker event reason list
// Pod worker event reason list
const (
FailedSync = "FailedSync"
)
// Config event reason list
// Config event reason list
const (
FailedValidation = "FailedValidation"
)
// Lifecycle hooks
// Lifecycle hooks
const (
FailedPostStartHook = "FailedPostStartHook"
FailedPreStopHook = "FailedPreStopHook"
)

View File

@ -21,7 +21,11 @@ go_library(
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/framework/plugins/helper:go_default_library",
"//pkg/scheduler/framework/plugins/nodeaffinity:go_default_library",
"//pkg/scheduler/framework/plugins/nodename:go_default_library",
"//pkg/scheduler/framework/plugins/nodeports:go_default_library",
"//pkg/scheduler/framework/plugins/noderesources:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//pkg/security/apparmor:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
@ -40,11 +44,15 @@ go_test(
],
embed = [":go_default_library"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
"//pkg/scheduler/framework/plugins/nodename:go_default_library",
"//pkg/scheduler/framework/plugins/nodeports:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
],
)

View File

@ -18,7 +18,6 @@ package lifecycle
import (
"k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
)
// AdmissionFailureHandlerStub is an AdmissionFailureHandler that does not perform any handling of admission failure.
@ -31,6 +30,6 @@ func NewAdmissionFailureHandlerStub() *AdmissionFailureHandlerStub {
return &AdmissionFailureHandlerStub{}
}
func (n *AdmissionFailureHandlerStub) HandleAdmissionFailure(admitPod *v1.Pod, failureReasons []predicates.PredicateFailureReason) (bool, []predicates.PredicateFailureReason, error) {
return false, failureReasons, nil
func (n *AdmissionFailureHandlerStub) HandleAdmissionFailure(admitPod *v1.Pod, failureReasons []PredicateFailureReason) ([]PredicateFailureReason, error) {
return failureReasons, nil
}

View File

@ -38,7 +38,7 @@ const (
)
type HandlerRunner struct {
httpGetter kubetypes.HttpGetter
httpGetter kubetypes.HTTPGetter
commandRunner kubecontainer.ContainerCommandRunner
containerManager podStatusProvider
}
@ -47,7 +47,7 @@ type podStatusProvider interface {
GetPodStatus(uid types.UID, name, namespace string) (*kubecontainer.PodStatus, error)
}
func NewHandlerRunner(httpGetter kubetypes.HttpGetter, commandRunner kubecontainer.ContainerCommandRunner, containerManager podStatusProvider) kubecontainer.HandlerRunner {
func NewHandlerRunner(httpGetter kubetypes.HTTPGetter, commandRunner kubecontainer.ContainerCommandRunner, containerManager podStatusProvider) kubecontainer.HandlerRunner {
return &HandlerRunner{
httpGetter: httpGetter,
commandRunner: commandRunner,

View File

@ -20,11 +20,15 @@ import (
"fmt"
"k8s.io/klog"
pluginhelper "k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodename"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeports"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources"
"k8s.io/api/core/v1"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/kubelet/util/format"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
@ -35,7 +39,7 @@ type pluginResourceUpdateFuncType func(*schedulernodeinfo.NodeInfo, *PodAdmitAtt
// AdmissionFailureHandler is an interface which defines how to deal with a failure to admit a pod.
// This allows for the graceful handling of pod admission failure.
type AdmissionFailureHandler interface {
HandleAdmissionFailure(admitPod *v1.Pod, failureReasons []predicates.PredicateFailureReason) (bool, []predicates.PredicateFailureReason, error)
HandleAdmissionFailure(admitPod *v1.Pod, failureReasons []PredicateFailureReason) ([]PredicateFailureReason, error)
}
type predicateAdmitHandler struct {
@ -89,7 +93,8 @@ func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult
// the Resource Class API in the future.
podWithoutMissingExtendedResources := removeMissingExtendedResources(admitPod, nodeInfo)
fit, reasons, err := predicates.GeneralPredicates(podWithoutMissingExtendedResources, nil, nodeInfo)
reasons, err := GeneralPredicates(podWithoutMissingExtendedResources, nodeInfo)
fit := len(reasons) == 0 && err == nil
if err != nil {
message := fmt.Sprintf("GeneralPredicates failed due to %v, which is unexpected.", err)
klog.Warningf("Failed to admit pod %v - %s", format.Pod(admitPod), message)
@ -100,7 +105,8 @@ func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult
}
}
if !fit {
fit, reasons, err = w.admissionFailureHandler.HandleAdmissionFailure(admitPod, reasons)
reasons, err = w.admissionFailureHandler.HandleAdmissionFailure(admitPod, reasons)
fit = len(reasons) == 0 && err == nil
if err != nil {
message := fmt.Sprintf("Unexpected error while attempting to recover from admission failure: %v", err)
klog.Warningf("Failed to admit pod %v - %s", format.Pod(admitPod), message)
@ -126,18 +132,14 @@ func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult
// If there are failed predicates, we only return the first one as a reason.
r := reasons[0]
switch re := r.(type) {
case *predicates.PredicateFailureError:
case *PredicateFailureError:
reason = re.PredicateName
message = re.Error()
klog.V(2).Infof("Predicate failed on Pod: %v, for reason: %v", format.Pod(admitPod), message)
case *predicates.InsufficientResourceError:
case *InsufficientResourceError:
reason = fmt.Sprintf("OutOf%s", re.ResourceName)
message = re.Error()
klog.V(2).Infof("Predicate failed on Pod: %v, for reason: %v", format.Pod(admitPod), message)
case *predicates.FailureReason:
reason = re.GetReason()
message = fmt.Sprintf("Failure: %s", re.GetReason())
klog.V(2).Infof("Predicate failed on Pod: %v, for reason: %v", format.Pod(admitPod), message)
default:
reason = "UnexpectedPredicateFailureType"
message = fmt.Sprintf("GeneralPredicates failed due to %v, which is unexpected.", r)
@ -172,3 +174,76 @@ func removeMissingExtendedResources(pod *v1.Pod, nodeInfo *schedulernodeinfo.Nod
}
return podCopy
}
// InsufficientResourceError is an error type that indicates what kind of resource limit is
// hit and caused the unfitting failure.
type InsufficientResourceError struct {
ResourceName v1.ResourceName
Requested int64
Used int64
Capacity int64
}
func (e *InsufficientResourceError) Error() string {
return fmt.Sprintf("Node didn't have enough resource: %s, requested: %d, used: %d, capacity: %d",
e.ResourceName, e.Requested, e.Used, e.Capacity)
}
// PredicateFailureReason interface represents the failure reason of a predicate.
type PredicateFailureReason interface {
GetReason() string
}
// GetReason returns the reason of the InsufficientResourceError.
func (e *InsufficientResourceError) GetReason() string {
return fmt.Sprintf("Insufficient %v", e.ResourceName)
}
// GetInsufficientAmount returns the amount of the insufficient resource of the error.
func (e *InsufficientResourceError) GetInsufficientAmount() int64 {
return e.Requested - (e.Capacity - e.Used)
}
// PredicateFailureError describes a failure error of predicate.
type PredicateFailureError struct {
PredicateName string
PredicateDesc string
}
func (e *PredicateFailureError) Error() string {
return fmt.Sprintf("Predicate %s failed", e.PredicateName)
}
// GetReason returns the reason of the PredicateFailureError.
func (e *PredicateFailureError) GetReason() string {
return e.PredicateDesc
}
// GeneralPredicates checks a group of predicates that the kubelet cares about.
func GeneralPredicates(pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) ([]PredicateFailureReason, error) {
if nodeInfo.Node() == nil {
return nil, fmt.Errorf("node not found")
}
var reasons []PredicateFailureReason
for _, r := range noderesources.Fits(pod, nodeInfo, nil) {
reasons = append(reasons, &InsufficientResourceError{
ResourceName: r.ResourceName,
Requested: r.Requested,
Used: r.Used,
Capacity: r.Capacity,
})
}
if !pluginhelper.PodMatchesNodeSelectorAndAffinityTerms(pod, nodeInfo.Node()) {
reasons = append(reasons, &PredicateFailureError{nodeaffinity.Name, nodeaffinity.ErrReason})
}
if !nodename.Fits(pod, nodeInfo) {
reasons = append(reasons, &PredicateFailureError{nodename.Name, nodename.ErrReason})
}
if !nodeports.Fits(pod, nodeInfo) {
reasons = append(reasons, &PredicateFailureError{nodeports.Name, nodeports.ErrReason})
}
return reasons, nil
}
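This gives the kubelet admit path a self-contained replacement for the old scheduler predicates package: a caller admits the pod only when the reason list comes back empty. A sketch of consuming it, including the insufficiency arithmetic from GetInsufficientAmount (variable names assumed):

// Illustrative caller in the admit path.
reasons, err := GeneralPredicates(pod, nodeInfo)
admit := err == nil && len(reasons) == 0
for _, r := range reasons {
	if ire, ok := r.(*InsufficientResourceError); ok {
		// e.g. requested 500m CPU on a node with capacity 4000m and 3800m
		// used: GetInsufficientAmount() = 500 - (4000 - 3800) = 300m short.
		_ = ire.GetInsufficientAmount()
	}
}
_ = admit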

View File

@ -34,44 +34,33 @@ import (
// This const block defines the metric names for the kubelet metrics.
const (
KubeletSubsystem = "kubelet"
NodeNameKey = "node_name"
NodeLabelKey = "node"
PodWorkerDurationKey = "pod_worker_duration_seconds"
PodStartDurationKey = "pod_start_duration_seconds"
CgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
PodWorkerStartDurationKey = "pod_worker_start_duration_seconds"
PLEGRelistDurationKey = "pleg_relist_duration_seconds"
PLEGDiscardEventsKey = "pleg_discard_events"
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
EvictionsKey = "evictions"
EvictionStatsAgeKey = "eviction_stats_age_seconds"
PreemptionsKey = "preemptions"
DeprecatedPodWorkerLatencyKey = "pod_worker_latency_microseconds"
DeprecatedPodStartLatencyKey = "pod_start_latency_microseconds"
DeprecatedCgroupManagerOperationsKey = "cgroup_manager_latency_microseconds"
DeprecatedPodWorkerStartLatencyKey = "pod_worker_start_latency_microseconds"
DeprecatedPLEGRelistLatencyKey = "pleg_relist_latency_microseconds"
DeprecatedPLEGRelistIntervalKey = "pleg_relist_interval_microseconds"
DeprecatedEvictionStatsAgeKey = "eviction_stats_age_microseconds"
VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes"
VolumeStatsAvailableBytesKey = "volume_stats_available_bytes"
VolumeStatsUsedBytesKey = "volume_stats_used_bytes"
VolumeStatsInodesKey = "volume_stats_inodes"
VolumeStatsInodesFreeKey = "volume_stats_inodes_free"
VolumeStatsInodesUsedKey = "volume_stats_inodes_used"
KubeletSubsystem = "kubelet"
NodeNameKey = "node_name"
NodeLabelKey = "node"
PodWorkerDurationKey = "pod_worker_duration_seconds"
PodStartDurationKey = "pod_start_duration_seconds"
CgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
PodWorkerStartDurationKey = "pod_worker_start_duration_seconds"
PLEGRelistDurationKey = "pleg_relist_duration_seconds"
PLEGDiscardEventsKey = "pleg_discard_events"
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
PLEGLastSeenKey = "pleg_last_seen_seconds"
EvictionsKey = "evictions"
EvictionStatsAgeKey = "eviction_stats_age_seconds"
PreemptionsKey = "preemptions"
VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes"
VolumeStatsAvailableBytesKey = "volume_stats_available_bytes"
VolumeStatsUsedBytesKey = "volume_stats_used_bytes"
VolumeStatsInodesKey = "volume_stats_inodes"
VolumeStatsInodesFreeKey = "volume_stats_inodes_free"
VolumeStatsInodesUsedKey = "volume_stats_inodes_used"
// Metrics keys of remote runtime operations
RuntimeOperationsKey = "runtime_operations_total"
RuntimeOperationsDurationKey = "runtime_operations_duration_seconds"
RuntimeOperationsErrorsKey = "runtime_operations_errors_total"
DeprecatedRuntimeOperationsKey = "runtime_operations"
DeprecatedRuntimeOperationsLatencyKey = "runtime_operations_latency_microseconds"
DeprecatedRuntimeOperationsErrorsKey = "runtime_operations_errors"
RuntimeOperationsKey = "runtime_operations_total"
RuntimeOperationsDurationKey = "runtime_operations_duration_seconds"
RuntimeOperationsErrorsKey = "runtime_operations_errors_total"
// Metrics keys of device plugin operations
DevicePluginRegistrationCountKey = "device_plugin_registration_total"
DevicePluginAllocationDurationKey = "device_plugin_alloc_duration_seconds"
DeprecatedDevicePluginRegistrationCountKey = "device_plugin_registration_count"
DeprecatedDevicePluginAllocationLatencyKey = "device_plugin_alloc_latency_microseconds"
DevicePluginRegistrationCountKey = "device_plugin_registration_total"
DevicePluginAllocationDurationKey = "device_plugin_alloc_duration_seconds"
// Metric keys for node config
AssignedConfigKey = "node_config_assigned"
@ -165,17 +154,16 @@ var (
StabilityLevel: metrics.ALPHA,
},
)
// PLEGDiscardEvents is a Histogram that tracks the duration (in seconds) it takes for discarding events in the Kubelet's
// Pod Lifecycle Event Generator (PLEG).
PLEGDiscardEvents = metrics.NewCounterVec(
// PLEGDiscardEvents is a Counter that tracks the number of discard events in the Kubelet's Pod Lifecycle Event Generator (PLEG).
PLEGDiscardEvents = metrics.NewCounter(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PLEGDiscardEventsKey,
Help: "The number of discard events in PLEG.",
StabilityLevel: metrics.ALPHA,
},
[]string{},
)
// PLEGRelistInterval is a Histogram that tracks the intervals (in seconds) between relisting in the Kubelet's
// Pod Lifecycle Event Generator (PLEG).
PLEGRelistInterval = metrics.NewHistogram(
@ -187,6 +175,16 @@ var (
StabilityLevel: metrics.ALPHA,
},
)
// PLEGLastSeen is a Gauge giving the Unix timestamp when the Kubelet's
// Pod Lifecycle Event Generator (PLEG) was last seen active.
PLEGLastSeen = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: PLEGLastSeenKey,
Help: "Timestamp in seconds when PLEG was last seen active.",
StabilityLevel: metrics.ALPHA,
},
)
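The gauge above pairs naturally with the existing relist instrumentation. As a minimal sketch (not the kubelet's actual relist code; relistOnce and its wiring are assumed), a relist loop could drive both metrics like this:
func relistOnce(relist func()) {
	start := time.Now()
	relist()
	// Record how long the relist took and when PLEG was last seen active,
	// so a stalled PLEG shows up as a stale pleg_last_seen_seconds value.
	PLEGRelistDuration.Observe(SinceInSeconds(start))
	PLEGLastSeen.Set(float64(time.Now().Unix()))
}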
// RuntimeOperations is a Counter that tracks the cumulative number of remote runtime operations.
// Broken down by operation type.
RuntimeOperations = metrics.NewCounterVec(
@ -279,134 +277,6 @@ var (
},
[]string{"resource_name"},
)
// DeprecatedPodWorkerLatency is a Summary that tracks the latency (in microseconds) to sync a single pod.
// Broken down by operation type. This metric is deprecated.
DeprecatedPodWorkerLatency = metrics.NewSummaryVec(
&metrics.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedPodWorkerLatencyKey,
Help: "(Deprecated) Latency in microseconds to sync a single pod. Broken down by operation type: create, update, or sync",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
// DeprecatedPodStartLatency is a Summary that tracks the latency (in microseconds) for a single pod to go from pending to running.
// This metric is deprecated.
DeprecatedPodStartLatency = metrics.NewSummary(
&metrics.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedPodStartLatencyKey,
Help: "(Deprecated) Latency in microseconds for a single pod to go from pending to running.",
StabilityLevel: metrics.ALPHA,
},
)
// DeprecatedCgroupManagerLatency is a Summary that tracks the latency (in microseconds) for cgroup manager operations to complete.
// Broken down by operation type. This metric is deprecated.
DeprecatedCgroupManagerLatency = metrics.NewSummaryVec(
&metrics.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedCgroupManagerOperationsKey,
Help: "(Deprecated) Latency in microseconds for cgroup manager operations. Broken down by method.",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
// DeprecatedPodWorkerStartLatency is a Summary that tracks the latency (in microseconds) from seeing a pod to starting a worker.
// This metric is deprecated.
DeprecatedPodWorkerStartLatency = metrics.NewSummary(
&metrics.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedPodWorkerStartLatencyKey,
Help: "(Deprecated) Latency in microseconds from seeing a pod to starting a worker.",
StabilityLevel: metrics.ALPHA,
},
)
// DeprecatedPLEGRelistLatency is a Summary that tracks the latency (in microseconds) for relisting pods in PLEG.
// This metric is deprecated.
DeprecatedPLEGRelistLatency = metrics.NewSummary(
&metrics.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedPLEGRelistLatencyKey,
Help: "(Deprecated) Latency in microseconds for relisting pods in PLEG.",
StabilityLevel: metrics.ALPHA,
},
)
// DeprecatedPLEGRelistInterval is a Summary that tracks the interval (in microseconds) between relistings in PLEG.
// This metric is deprecated.
DeprecatedPLEGRelistInterval = metrics.NewSummary(
&metrics.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedPLEGRelistIntervalKey,
Help: "(Deprecated) Interval in microseconds between relisting in PLEG.",
StabilityLevel: metrics.ALPHA,
},
)
// DeprecatedRuntimeOperations is a Counter that tracks the cumulative number of remote runtime operations.
// Broken down by operation type. This metric is deprecated.
DeprecatedRuntimeOperations = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedRuntimeOperationsKey,
Help: "(Deprecated) Cumulative number of runtime operations by operation type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
// DeprecatedRuntimeOperationsLatency is a Summary that tracks the latency (in microseconds) of remote runtime operations
// to complete. Broken down by operation type. This metric is deprecated.
DeprecatedRuntimeOperationsLatency = metrics.NewSummaryVec(
&metrics.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedRuntimeOperationsLatencyKey,
Help: "(Deprecated) Latency in microseconds of runtime operations. Broken down by operation type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
// DeprecatedRuntimeOperationsErrors is a Counter that tracks the cumulative number of remote runtime operation errors.
// Broken down by operation type. This metric is deprecated.
DeprecatedRuntimeOperationsErrors = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedRuntimeOperationsErrorsKey,
Help: "(Deprecated) Cumulative number of runtime operation errors by operation type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
// DeprecatedEvictionStatsAge is a Summary that tracks the time (in microseconds) between when stats are collected and when a pod
// is evicted based on those stats. Broken down by eviction signal. This metric is deprecated.
DeprecatedEvictionStatsAge = metrics.NewSummaryVec(
&metrics.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedEvictionStatsAgeKey,
Help: "(Deprecated) Time between when stats are collected, and when pod is evicted based on those stats by eviction signal",
StabilityLevel: metrics.ALPHA,
},
[]string{"eviction_signal"},
)
// DeprecatedDevicePluginRegistrationCount is a Counter that tracks the cumulative number of device plugin registrations.
// Broken down by resource name. This metric is deprecated.
DeprecatedDevicePluginRegistrationCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedDevicePluginRegistrationCountKey,
Help: "(Deprecated) Cumulative number of device plugin registrations. Broken down by resource name.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource_name"},
)
// DeprecatedDevicePluginAllocationLatency is a Summary that tracks the latency (in microseconds) for serving device plugin allocation requests.
// Broken down by resource name. This metric is deprecated.
DeprecatedDevicePluginAllocationLatency = metrics.NewSummaryVec(
&metrics.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DeprecatedDevicePluginAllocationLatencyKey,
Help: "(Deprecated) Latency in microseconds to serve a device plugin Allocation request. Broken down by resource name.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource_name"},
)
// Metrics for node config
@ -451,12 +321,12 @@ var (
},
)
// RunPodSandboxDuration is a Histogram that tracks the duration (in seconds) it takes to run Pod Sandbox operations.
// Broken down by RuntimeClass.
// Broken down by RuntimeClass.Handler.
RunPodSandboxDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: RunPodSandboxDurationKey,
Help: "Duration in seconds of the run_podsandbox operations. Broken down by RuntimeClass.",
Help: "Duration in seconds of the run_podsandbox operations. Broken down by RuntimeClass.Handler.",
// Use DefBuckets for now, will customize the buckets if necessary.
Buckets: metrics.DefBuckets,
StabilityLevel: metrics.ALPHA,
@ -464,12 +334,12 @@ var (
[]string{"runtime_handler"},
)
// RunPodSandboxErrors is a Counter that tracks the cumulative number of Pod Sandbox operations errors.
// Broken down by RuntimeClass.
// Broken down by RuntimeClass.Handler.
RunPodSandboxErrors = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: RunPodSandboxErrorsKey,
Help: "Cumulative number of the run_podsandbox operation errors by RuntimeClass.",
Help: "Cumulative number of the run_podsandbox operation errors by RuntimeClass.Handler.",
StabilityLevel: metrics.ALPHA,
},
[]string{"runtime_handler"},
@ -511,6 +381,7 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...metrics.S
legacyregistry.MustRegister(PLEGRelistDuration)
legacyregistry.MustRegister(PLEGDiscardEvents)
legacyregistry.MustRegister(PLEGRelistInterval)
legacyregistry.MustRegister(PLEGLastSeen)
legacyregistry.MustRegister(RuntimeOperations)
legacyregistry.MustRegister(RuntimeOperationsDuration)
legacyregistry.MustRegister(RuntimeOperationsErrors)
@ -519,20 +390,10 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...metrics.S
legacyregistry.MustRegister(Preemptions)
legacyregistry.MustRegister(DevicePluginRegistrationCount)
legacyregistry.MustRegister(DevicePluginAllocationDuration)
legacyregistry.MustRegister(DeprecatedPodWorkerLatency)
legacyregistry.MustRegister(DeprecatedPodStartLatency)
legacyregistry.MustRegister(DeprecatedCgroupManagerLatency)
legacyregistry.MustRegister(DeprecatedPodWorkerStartLatency)
legacyregistry.MustRegister(DeprecatedPLEGRelistLatency)
legacyregistry.MustRegister(DeprecatedPLEGRelistInterval)
legacyregistry.MustRegister(DeprecatedRuntimeOperations)
legacyregistry.MustRegister(DeprecatedRuntimeOperationsLatency)
legacyregistry.MustRegister(DeprecatedRuntimeOperationsErrors)
legacyregistry.MustRegister(DeprecatedEvictionStatsAge)
legacyregistry.MustRegister(DeprecatedDevicePluginRegistrationCount)
legacyregistry.MustRegister(DeprecatedDevicePluginAllocationLatency)
legacyregistry.MustRegister(RunningContainerCount)
legacyregistry.MustRegister(RunningPodCount)
legacyregistry.MustRegister(RunPodSandboxDuration)
legacyregistry.MustRegister(RunPodSandboxErrors)
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
legacyregistry.MustRegister(AssignedConfig)
legacyregistry.MustRegister(ActiveConfig)
@ -550,11 +411,6 @@ func GetGather() metrics.Gatherer {
return legacyregistry.DefaultGatherer
}
// SinceInMicroseconds gets the time since the specified start in microseconds.
func SinceInMicroseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
}
// SinceInSeconds gets the time since the specified start in seconds.
func SinceInSeconds(start time.Time) float64 {
return time.Since(start).Seconds()
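With the microseconds helper removed, callers record durations in seconds instead; a minimal usage sketch, assuming PodStartDuration is the seconds histogram registered for PodStartDurationKey above:
start := time.Now()
// ... pod goes from pending to running ...
PodStartDuration.Observe(SinceInSeconds(start)) // e.g. 0.042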

View File

@ -10,14 +10,12 @@ go_library(
name = "go_default_library",
srcs = [
"namespace.go",
"runtime.go",
"whitelist.go",
],
importpath = "k8s.io/kubernetes/pkg/kubelet/sysctl",
deps = [
"//pkg/apis/core/validation:go_default_library",
"//pkg/apis/policy/validation:go_default_library",
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
],
)

View File

@ -1,95 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sysctl
import (
"fmt"
"k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
)
const (
UnsupportedReason = "SysctlUnsupported"
// CRI uses semver-compatible API version, while docker does not
// (e.g., 1.24). Append the version with a ".0".
dockerMinimumAPIVersion = "1.24.0"
dockerTypeName = "docker"
)
// TODO: The admission logic in this file is runtime-dependent. It should be
// changed to be generic and CRI-compatible.
type runtimeAdmitHandler struct {
result lifecycle.PodAdmitResult
}
var _ lifecycle.PodAdmitHandler = &runtimeAdmitHandler{}
// NewRuntimeAdmitHandler returns a runtimeAdmitHandler that checks whether
// the given runtime supports sysctls.
func NewRuntimeAdmitHandler(runtime container.Runtime) (*runtimeAdmitHandler, error) {
switch runtime.Type() {
case dockerTypeName:
v, err := runtime.APIVersion()
if err != nil {
return nil, fmt.Errorf("failed to get runtime version: %v", err)
}
// only Docker API version >= 1.24 supports sysctls
c, err := v.Compare(dockerMinimumAPIVersion)
if err != nil {
return nil, fmt.Errorf("failed to compare Docker version for sysctl support: %v", err)
}
if c >= 0 {
return &runtimeAdmitHandler{
result: lifecycle.PodAdmitResult{
Admit: true,
},
}, nil
}
return &runtimeAdmitHandler{
result: lifecycle.PodAdmitResult{
Admit: false,
Reason: UnsupportedReason,
Message: "Docker API version before 1.24 does not support sysctls",
},
}, nil
default:
// Return admit for other runtimes.
return &runtimeAdmitHandler{
result: lifecycle.PodAdmitResult{
Admit: true,
},
}, nil
}
}
// Admit checks whether the runtime supports sysctls.
func (w *runtimeAdmitHandler) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
if attrs.Pod.Spec.SecurityContext != nil {
if len(attrs.Pod.Spec.SecurityContext.Sysctls) > 0 {
return w.result
}
}
return lifecycle.PodAdmitResult{
Admit: true,
}
}
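Before its removal, a caller might have wired the handler in roughly like this; a sketch only, with error handling elided and pod assumed in scope:
handler, err := NewRuntimeAdmitHandler(runtime)
if err != nil {
	return fmt.Errorf("building sysctl admit handler: %v", err)
}
result := handler.Admit(&lifecycle.PodAdmitAttributes{Pod: pod})
if !result.Admit {
	// Reject the pod; result.Reason and result.Message explain why.
}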

View File

@ -26,7 +26,9 @@ import (
// TODO: Reconcile custom types in kubelet/types and this subpackage
type HttpGetter interface {
// HTTPGetter is an interface representing the ability to perform HTTP GET requests.
type HTTPGetter interface {
// Get issues a GET to the specified URL.
Get(url string) (*http.Response, error)
}
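Note that *http.Client already satisfies the renamed interface, since its Get method has exactly this signature; a minimal sketch (the URL is illustrative):
var getter HTTPGetter = http.DefaultClient
resp, err := getter.Get("http://127.0.0.1:10255/healthz")
if err == nil {
	defer resp.Body.Close()
}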

View File

@ -1,41 +0,0 @@
package(default_visibility = ["//visibility:public"])
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"scheduler_interface.go",
"types.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm",
deps = [
"//pkg/apis/apps:go_default_library",
"//pkg/apis/core:go_default_library",
"//pkg/scheduler/apis/extender/v1:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/apps/v1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/client-go/listers/apps/v1:go_default_library",
"//staging/src/k8s.io/client-go/listers/core/v1:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/scheduler/algorithm/predicates:all-srcs",
"//pkg/scheduler/algorithm/priorities:all-srcs",
],
tags = ["automanaged"],
)

View File

@ -1,19 +0,0 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package algorithm contains a generic Scheduler interface and several
// implementations.
package algorithm // import "k8s.io/kubernetes/pkg/scheduler/algorithm"

View File

@ -1,90 +0,0 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"csi_volume_predicate.go",
"error.go",
"metadata.go",
"predicates.go",
"utils.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates",
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/listers:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//pkg/volume/util:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/storage/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/rand:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/listers/core/v1:go_default_library",
"//staging/src/k8s.io/client-go/listers/storage/v1:go_default_library",
"//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
"//staging/src/k8s.io/cloud-provider/volume/helpers:go_default_library",
"//staging/src/k8s.io/csi-translation-lib:go_default_library",
"//staging/src/k8s.io/csi-translation-lib/plugins:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"csi_volume_predicate_test.go",
"max_attachable_volume_predicate_test.go",
"metadata_test.go",
"predicates_test.go",
"utils_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler/listers/fake:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//pkg/scheduler/nodeinfo/snapshot:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/volume/util:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/api/storage/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
"//staging/src/k8s.io/csi-translation-lib/plugins:go_default_library",
"//vendor/k8s.io/utils/pointer:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@ -1,285 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
v1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/util/rand"
corelisters "k8s.io/client-go/listers/core/v1"
storagelisters "k8s.io/client-go/listers/storage/v1"
csitrans "k8s.io/csi-translation-lib"
"k8s.io/klog"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
)
// InTreeToCSITranslator contains methods required to check migratable status
// and to perform translations from in-tree PVs to CSI
type InTreeToCSITranslator interface {
IsPVMigratable(pv *v1.PersistentVolume) bool
IsMigratableIntreePluginByName(inTreePluginName string) bool
GetInTreePluginNameFromSpec(pv *v1.PersistentVolume, vol *v1.Volume) (string, error)
GetCSINameFromInTreeName(pluginName string) (string, error)
TranslateInTreePVToCSI(pv *v1.PersistentVolume) (*v1.PersistentVolume, error)
}
// CSIMaxVolumeLimitChecker defines the predicate needed for counting CSI volumes
type CSIMaxVolumeLimitChecker struct {
csiNodeLister storagelisters.CSINodeLister
pvLister corelisters.PersistentVolumeLister
pvcLister corelisters.PersistentVolumeClaimLister
scLister storagelisters.StorageClassLister
randomVolumeIDPrefix string
translator InTreeToCSITranslator
}
// NewCSIMaxVolumeLimitPredicate returns a predicate for counting CSI volumes
func NewCSIMaxVolumeLimitPredicate(
csiNodeLister storagelisters.CSINodeLister, pvLister corelisters.PersistentVolumeLister, pvcLister corelisters.PersistentVolumeClaimLister, scLister storagelisters.StorageClassLister) FitPredicate {
c := &CSIMaxVolumeLimitChecker{
csiNodeLister: csiNodeLister,
pvLister: pvLister,
pvcLister: pvcLister,
scLister: scLister,
randomVolumeIDPrefix: rand.String(32),
translator: csitrans.New(),
}
return c.attachableLimitPredicate
}
func getVolumeLimits(nodeInfo *schedulernodeinfo.NodeInfo, csiNode *storagev1.CSINode) map[v1.ResourceName]int64 {
// TODO: stop getting values from Node object in v1.18
nodeVolumeLimits := nodeInfo.VolumeLimits()
if csiNode != nil {
for i := range csiNode.Spec.Drivers {
d := csiNode.Spec.Drivers[i]
if d.Allocatable != nil && d.Allocatable.Count != nil {
// TODO: drop GetCSIAttachLimitKey once we don't get values from Node object (v1.18)
k := v1.ResourceName(volumeutil.GetCSIAttachLimitKey(d.Name))
nodeVolumeLimits[k] = int64(*d.Allocatable.Count)
}
}
}
return nodeVolumeLimits
}
func (c *CSIMaxVolumeLimitChecker) attachableLimitPredicate(
pod *v1.Pod, meta Metadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
// If the new pod doesn't have any volume attached to it, the predicate will always be true
if len(pod.Spec.Volumes) == 0 {
return true, nil, nil
}
node := nodeInfo.Node()
if node == nil {
return false, nil, fmt.Errorf("node not found")
}
// If CSINode doesn't exist, the predicate may read the limits from Node object
csiNode, err := c.csiNodeLister.Get(node.Name)
if err != nil {
// TODO: return the error once CSINode is created by default (2 releases)
klog.V(5).Infof("Could not get a CSINode object for the node: %v", err)
}
newVolumes := make(map[string]string)
if err := c.filterAttachableVolumes(csiNode, pod.Spec.Volumes, pod.Namespace, newVolumes); err != nil {
return false, nil, err
}
// If the pod doesn't have any new CSI volumes, the predicate will always be true
if len(newVolumes) == 0 {
return true, nil, nil
}
// If the node doesn't have volume limits, the predicate will always be true
nodeVolumeLimits := getVolumeLimits(nodeInfo, csiNode)
if len(nodeVolumeLimits) == 0 {
return true, nil, nil
}
attachedVolumes := make(map[string]string)
for _, existingPod := range nodeInfo.Pods() {
if err := c.filterAttachableVolumes(csiNode, existingPod.Spec.Volumes, existingPod.Namespace, attachedVolumes); err != nil {
return false, nil, err
}
}
attachedVolumeCount := map[string]int{}
for volumeUniqueName, volumeLimitKey := range attachedVolumes {
if _, ok := newVolumes[volumeUniqueName]; ok {
// Don't count a single volume used in multiple pods more than once
delete(newVolumes, volumeUniqueName)
}
attachedVolumeCount[volumeLimitKey]++
}
newVolumeCount := map[string]int{}
for _, volumeLimitKey := range newVolumes {
newVolumeCount[volumeLimitKey]++
}
for volumeLimitKey, count := range newVolumeCount {
maxVolumeLimit, ok := nodeVolumeLimits[v1.ResourceName(volumeLimitKey)]
if ok {
currentVolumeCount := attachedVolumeCount[volumeLimitKey]
if currentVolumeCount+count > int(maxVolumeLimit) {
return false, []PredicateFailureReason{ErrMaxVolumeCountExceeded}, nil
}
}
}
return true, nil, nil
}
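To make the final check concrete, a worked example with made-up numbers: suppose the node's limit for a driver is 4 volumes, 3 matching volumes are already attached, and the pod brings 2 new ones:
maxVolumeLimit := int64(4) // from nodeVolumeLimits
currentVolumeCount, count := 3, 2
if currentVolumeCount+count > int(maxVolumeLimit) {
	// 3 + 2 > 4, so the predicate fails with ErrMaxVolumeCountExceeded.
}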
func (c *CSIMaxVolumeLimitChecker) filterAttachableVolumes(
csiNode *storagev1.CSINode, volumes []v1.Volume, namespace string, result map[string]string) error {
for _, vol := range volumes {
// CSI volumes can only be used as persistent volumes
if vol.PersistentVolumeClaim == nil {
continue
}
pvcName := vol.PersistentVolumeClaim.ClaimName
if pvcName == "" {
return fmt.Errorf("PersistentVolumeClaim had no name")
}
pvc, err := c.pvcLister.PersistentVolumeClaims(namespace).Get(pvcName)
if err != nil {
klog.V(5).Infof("Unable to look up PVC info for %s/%s", namespace, pvcName)
continue
}
driverName, volumeHandle := c.getCSIDriverInfo(csiNode, pvc)
if driverName == "" || volumeHandle == "" {
klog.V(5).Infof("Could not find a CSI driver name or volume handle, not counting volume")
continue
}
volumeUniqueName := fmt.Sprintf("%s/%s", driverName, volumeHandle)
volumeLimitKey := volumeutil.GetCSIAttachLimitKey(driverName)
result[volumeUniqueName] = volumeLimitKey
}
return nil
}
// getCSIDriverInfo returns the CSI driver name and volume ID of a given PVC.
// If the PVC is from a migrated in-tree plugin, this function will return
// the information of the CSI driver that the plugin has been migrated to.
func (c *CSIMaxVolumeLimitChecker) getCSIDriverInfo(csiNode *storagev1.CSINode, pvc *v1.PersistentVolumeClaim) (string, string) {
pvName := pvc.Spec.VolumeName
namespace := pvc.Namespace
pvcName := pvc.Name
if pvName == "" {
klog.V(5).Infof("Persistent volume had no name for claim %s/%s", namespace, pvcName)
return c.getCSIDriverInfoFromSC(csiNode, pvc)
}
pv, err := c.pvLister.Get(pvName)
if err != nil {
klog.V(5).Infof("Unable to look up PV info for PVC %s/%s and PV %s", namespace, pvcName, pvName)
// If we can't fetch the PV associated with the PVC, maybe it was deleted
// or the PVC was prebound to a PV that hasn't been created yet.
// Fall back to using the StorageClass for volume counting.
return c.getCSIDriverInfoFromSC(csiNode, pvc)
}
csiSource := pv.Spec.PersistentVolumeSource.CSI
if csiSource == nil {
// We make a fast path for non-CSI volumes that aren't migratable
if !c.translator.IsPVMigratable(pv) {
return "", ""
}
pluginName, err := c.translator.GetInTreePluginNameFromSpec(pv, nil)
if err != nil {
klog.V(5).Infof("Unable to look up plugin name from PV spec: %v", err)
return "", ""
}
if !isCSIMigrationOn(csiNode, pluginName) {
klog.V(5).Infof("CSI Migration of plugin %s is not enabled", pluginName)
return "", ""
}
csiPV, err := c.translator.TranslateInTreePVToCSI(pv)
if err != nil {
klog.V(5).Infof("Unable to translate in-tree volume to CSI: %v", err)
return "", ""
}
if csiPV.Spec.PersistentVolumeSource.CSI == nil {
klog.V(5).Infof("Unable to get a valid volume source for translated PV %s", pvName)
return "", ""
}
csiSource = csiPV.Spec.PersistentVolumeSource.CSI
}
return csiSource.Driver, csiSource.VolumeHandle
}
// getCSIDriverInfoFromSC returns the CSI driver name and a random volume ID of a given PVC's StorageClass.
func (c *CSIMaxVolumeLimitChecker) getCSIDriverInfoFromSC(csiNode *storagev1.CSINode, pvc *v1.PersistentVolumeClaim) (string, string) {
namespace := pvc.Namespace
pvcName := pvc.Name
scName := v1helper.GetPersistentVolumeClaimClass(pvc)
// If the StorageClass is not set or not found, the PVC must be using immediate binding mode,
// and hence it must be bound before scheduling. So it is safe not to count it.
if scName == "" {
klog.V(5).Infof("PVC %s/%s has no StorageClass", namespace, pvcName)
return "", ""
}
storageClass, err := c.scLister.Get(scName)
if err != nil {
klog.V(5).Infof("Could not get StorageClass for PVC %s/%s: %v", namespace, pvcName, err)
return "", ""
}
// We use a random prefix to avoid conflicts with volume IDs. If the PVC is bound during the
// execution of the predicate and there is another pod on the same node that uses the same
// volume, we will overcount the volume and consider the two volumes as different.
volumeHandle := fmt.Sprintf("%s-%s/%s", c.randomVolumeIDPrefix, namespace, pvcName)
provisioner := storageClass.Provisioner
if c.translator.IsMigratableIntreePluginByName(provisioner) {
if !isCSIMigrationOn(csiNode, provisioner) {
klog.V(5).Infof("CSI Migration of plugin %s is not enabled", provisioner)
return "", ""
}
driverName, err := c.translator.GetCSINameFromInTreeName(provisioner)
if err != nil {
klog.V(5).Infof("Unable to look up driver name from plugin name: %v", err)
return "", ""
}
return driverName, volumeHandle
}
return provisioner, volumeHandle
}

View File

@ -1,189 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/api/core/v1"
)
var (
// The predicate names below try to be consistent with the predicate names used in
// DefaultAlgorithmProvider, defined in defaults.go (which tend to be stable for backward compatibility)
// NOTE: If you add a new predicate failure error for a predicate that can never
// be made to pass by removing pods, or you change an existing predicate so that
// it can never be made to pass by removing pods, you need to add the predicate
// failure error in nodesWherePreemptionMightHelp() in scheduler/core/generic_scheduler.go
// ErrDiskConflict is used for NoDiskConflict predicate error.
ErrDiskConflict = newPredicateFailureError("NoDiskConflict", "node(s) had no available disk")
// ErrVolumeZoneConflict is used for NoVolumeZoneConflict predicate error.
ErrVolumeZoneConflict = newPredicateFailureError("NoVolumeZoneConflict", "node(s) had no available volume zone")
// ErrNodeSelectorNotMatch is used for MatchNodeSelector predicate error.
ErrNodeSelectorNotMatch = newPredicateFailureError("MatchNodeSelector", "node(s) didn't match node selector")
// ErrPodAffinityNotMatch is used for MatchInterPodAffinity predicate error.
ErrPodAffinityNotMatch = newPredicateFailureError("MatchInterPodAffinity", "node(s) didn't match pod affinity/anti-affinity")
// ErrPodAffinityRulesNotMatch is used for PodAffinityRulesNotMatch predicate error.
ErrPodAffinityRulesNotMatch = newPredicateFailureError("PodAffinityRulesNotMatch", "node(s) didn't match pod affinity rules")
// ErrPodAntiAffinityRulesNotMatch is used for PodAntiAffinityRulesNotMatch predicate error.
ErrPodAntiAffinityRulesNotMatch = newPredicateFailureError("PodAntiAffinityRulesNotMatch", "node(s) didn't match pod anti-affinity rules")
// ErrExistingPodsAntiAffinityRulesNotMatch is used for ExistingPodsAntiAffinityRulesNotMatch predicate error.
ErrExistingPodsAntiAffinityRulesNotMatch = newPredicateFailureError("ExistingPodsAntiAffinityRulesNotMatch", "node(s) didn't satisfy existing pods anti-affinity rules")
// ErrTaintsTolerationsNotMatch is used for PodToleratesNodeTaints predicate error.
ErrTaintsTolerationsNotMatch = newPredicateFailureError("PodToleratesNodeTaints", "node(s) had taints that the pod didn't tolerate")
// ErrPodNotMatchHostName is used for HostName predicate error.
ErrPodNotMatchHostName = newPredicateFailureError("HostName", "node(s) didn't match the requested hostname")
// ErrPodNotFitsHostPorts is used for PodFitsHostPorts predicate error.
ErrPodNotFitsHostPorts = newPredicateFailureError("PodFitsHostPorts", "node(s) didn't have free ports for the requested pod ports")
// ErrNodeLabelPresenceViolated is used for CheckNodeLabelPresence predicate error.
ErrNodeLabelPresenceViolated = newPredicateFailureError("CheckNodeLabelPresence", "node(s) didn't have the requested labels")
// ErrServiceAffinityViolated is used for CheckServiceAffinity predicate error.
ErrServiceAffinityViolated = newPredicateFailureError("CheckServiceAffinity", "node(s) didn't match service affinity")
// ErrMaxVolumeCountExceeded is used for MaxVolumeCount predicate error.
ErrMaxVolumeCountExceeded = newPredicateFailureError("MaxVolumeCount", "node(s) exceed max volume count")
// ErrNodeUnderMemoryPressure is used for NodeUnderMemoryPressure predicate error.
ErrNodeUnderMemoryPressure = newPredicateFailureError("NodeUnderMemoryPressure", "node(s) had memory pressure")
// ErrNodeUnderDiskPressure is used for NodeUnderDiskPressure predicate error.
ErrNodeUnderDiskPressure = newPredicateFailureError("NodeUnderDiskPressure", "node(s) had disk pressure")
// ErrNodeUnderPIDPressure is used for NodeUnderPIDPressure predicate error.
ErrNodeUnderPIDPressure = newPredicateFailureError("NodeUnderPIDPressure", "node(s) had pid pressure")
// ErrNodeNotReady is used for NodeNotReady predicate error.
ErrNodeNotReady = newPredicateFailureError("NodeNotReady", "node(s) were not ready")
// ErrNodeNetworkUnavailable is used for NodeNetworkUnavailable predicate error.
ErrNodeNetworkUnavailable = newPredicateFailureError("NodeNetworkUnavailable", "node(s) had unavailable network")
// ErrNodeUnschedulable is used for NodeUnschedulable predicate error.
ErrNodeUnschedulable = newPredicateFailureError("NodeUnschedulable", "node(s) were unschedulable")
// ErrNodeUnknownCondition is used for NodeUnknownCondition predicate error.
ErrNodeUnknownCondition = newPredicateFailureError("NodeUnknownCondition", "node(s) had unknown conditions")
// ErrVolumeNodeConflict is used for VolumeNodeAffinityConflict predicate error.
ErrVolumeNodeConflict = newPredicateFailureError("VolumeNodeAffinityConflict", "node(s) had volume node affinity conflict")
// ErrVolumeBindConflict is used for VolumeBindingNoMatch predicate error.
ErrVolumeBindConflict = newPredicateFailureError("VolumeBindingNoMatch", "node(s) didn't find available persistent volumes to bind")
// ErrTopologySpreadConstraintsNotMatch is used for EvenPodsSpread predicate error.
ErrTopologySpreadConstraintsNotMatch = newPredicateFailureError("EvenPodsSpreadNotMatch", "node(s) didn't match pod topology spread constraints")
// ErrFakePredicate is used for test only. The fake predicates returning false also returns error
// as ErrFakePredicate.
ErrFakePredicate = newPredicateFailureError("FakePredicateError", "Nodes failed the fake predicate")
)
var unresolvablePredicateFailureErrors = map[PredicateFailureReason]struct{}{
ErrNodeSelectorNotMatch: {},
ErrPodAffinityRulesNotMatch: {},
ErrPodNotMatchHostName: {},
ErrTaintsTolerationsNotMatch: {},
ErrNodeLabelPresenceViolated: {},
// Node conditions won't change when scheduler simulates removal of preemption victims.
// So, it is pointless to try nodes that have not been able to host the pod due to node
// conditions. These include ErrNodeNotReady, ErrNodeUnderPIDPressure, ErrNodeUnderMemoryPressure, ....
ErrNodeNotReady: {},
ErrNodeNetworkUnavailable: {},
ErrNodeUnderDiskPressure: {},
ErrNodeUnderPIDPressure: {},
ErrNodeUnderMemoryPressure: {},
ErrNodeUnschedulable: {},
ErrNodeUnknownCondition: {},
ErrVolumeZoneConflict: {},
ErrVolumeNodeConflict: {},
ErrVolumeBindConflict: {},
}
// UnresolvablePredicateExists checks whether there is at least one unresolvable predicate
// failure reason; if so, it returns the first one found in the list.
func UnresolvablePredicateExists(reasons []PredicateFailureReason) PredicateFailureReason {
for _, r := range reasons {
if _, ok := unresolvablePredicateFailureErrors[r]; ok {
return r
}
}
return nil
}
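During preemption, a caller can use this helper to skip nodes where evicting pods cannot possibly help; a hedged sketch of such a call site inside a loop over candidate nodes:
if r := UnresolvablePredicateExists(reasons); r != nil {
	// e.g. a node selector mismatch: no amount of preemption fixes it, so skip this node.
	continue
}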
// InsufficientResourceError is an error type that indicates which resource limit was
// hit and caused the pod to fail to fit.
type InsufficientResourceError struct {
// resourceName is the name of the resource that is insufficient
ResourceName v1.ResourceName
requested int64
used int64
capacity int64
}
// NewInsufficientResourceError returns an InsufficientResourceError.
func NewInsufficientResourceError(resourceName v1.ResourceName, requested, used, capacity int64) *InsufficientResourceError {
return &InsufficientResourceError{
ResourceName: resourceName,
requested: requested,
used: used,
capacity: capacity,
}
}
func (e *InsufficientResourceError) Error() string {
return fmt.Sprintf("Node didn't have enough resource: %s, requested: %d, used: %d, capacity: %d",
e.ResourceName, e.requested, e.used, e.capacity)
}
// GetReason returns the reason of the InsufficientResourceError.
func (e *InsufficientResourceError) GetReason() string {
return fmt.Sprintf("Insufficient %v", e.ResourceName)
}
// GetInsufficientAmount returns the amount by which the resource is insufficient.
func (e *InsufficientResourceError) GetInsufficientAmount() int64 {
return e.requested - (e.capacity - e.used)
}
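A worked example of the arithmetic, with illustrative millicore values: requesting 2000m on a node with 8000m capacity of which 7000m is used leaves a shortfall of 2000 - (8000 - 7000) = 1000:
e := NewInsufficientResourceError("cpu", 2000, 7000, 8000)
fmt.Println(e.GetReason())             // "Insufficient cpu"
fmt.Println(e.GetInsufficientAmount()) // 1000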
// PredicateFailureError describes a failure error of predicate.
type PredicateFailureError struct {
PredicateName string
PredicateDesc string
}
func newPredicateFailureError(predicateName, predicateDesc string) *PredicateFailureError {
return &PredicateFailureError{PredicateName: predicateName, PredicateDesc: predicateDesc}
}
func (e *PredicateFailureError) Error() string {
return fmt.Sprintf("Predicate %s failed", e.PredicateName)
}
// GetReason returns the reason of the PredicateFailureError.
func (e *PredicateFailureError) GetReason() string {
return e.PredicateDesc
}
// PredicateFailureReason interface represents the failure reason of a predicate.
type PredicateFailureReason interface {
GetReason() string
}
// FailureReason describes a failure reason.
type FailureReason struct {
reason string
}
// NewFailureReason creates a FailureReason with message.
func NewFailureReason(msg string) *FailureReason {
return &FailureReason{reason: msg}
}
// GetReason returns the reason of the FailureReason.
func (e *FailureReason) GetReason() string {
return e.reason
}

View File

@ -1,889 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"context"
"fmt"
"math"
"sync"
"k8s.io/klog"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/util/workqueue"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerlisters "k8s.io/kubernetes/pkg/scheduler/listers"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
// Metadata interface represents anything that can access a predicate metadata.
type Metadata interface {
ShallowCopy() Metadata
AddPod(addedPod *v1.Pod, node *v1.Node) error
RemovePod(deletedPod *v1.Pod, node *v1.Node) error
}
// MetadataProducer is a function that computes predicate metadata for a given pod.
type MetadataProducer func(pod *v1.Pod, sharedLister schedulerlisters.SharedLister) Metadata
// topologyPair is a topology key/value pair (e.g. an AntiAffinityTerm's topology key and value) used in predicate metadata
type topologyPair struct {
key string
value string
}
type podSet map[*v1.Pod]struct{}
type topologyPairSet map[topologyPair]struct{}
// topologyPairsMaps keeps topologyPairToPods and podToTopologyPairs in sync,
// as they are the inverse of each other.
type topologyPairsMaps struct {
topologyPairToPods map[topologyPair]podSet
podToTopologyPairs map[string]topologyPairSet
}
type criticalPath struct {
// topologyValue denotes the topology value mapping to topology key.
topologyValue string
// matchNum denotes the number of matching pods.
matchNum int32
}
// CAVEAT: the reason that `[2]criticalPath` can work is based on the implementation of current
// preemption algorithm, in particular the following 2 facts:
// Fact 1: we only preempt pods on the same node, instead of pods on multiple nodes.
// Fact 2: each node is evaluated on a separate copy of the metadata during its preemption cycle.
// If we plan to turn to a more complex algorithm like "arbitrary pods on multiple nodes", this
// structure needs to be revisited.
type criticalPaths [2]criticalPath
func newCriticalPaths() *criticalPaths {
return &criticalPaths{{matchNum: math.MaxInt32}, {matchNum: math.MaxInt32}}
}
func (paths *criticalPaths) update(tpVal string, num int32) {
// first verify if `tpVal` exists or not
i := -1
if tpVal == paths[0].topologyValue {
i = 0
} else if tpVal == paths[1].topologyValue {
i = 1
}
if i >= 0 {
// `tpVal` exists
paths[i].matchNum = num
if paths[0].matchNum > paths[1].matchNum {
// swap paths[0] and paths[1]
paths[0], paths[1] = paths[1], paths[0]
}
} else {
// `tpVal` doesn't exist
if num < paths[0].matchNum {
// update paths[1] with paths[0]
paths[1] = paths[0]
// update paths[0]
paths[0].topologyValue, paths[0].matchNum = tpVal, num
} else if num < paths[1].matchNum {
// update paths[1]
paths[1].topologyValue, paths[1].matchNum = tpVal, num
}
}
}
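An illustrative trace of the two-slot minimum tracking (zone names are made up):
paths := newCriticalPaths()  // both slots start with matchNum = MaxInt32
paths.update("zone-a", 3)    // paths[0] = {zone-a, 3}
paths.update("zone-b", 1)    // new minimum: paths[0] = {zone-b, 1}, paths[1] = {zone-a, 3}
paths.update("zone-a", 0)    // zone-a is already tracked: set to 0, then swapped into paths[0]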
// evenPodsSpreadMetadata combines tpKeyToCriticalPaths and tpPairToMatchNum
// to represent:
// (1) critical paths where the least pods are matched on each spread constraint.
// (2) number of pods matched on each spread constraint.
type evenPodsSpreadMetadata struct {
constraints []topologySpreadConstraint
// We record 2 critical paths instead of all critical paths here.
// criticalPaths[0].matchNum always holds the minimum matching number.
// criticalPaths[1].matchNum is always greater or equal to criticalPaths[0].matchNum, but
// it's not guaranteed to be the 2nd minimum match number.
tpKeyToCriticalPaths map[string]*criticalPaths
// tpPairToMatchNum is keyed with topologyPair, and valued with the number of matching pods.
tpPairToMatchNum map[topologyPair]int32
}
// topologySpreadConstraint is an internal version for a hard (DoNotSchedule
// unsatisfiable constraint action) v1.TopologySpreadConstraint and where the
// selector is parsed.
type topologySpreadConstraint struct {
maxSkew int32
topologyKey string
selector labels.Selector
}
type serviceAffinityMetadata struct {
matchingPodList []*v1.Pod
matchingPodServices []*v1.Service
}
func (m *serviceAffinityMetadata) addPod(addedPod *v1.Pod, pod *v1.Pod, node *v1.Node) {
// If addedPod is in the same namespace as the pod, update the list
// of matching pods if applicable.
if m == nil || addedPod.Namespace != pod.Namespace {
return
}
selector := CreateSelectorFromLabels(pod.Labels)
if selector.Matches(labels.Set(addedPod.Labels)) {
m.matchingPodList = append(m.matchingPodList, addedPod)
}
}
func (m *serviceAffinityMetadata) removePod(deletedPod *v1.Pod, node *v1.Node) {
deletedPodFullName := schedutil.GetPodFullName(deletedPod)
if m == nil ||
len(m.matchingPodList) == 0 ||
deletedPod.Namespace != m.matchingPodList[0].Namespace {
return
}
for i, pod := range m.matchingPodList {
if schedutil.GetPodFullName(pod) == deletedPodFullName {
m.matchingPodList = append(m.matchingPodList[:i], m.matchingPodList[i+1:]...)
break
}
}
}
func (m *serviceAffinityMetadata) clone() *serviceAffinityMetadata {
if m == nil {
return nil
}
copy := serviceAffinityMetadata{}
copy.matchingPodServices = append([]*v1.Service(nil),
m.matchingPodServices...)
copy.matchingPodList = append([]*v1.Pod(nil),
m.matchingPodList...)
return &copy
}
type podAffinityMetadata struct {
topologyPairsAntiAffinityPodsMap *topologyPairsMaps
// A map of topology pairs to a list of Pods that can potentially match
// the affinity terms of the "pod" and its inverse.
topologyPairsPotentialAffinityPods *topologyPairsMaps
// A map of topology pairs to a list of Pods that can potentially match
// the anti-affinity terms of the "pod" and its inverse.
topologyPairsPotentialAntiAffinityPods *topologyPairsMaps
}
func (m *podAffinityMetadata) addPod(addedPod *v1.Pod, pod *v1.Pod, node *v1.Node) error {
// Add matching anti-affinity terms of the addedPod to the map.
topologyPairsMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(pod, addedPod, node)
if err != nil {
return err
}
m.topologyPairsAntiAffinityPodsMap.appendMaps(topologyPairsMaps)
// Add the pod to nodeNameToMatchingAffinityPods and nodeNameToMatchingAntiAffinityPods if needed.
affinity := pod.Spec.Affinity
podNodeName := addedPod.Spec.NodeName
if affinity != nil && len(podNodeName) > 0 {
// It is assumed that when the added pod matches the affinity of the pod, all the terms must
// match; this should be changed when the implementation of
// targetPodMatchesAffinityOfPod/podMatchesAffinityTermProperties changes.
if targetPodMatchesAffinityOfPod(pod, addedPod) {
affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
for _, term := range affinityTerms {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
m.topologyPairsPotentialAffinityPods.addTopologyPair(pair, addedPod)
}
}
}
if targetPodMatchesAntiAffinityOfPod(pod, addedPod) {
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
for _, term := range antiAffinityTerms {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
m.topologyPairsPotentialAntiAffinityPods.addTopologyPair(pair, addedPod)
}
}
}
}
return nil
}
func (m *podAffinityMetadata) removePod(deletedPod *v1.Pod) {
if m == nil {
return
}
m.topologyPairsAntiAffinityPodsMap.removePod(deletedPod)
// Delete pod from the matching affinity or anti-affinity topology pairs maps.
m.topologyPairsPotentialAffinityPods.removePod(deletedPod)
m.topologyPairsPotentialAntiAffinityPods.removePod(deletedPod)
}
func (m *podAffinityMetadata) clone() *podAffinityMetadata {
if m == nil {
return nil
}
copy := podAffinityMetadata{}
copy.topologyPairsPotentialAffinityPods = m.topologyPairsPotentialAffinityPods.clone()
copy.topologyPairsPotentialAntiAffinityPods = m.topologyPairsPotentialAntiAffinityPods.clone()
copy.topologyPairsAntiAffinityPodsMap = m.topologyPairsAntiAffinityPodsMap.clone()
return &copy
}
type podFitsResourcesMetadata struct {
// ignoredExtendedResources is a set of extended resource names that will
// be ignored in the PodFitsResources predicate.
//
// They can be scheduler extender managed resources, the consumption of
// which should be accounted only by the extenders. This set is synthesized
// from scheduler extender configuration and does not change per pod.
ignoredExtendedResources sets.String
podRequest *schedulernodeinfo.Resource
}
func (m *podFitsResourcesMetadata) clone() *podFitsResourcesMetadata {
if m == nil {
return nil
}
copy := podFitsResourcesMetadata{}
copy.ignoredExtendedResources = m.ignoredExtendedResources
copy.podRequest = m.podRequest
return &copy
}
type podFitsHostPortsMetadata struct {
podPorts []*v1.ContainerPort
}
func (m *podFitsHostPortsMetadata) clone() *podFitsHostPortsMetadata {
if m == nil {
return nil
}
copy := podFitsHostPortsMetadata{}
copy.podPorts = append([]*v1.ContainerPort(nil), m.podPorts...)
return &copy
}
// NOTE: When new fields are added/removed or logic is changed, please make sure that
// RemovePod, AddPod, and ShallowCopy functions are updated to work with the new changes.
type predicateMetadata struct {
pod *v1.Pod
podBestEffort bool
// evenPodsSpreadMetadata holds info of the minimum match number on each topology spread constraint,
// and the match number of all valid topology pairs.
evenPodsSpreadMetadata *evenPodsSpreadMetadata
serviceAffinityMetadata *serviceAffinityMetadata
podAffinityMetadata *podAffinityMetadata
podFitsResourcesMetadata *podFitsResourcesMetadata
podFitsHostPortsMetadata *podFitsHostPortsMetadata
}
// Ensure that predicateMetadata implements algorithm.Metadata.
var _ Metadata = &predicateMetadata{}
// predicateMetadataProducer function produces predicate metadata. It is stored in a global variable below
// and used to modify the return values of MetadataProducer
type predicateMetadataProducer func(pm *predicateMetadata)
var predicateMetadataProducers = make(map[string]predicateMetadataProducer)
// RegisterPredicateMetadataProducer registers a MetadataProducer.
func RegisterPredicateMetadataProducer(predicateName string, precomp predicateMetadataProducer) {
predicateMetadataProducers[predicateName] = precomp
}
// EmptyMetadataProducer returns a no-op MetadataProducer type.
func EmptyMetadataProducer(pod *v1.Pod, sharedLister schedulerlisters.SharedLister) Metadata {
return nil
}
// RegisterPredicateMetadataProducerWithExtendedResourceOptions registers a
// MetadataProducer that creates predicate metadata with the provided
// options for extended resources.
//
// See the comments in "predicateMetadata" for the explanation of the options.
func RegisterPredicateMetadataProducerWithExtendedResourceOptions(ignoredExtendedResources sets.String) {
RegisterPredicateMetadataProducer("PredicateWithExtendedResourceOptions", func(pm *predicateMetadata) {
pm.podFitsResourcesMetadata.ignoredExtendedResources = ignoredExtendedResources
})
}
// MetadataProducerFactory is a factory to produce Metadata.
type MetadataProducerFactory struct{}
// GetPredicateMetadata returns the predicateMetadata which will be used by various predicates.
func (f *MetadataProducerFactory) GetPredicateMetadata(pod *v1.Pod, sharedLister schedulerlisters.SharedLister) Metadata {
// If we cannot compute metadata, just return nil
if pod == nil {
return nil
}
var allNodes []*schedulernodeinfo.NodeInfo
var havePodsWithAffinityNodes []*schedulernodeinfo.NodeInfo
if sharedLister != nil {
var err error
allNodes, err = sharedLister.NodeInfos().List()
if err != nil {
klog.Errorf("failed to list NodeInfos: %v", err)
return nil
}
havePodsWithAffinityNodes, err = sharedLister.NodeInfos().HavePodsWithAffinityList()
if err != nil {
klog.Errorf("failed to list NodeInfos: %v", err)
return nil
}
}
// evenPodsSpreadMetadata represents how existing pods match "pod"
// on its spread constraints
evenPodsSpreadMetadata, err := getEvenPodsSpreadMetadata(pod, allNodes)
if err != nil {
klog.Errorf("Error calculating spreadConstraintsMap: %v", err)
return nil
}
podAffinityMetadata, err := getPodAffinityMetadata(pod, allNodes, havePodsWithAffinityNodes)
if err != nil {
klog.Errorf("Error calculating podAffinityMetadata: %v", err)
return nil
}
predicateMetadata := &predicateMetadata{
pod: pod,
evenPodsSpreadMetadata: evenPodsSpreadMetadata,
podAffinityMetadata: podAffinityMetadata,
podFitsResourcesMetadata: getPodFitsResourcesMetadata(pod),
podFitsHostPortsMetadata: getPodFitsHostPortsMetadata(pod),
}
for predicateName, precomputeFunc := range predicateMetadataProducers {
klog.V(10).Infof("Precompute: %v", predicateName)
precomputeFunc(predicateMetadata)
}
return predicateMetadata
}
func getPodFitsHostPortsMetadata(pod *v1.Pod) *podFitsHostPortsMetadata {
return &podFitsHostPortsMetadata{
podPorts: schedutil.GetContainerPorts(pod),
}
}
func getPodFitsResourcesMetadata(pod *v1.Pod) *podFitsResourcesMetadata {
return &podFitsResourcesMetadata{
podRequest: GetResourceRequest(pod),
}
}
func getPodAffinityMetadata(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo, havePodsWithAffinityNodes []*schedulernodeinfo.NodeInfo) (*podAffinityMetadata, error) {
// existingPodAntiAffinityMap will be used later for efficient check on existing pods' anti-affinity
existingPodAntiAffinityMap, err := getTPMapMatchingExistingAntiAffinity(pod, havePodsWithAffinityNodes)
if err != nil {
return nil, err
}
// incomingPodAffinityMap will be used later for efficient check on incoming pod's affinity
// incomingPodAntiAffinityMap will be used later for efficient check on incoming pod's anti-affinity
incomingPodAffinityMap, incomingPodAntiAffinityMap, err := getTPMapMatchingIncomingAffinityAntiAffinity(pod, allNodes)
if err != nil {
return nil, err
}
return &podAffinityMetadata{
topologyPairsPotentialAffinityPods: incomingPodAffinityMap,
topologyPairsPotentialAntiAffinityPods: incomingPodAntiAffinityMap,
topologyPairsAntiAffinityPodsMap: existingPodAntiAffinityMap,
}, nil
}
func getEvenPodsSpreadMetadata(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo) (*evenPodsSpreadMetadata, error) {
// Feature gating in the API server already strips the spec, so we don't need to
// re-check the feature gate here; just check the length of the constraints.
constraints, err := filterHardTopologySpreadConstraints(pod.Spec.TopologySpreadConstraints)
if err != nil {
return nil, err
}
if len(constraints) == 0 {
return nil, nil
}
var lock sync.Mutex
// TODO(Huang-Wei): It might be possible to use "make(map[topologyPair]*int32)".
// In that case, need to consider how to init each tpPairToCount[pair] in an atomic fashion.
m := evenPodsSpreadMetadata{
constraints: constraints,
tpKeyToCriticalPaths: make(map[string]*criticalPaths, len(constraints)),
tpPairToMatchNum: make(map[topologyPair]int32),
}
addTopologyPairMatchNum := func(pair topologyPair, num int32) {
lock.Lock()
m.tpPairToMatchNum[pair] += num
lock.Unlock()
}
processNode := func(i int) {
nodeInfo := allNodes[i]
node := nodeInfo.Node()
if node == nil {
klog.Error("node not found")
return
}
// In accordance with the design, if NodeAffinity or NodeSelector is defined,
// spreading is applied to nodes that pass those filters.
if !PodMatchesNodeSelectorAndAffinityTerms(pod, node) {
return
}
// Ensure the current node's labels contain all topologyKeys in 'constraints'.
if !NodeLabelsMatchSpreadConstraints(node.Labels, constraints) {
return
}
for _, constraint := range constraints {
matchTotal := int32(0)
// nodeInfo.Pods() can be empty; or all pods don't fit
for _, existingPod := range nodeInfo.Pods() {
if existingPod.Namespace != pod.Namespace {
continue
}
if constraint.selector.Matches(labels.Set(existingPod.Labels)) {
matchTotal++
}
}
pair := topologyPair{key: constraint.topologyKey, value: node.Labels[constraint.topologyKey]}
addTopologyPairMatchNum(pair, matchTotal)
}
}
workqueue.ParallelizeUntil(context.Background(), 16, len(allNodes), processNode)
// calculate min match for each topology pair
for i := 0; i < len(constraints); i++ {
key := constraints[i].topologyKey
m.tpKeyToCriticalPaths[key] = newCriticalPaths()
}
for pair, num := range m.tpPairToMatchNum {
m.tpKeyToCriticalPaths[pair.key].update(pair.value, num)
}
return &m, nil
}
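For intuition, a sketch of the final aggregation step with made-up match counts for a single "zone" constraint; after the loop, the first critical path holds the minimum:
m := evenPodsSpreadMetadata{
	tpKeyToCriticalPaths: map[string]*criticalPaths{"zone": newCriticalPaths()},
	tpPairToMatchNum: map[topologyPair]int32{
		{key: "zone", value: "zone-a"}: 5,
		{key: "zone", value: "zone-b"}: 2,
	},
}
for pair, num := range m.tpPairToMatchNum {
	m.tpKeyToCriticalPaths[pair.key].update(pair.value, num)
}
// m.tpKeyToCriticalPaths["zone"][0] is now {zone-b, 2}: the minimum match count.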
func filterHardTopologySpreadConstraints(constraints []v1.TopologySpreadConstraint) ([]topologySpreadConstraint, error) {
var result []topologySpreadConstraint
for _, c := range constraints {
if c.WhenUnsatisfiable == v1.DoNotSchedule {
selector, err := metav1.LabelSelectorAsSelector(c.LabelSelector)
if err != nil {
return nil, err
}
result = append(result, topologySpreadConstraint{
maxSkew: c.MaxSkew,
topologyKey: c.TopologyKey,
selector: selector,
})
}
}
return result, nil
}
// NodeLabelsMatchSpreadConstraints checks if ALL topology keys in spread constraints are present in node labels.
func NodeLabelsMatchSpreadConstraints(nodeLabels map[string]string, constraints []topologySpreadConstraint) bool {
for _, c := range constraints {
if _, ok := nodeLabels[c.topologyKey]; !ok {
return false
}
}
return true
}
// newTopologyPairsMaps returns a pointer to a new topologyPairsMaps
func newTopologyPairsMaps() *topologyPairsMaps {
return &topologyPairsMaps{
topologyPairToPods: make(map[topologyPair]podSet),
podToTopologyPairs: make(map[string]topologyPairSet),
}
}
func (m *topologyPairsMaps) addTopologyPair(pair topologyPair, pod *v1.Pod) {
podFullName := schedutil.GetPodFullName(pod)
if m.topologyPairToPods[pair] == nil {
m.topologyPairToPods[pair] = make(map[*v1.Pod]struct{})
}
m.topologyPairToPods[pair][pod] = struct{}{}
if m.podToTopologyPairs[podFullName] == nil {
m.podToTopologyPairs[podFullName] = make(map[topologyPair]struct{})
}
m.podToTopologyPairs[podFullName][pair] = struct{}{}
}
func (m *topologyPairsMaps) removePod(deletedPod *v1.Pod) {
deletedPodFullName := schedutil.GetPodFullName(deletedPod)
for pair := range m.podToTopologyPairs[deletedPodFullName] {
delete(m.topologyPairToPods[pair], deletedPod)
if len(m.topologyPairToPods[pair]) == 0 {
delete(m.topologyPairToPods, pair)
}
}
delete(m.podToTopologyPairs, deletedPodFullName)
}
func (m *topologyPairsMaps) appendMaps(toAppend *topologyPairsMaps) {
if toAppend == nil {
return
}
for pair := range toAppend.topologyPairToPods {
for pod := range toAppend.topologyPairToPods[pair] {
m.addTopologyPair(pair, pod)
}
}
}
func (m *topologyPairsMaps) clone() *topologyPairsMaps {
copy := newTopologyPairsMaps()
copy.appendMaps(m)
return copy
}
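// Example (minimal sketch, hypothetical pod): the two indexes stay in sync;
// removing the last pod for a pair also deletes the pair entry itself.
func exampleTopologyPairsMaps() {
	m := newTopologyPairsMaps()
	p := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p", Namespace: "default"}}
	m.addTopologyPair(topologyPair{key: "zone", value: "zone-a"}, p)
	m.removePod(p) // both maps are empty again
}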
func (m *evenPodsSpreadMetadata) addPod(addedPod, preemptorPod *v1.Pod, node *v1.Node) {
m.updatePod(addedPod, preemptorPod, node, 1)
}
func (m *evenPodsSpreadMetadata) removePod(deletedPod, preemptorPod *v1.Pod, node *v1.Node) {
m.updatePod(deletedPod, preemptorPod, node, -1)
}
func (m *evenPodsSpreadMetadata) updatePod(updatedPod, preemptorPod *v1.Pod, node *v1.Node, delta int32) {
if m == nil || updatedPod.Namespace != preemptorPod.Namespace || node == nil {
return
}
if !NodeLabelsMatchSpreadConstraints(node.Labels, m.constraints) {
return
}
podLabelSet := labels.Set(updatedPod.Labels)
for _, constraint := range m.constraints {
if !constraint.selector.Matches(podLabelSet) {
continue
}
k, v := constraint.topologyKey, node.Labels[constraint.topologyKey]
pair := topologyPair{key: k, value: v}
m.tpPairToMatchNum[pair] = m.tpPairToMatchNum[pair] + delta
m.tpKeyToCriticalPaths[k].update(v, m.tpPairToMatchNum[pair])
}
}
func (m *evenPodsSpreadMetadata) clone() *evenPodsSpreadMetadata {
// m could be nil when the EvenPodsSpread feature is disabled
if m == nil {
return nil
}
cp := evenPodsSpreadMetadata{
// constraints are shared because they don't change.
constraints: m.constraints,
tpKeyToCriticalPaths: make(map[string]*criticalPaths, len(m.tpKeyToCriticalPaths)),
tpPairToMatchNum: make(map[topologyPair]int32, len(m.tpPairToMatchNum)),
}
for tpKey, paths := range m.tpKeyToCriticalPaths {
cp.tpKeyToCriticalPaths[tpKey] = &criticalPaths{paths[0], paths[1]}
}
for tpPair, matchNum := range m.tpPairToMatchNum {
copyPair := topologyPair{key: tpPair.key, value: tpPair.value}
cp.tpPairToMatchNum[copyPair] = matchNum
}
return &cp
}
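// Example (minimal sketch): clone copies the match-count and critical-path maps,
// so mutating the copy leaves the original metadata untouched.
func exampleEvenPodsSpreadClone(m *evenPodsSpreadMetadata) {
	cp := m.clone()
	if cp != nil {
		cp.tpPairToMatchNum[topologyPair{key: "zone", value: "zone-b"}]++ // m is unchanged
	}
}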
// RemovePod changes predicateMetadata assuming that the given `deletedPod` is
// deleted from the system.
func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod, node *v1.Node) error {
deletedPodFullName := schedutil.GetPodFullName(deletedPod)
if deletedPodFullName == schedutil.GetPodFullName(meta.pod) {
return fmt.Errorf("deletedPod and meta.pod must not be the same")
}
meta.podAffinityMetadata.removePod(deletedPod)
meta.evenPodsSpreadMetadata.removePod(deletedPod, meta.pod, node)
meta.serviceAffinityMetadata.removePod(deletedPod, node)
return nil
}
// AddPod changes predicateMetadata assuming that the given `addedPod` is added to the
// system.
func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, node *v1.Node) error {
addedPodFullName := schedutil.GetPodFullName(addedPod)
if addedPodFullName == schedutil.GetPodFullName(meta.pod) {
return fmt.Errorf("addedPod and meta.pod must not be the same")
}
if node == nil {
return fmt.Errorf("node not found")
}
if err := meta.podAffinityMetadata.addPod(addedPod, meta.pod, node); err != nil {
return err
}
// Update meta.evenPodsSpreadMetadata if meta.pod has hard spread constraints
// and addedPod matches them.
meta.evenPodsSpreadMetadata.addPod(addedPod, meta.pod, node)
meta.serviceAffinityMetadata.addPod(addedPod, meta.pod, node)
return nil
}
// ShallowCopy copies a metadata struct into a new struct and creates a copy of
// its maps and slices, but it does not copy the contents of pointer values.
func (meta *predicateMetadata) ShallowCopy() Metadata {
newPredMeta := &predicateMetadata{
pod: meta.pod,
podBestEffort: meta.podBestEffort,
}
newPredMeta.podFitsHostPortsMetadata = meta.podFitsHostPortsMetadata.clone()
newPredMeta.podAffinityMetadata = meta.podAffinityMetadata.clone()
newPredMeta.evenPodsSpreadMetadata = meta.evenPodsSpreadMetadata.clone()
newPredMeta.serviceAffinityMetadata = meta.serviceAffinityMetadata.clone()
newPredMeta.podFitsResourcesMetadata = meta.podFitsResourcesMetadata.clone()
return (Metadata)(newPredMeta)
}
type affinityTermProperties struct {
namespaces sets.String
selector labels.Selector
}
// getAffinityTermProperties receives a Pod and affinity terms and returns the namespaces and
// selectors of the terms.
func getAffinityTermProperties(pod *v1.Pod, terms []v1.PodAffinityTerm) (properties []*affinityTermProperties, err error) {
if terms == nil {
return properties, nil
}
for _, term := range terms {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
return nil, err
}
properties = append(properties, &affinityTermProperties{namespaces: namespaces, selector: selector})
}
return properties, nil
}
// podMatchesAllAffinityTermProperties returns true IFF the given pod matches all the given properties.
func podMatchesAllAffinityTermProperties(pod *v1.Pod, properties []*affinityTermProperties) bool {
if len(properties) == 0 {
return false
}
for _, property := range properties {
if !priorityutil.PodMatchesTermsNamespaceAndSelector(pod, property.namespaces, property.selector) {
return false
}
}
return true
}
// podMatchesAnyAffinityTermProperties returns true if the given pod matches any given property.
func podMatchesAnyAffinityTermProperties(pod *v1.Pod, properties []*affinityTermProperties) bool {
if len(properties) == 0 {
return false
}
for _, property := range properties {
if priorityutil.PodMatchesTermsNamespaceAndSelector(pod, property.namespaces, property.selector) {
return true
}
}
return false
}
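// Example (minimal sketch): deriving properties once and reusing them for both
// the "all" and "any" checks, as the (anti-)affinity predicates do.
func exampleTermMatching(pod, existingPod *v1.Pod, terms []v1.PodAffinityTerm) (all, any bool, err error) {
	properties, err := getAffinityTermProperties(pod, terms)
	if err != nil {
		return false, false, err
	}
	return podMatchesAllAffinityTermProperties(existingPod, properties),
		podMatchesAnyAffinityTermProperties(existingPod, properties), nil
}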
// getTPMapMatchingExistingAntiAffinity calculates the following for each existing pod on each node:
// (1) Whether it has PodAntiAffinity
// (2) Whether any AffinityTerm matches the incoming pod
func getTPMapMatchingExistingAntiAffinity(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo) (*topologyPairsMaps, error) {
errCh := schedutil.NewErrorChannel()
var lock sync.Mutex
topologyMaps := newTopologyPairsMaps()
appendTopologyPairsMaps := func(toAppend *topologyPairsMaps) {
lock.Lock()
defer lock.Unlock()
topologyMaps.appendMaps(toAppend)
}
ctx, cancel := context.WithCancel(context.Background())
processNode := func(i int) {
nodeInfo := allNodes[i]
node := nodeInfo.Node()
if node == nil {
klog.Error("node not found")
return
}
for _, existingPod := range nodeInfo.PodsWithAffinity() {
existingPodTopologyMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(pod, existingPod, node)
if err != nil {
errCh.SendErrorWithCancel(err, cancel)
return
}
if existingPodTopologyMaps != nil {
appendTopologyPairsMaps(existingPodTopologyMaps)
}
}
}
workqueue.ParallelizeUntil(ctx, 16, len(allNodes), processNode)
if err := errCh.ReceiveError(); err != nil {
return nil, err
}
return topologyMaps, nil
}
// getTPMapMatchingIncomingAffinityAntiAffinity finds existing Pods that match affinity terms of the given "pod".
// It returns the topologyPairsMaps that are checked later by the affinity
// predicate. With this topologyPairsMaps available, the affinity predicate does not
// need to check all the pods in the cluster.
func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo) (topologyPairsAffinityPodsMaps *topologyPairsMaps, topologyPairsAntiAffinityPodsMaps *topologyPairsMaps, err error) {
affinity := pod.Spec.Affinity
if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
return newTopologyPairsMaps(), newTopologyPairsMaps(), nil
}
errCh := schedutil.NewErrorChannel()
var lock sync.Mutex
topologyPairsAffinityPodsMaps = newTopologyPairsMaps()
topologyPairsAntiAffinityPodsMaps = newTopologyPairsMaps()
appendResult := func(nodeName string, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps *topologyPairsMaps) {
lock.Lock()
defer lock.Unlock()
if len(nodeTopologyPairsAffinityPodsMaps.topologyPairToPods) > 0 {
topologyPairsAffinityPodsMaps.appendMaps(nodeTopologyPairsAffinityPodsMaps)
}
if len(nodeTopologyPairsAntiAffinityPodsMaps.topologyPairToPods) > 0 {
topologyPairsAntiAffinityPodsMaps.appendMaps(nodeTopologyPairsAntiAffinityPodsMaps)
}
}
affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
affinityProperties, err := getAffinityTermProperties(pod, affinityTerms)
if err != nil {
return nil, nil, err
}
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
ctx, cancel := context.WithCancel(context.Background())
processNode := func(i int) {
nodeInfo := allNodes[i]
node := nodeInfo.Node()
if node == nil {
klog.Error("node not found")
return
}
nodeTopologyPairsAffinityPodsMaps := newTopologyPairsMaps()
nodeTopologyPairsAntiAffinityPodsMaps := newTopologyPairsMaps()
for _, existingPod := range nodeInfo.Pods() {
// Check affinity properties.
if podMatchesAllAffinityTermProperties(existingPod, affinityProperties) {
for _, term := range affinityTerms {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
nodeTopologyPairsAffinityPodsMaps.addTopologyPair(pair, existingPod)
}
}
}
// Check anti-affinity properties.
for _, term := range antiAffinityTerms {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
errCh.SendErrorWithCancel(err, cancel)
return
}
if priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
nodeTopologyPairsAntiAffinityPodsMaps.addTopologyPair(pair, existingPod)
}
}
}
}
if len(nodeTopologyPairsAffinityPodsMaps.topologyPairToPods) > 0 || len(nodeTopologyPairsAntiAffinityPodsMaps.topologyPairToPods) > 0 {
appendResult(node.Name, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps)
}
}
workqueue.ParallelizeUntil(ctx, 16, len(allNodes), processNode)
if err := errCh.ReceiveError(); err != nil {
return nil, nil, err
}
return topologyPairsAffinityPodsMaps, topologyPairsAntiAffinityPodsMaps, nil
}
// targetPodMatchesAffinityOfPod returns true if "targetPod" matches ALL affinity terms of
// "pod". This function does not check topology.
// So, whether the targetPod actually matches or not needs further checks for a specific
// node.
func targetPodMatchesAffinityOfPod(pod, targetPod *v1.Pod) bool {
affinity := pod.Spec.Affinity
if affinity == nil || affinity.PodAffinity == nil {
return false
}
affinityProperties, err := getAffinityTermProperties(pod, GetPodAffinityTerms(affinity.PodAffinity))
if err != nil {
klog.Errorf("error in getting affinity properties of Pod %v", pod.Name)
return false
}
return podMatchesAllAffinityTermProperties(targetPod, affinityProperties)
}
// targetPodMatchesAntiAffinityOfPod returns true if "targetPod" matches ANY anti-affinity
// term of "pod". This function does not check topology.
// So, whether the targetPod actually matches or not needs further checks for a specific
// node.
func targetPodMatchesAntiAffinityOfPod(pod, targetPod *v1.Pod) bool {
affinity := pod.Spec.Affinity
if affinity == nil || affinity.PodAntiAffinity == nil {
return false
}
properties, err := getAffinityTermProperties(pod, GetPodAntiAffinityTerms(affinity.PodAntiAffinity))
if err != nil {
klog.Errorf("error in getting anti-affinity properties of Pod %v", pod.Name)
return false
}
return podMatchesAnyAffinityTermProperties(targetPod, properties)
}
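// Example (minimal sketch): during preemption, both symmetry helpers are
// consulted; a target pod can match anti-affinity terms of "pod" without
// matching any of its affinity terms (topology is checked separately).
func exampleTargetPodChecks(pod, targetPod *v1.Pod) (matchesAffinity, matchesAntiAffinity bool) {
	return targetPodMatchesAffinityOfPod(pod, targetPod),
		targetPodMatchesAntiAffinityOfPod(pod, targetPod)
}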

File diff suppressed because it is too large

View File

@ -1,149 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"strings"
v1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
csilibplugins "k8s.io/csi-translation-lib/plugins"
"k8s.io/kubernetes/pkg/features"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// FindLabelsInSet gets as many key/value pairs as possible out of a label set.
func FindLabelsInSet(labelsToKeep []string, selector labels.Set) map[string]string {
aL := make(map[string]string)
for _, l := range labelsToKeep {
if selector.Has(l) {
aL[l] = selector.Get(l)
}
}
return aL
}
// AddUnsetLabelsToMap backfills missing values with values we find in a map.
func AddUnsetLabelsToMap(aL map[string]string, labelsToAdd []string, labelSet labels.Set) {
for _, l := range labelsToAdd {
// if the label is already there, don't overwrite it.
if _, exists := aL[l]; exists {
continue
}
// otherwise, backfill this label.
if labelSet.Has(l) {
aL[l] = labelSet.Get(l)
}
}
}
// FilterPodsByNamespace filters pods outside a namespace from the given list.
func FilterPodsByNamespace(pods []*v1.Pod, ns string) []*v1.Pod {
filtered := []*v1.Pod{}
for _, nsPod := range pods {
if nsPod.Namespace == ns {
filtered = append(filtered, nsPod)
}
}
return filtered
}
// CreateSelectorFromLabels is used to define a selector that corresponds to the keys in a map.
func CreateSelectorFromLabels(aL map[string]string) labels.Selector {
if len(aL) == 0 {
return labels.Everything()
}
return labels.Set(aL).AsSelector()
}
// portsConflict checks whether existingPorts and wantPorts conflict with each other;
// it returns true if there is a conflict.
func portsConflict(existingPorts schedulernodeinfo.HostPortInfo, wantPorts []*v1.ContainerPort) bool {
for _, cp := range wantPorts {
if existingPorts.CheckConflict(cp.HostIP, string(cp.Protocol), cp.HostPort) {
return true
}
}
return false
}
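// Example (minimal sketch, hypothetical ports; assumes HostPortInfo.Add with
// the signature used below): a wanted HostPort that is already taken on the
// same IP/protocol reports a conflict.
func examplePortsConflict() bool {
	existing := make(schedulernodeinfo.HostPortInfo)
	existing.Add("0.0.0.0", "TCP", 8080)
	want := []*v1.ContainerPort{{HostPort: 8080, Protocol: v1.ProtocolTCP}}
	return portsConflict(existing, want) // true
}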
// SetPredicatesOrderingDuringTest sets the predicatesOrdering to the specified
// value, and returns a function that restores the original value.
func SetPredicatesOrderingDuringTest(value []string) func() {
origVal := predicatesOrdering
predicatesOrdering = value
return func() {
predicatesOrdering = origVal
}
}
// isCSIMigrationOn returns a boolean value indicating whether
// the CSI migration has been enabled for a particular storage plugin.
func isCSIMigrationOn(csiNode *storagev1.CSINode, pluginName string) bool {
if csiNode == nil || len(pluginName) == 0 {
return false
}
// In-tree storage to CSI driver migration feature should be enabled,
// along with the plugin-specific one
if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigration) {
return false
}
switch pluginName {
case csilibplugins.AWSEBSInTreePluginName:
if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationAWS) {
return false
}
case csilibplugins.GCEPDInTreePluginName:
if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationGCE) {
return false
}
case csilibplugins.AzureDiskInTreePluginName:
if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationAzureDisk) {
return false
}
case csilibplugins.CinderInTreePluginName:
if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationOpenStack) {
return false
}
default:
return false
}
// The plugin name should be listed in the CSINode object annotation.
// This indicates that the plugin has been migrated to a CSI driver in the node.
csiNodeAnn := csiNode.GetAnnotations()
if csiNodeAnn == nil {
return false
}
var mpaSet sets.String
mpa := csiNodeAnn[v1.MigratedPluginsAnnotationKey]
if len(mpa) == 0 {
mpaSet = sets.NewString()
} else {
tok := strings.Split(mpa, ",")
mpaSet = sets.NewString(tok...)
}
return mpaSet.Has(pluginName)
}
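// Example (minimal sketch, hypothetical object): even with the feature gates
// enabled, migration is only "on" once the plugin name appears in the CSINode
// migrated-plugins annotation.
func exampleIsCSIMigrationOn() bool {
	csiNode := &storagev1.CSINode{}
	csiNode.SetAnnotations(map[string]string{
		v1.MigratedPluginsAnnotationKey: csilibplugins.AWSEBSInTreePluginName,
	})
	return isCSIMigrationOn(csiNode, csilibplugins.AWSEBSInTreePluginName)
}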

View File

@ -1,53 +0,0 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_test(
name = "go_default_test",
srcs = [
"non_zero_test.go",
"topologies_test.go",
],
embed = [":go_default_library"],
deps = [
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/selection:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
],
)
go_library(
name = "go_default_library",
srcs = [
"non_zero.go",
"topologies.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util",
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@ -1,74 +0,0 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"k8s.io/api/core/v1"
extenderv1 "k8s.io/kubernetes/pkg/scheduler/apis/extender/v1"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// SchedulerExtender is an interface for external processes to influence scheduling
// decisions made by Kubernetes. This is typically needed for resources not directly
// managed by Kubernetes.
type SchedulerExtender interface {
// Name returns a unique name that identifies the extender.
Name() string
// Filter based on extender-implemented predicate functions. The filtered list is
// expected to be a subset of the supplied list. failedNodesMap optionally contains
// the list of failed nodes and failure reasons.
Filter(pod *v1.Pod,
nodes []*v1.Node, nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo,
) (filteredNodes []*v1.Node, failedNodesMap extenderv1.FailedNodesMap, err error)
// Prioritize based on extender-implemented priority functions. The returned scores & weight
// are used to compute the weighted score for an extender. The weighted scores are added to
// the scores computed by Kubernetes scheduler. The total scores are used to do the host selection.
Prioritize(pod *v1.Pod, nodes []*v1.Node) (hostPriorities *extenderv1.HostPriorityList, weight int64, err error)
// Bind delegates the action of binding a pod to a node to the extender.
Bind(binding *v1.Binding) error
// IsBinder returns whether this extender is configured for the Bind method.
IsBinder() bool
// IsInterested returns true if at least one extended resource requested by
// this pod is managed by this extender.
IsInterested(pod *v1.Pod) bool
// ProcessPreemption returns nodes with their victim pods processed by extender based on
// given:
// 1. Pod to schedule
// 2. Candidate nodes and victim pods (nodeToVictims) generated by previous scheduling process.
// 3. nodeNameToInfo to restore v1.Node from node name if extender cache is enabled.
// The possible changes made by extender may include:
// 1. Subset of given candidate nodes after preemption phase of extender.
// 2. A different set of victim pod for every given candidate node after preemption phase of extender.
ProcessPreemption(
pod *v1.Pod,
nodeToVictims map[*v1.Node]*extenderv1.Victims,
nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo,
) (map[*v1.Node]*extenderv1.Victims, error)
// SupportsPreemption returns if the scheduler extender support preemption or not.
SupportsPreemption() bool
// IsIgnorable returns true to indicate that scheduling should not fail when this
// extender is unavailable. This gives the scheduler the ability to fail fast and tolerate non-critical extenders.
IsIgnorable() bool
}

View File

@ -1,99 +0,0 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
appslisters "k8s.io/client-go/listers/apps/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/kubernetes/pkg/apis/apps"
api "k8s.io/kubernetes/pkg/apis/core"
)
// NodeFieldSelectorKeys is a map whose keys are node field selector keys and whose
// values are functions that return the corresponding field value from a node.
var NodeFieldSelectorKeys = map[string]func(*v1.Node) string{
api.ObjectNameField: func(n *v1.Node) string { return n.Name },
}
var _ corelisters.ReplicationControllerLister = &EmptyControllerLister{}
// EmptyControllerLister implements ControllerLister on []v1.ReplicationController returning empty data
type EmptyControllerLister struct{}
// List returns nil
func (f EmptyControllerLister) List(labels.Selector) ([]*v1.ReplicationController, error) {
return nil, nil
}
// GetPodControllers returns nil
func (f EmptyControllerLister) GetPodControllers(pod *v1.Pod) (controllers []*v1.ReplicationController, err error) {
return nil, nil
}
// ReplicationControllers returns nil
func (f EmptyControllerLister) ReplicationControllers(namespace string) corelisters.ReplicationControllerNamespaceLister {
return nil
}
var _ appslisters.ReplicaSetLister = &EmptyReplicaSetLister{}
// EmptyReplicaSetLister implements ReplicaSetLister on []extensions.ReplicaSet returning empty data
type EmptyReplicaSetLister struct{}
// List returns nil
func (f EmptyReplicaSetLister) List(labels.Selector) ([]*appsv1.ReplicaSet, error) {
return nil, nil
}
// GetPodReplicaSets returns nil
func (f EmptyReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*appsv1.ReplicaSet, err error) {
return nil, nil
}
// ReplicaSets returns nil
func (f EmptyReplicaSetLister) ReplicaSets(namespace string) appslisters.ReplicaSetNamespaceLister {
return nil
}
// StatefulSetLister interface represents anything that can produce a list of StatefulSet; the list is consumed by a scheduler.
type StatefulSetLister interface {
// GetPodStatefulSets gets the StatefulSets for the given pod.
GetPodStatefulSets(*v1.Pod) ([]*apps.StatefulSet, error)
}
var _ appslisters.StatefulSetLister = &EmptyStatefulSetLister{}
// EmptyStatefulSetLister implements StatefulSetLister on []apps.StatefulSet returning empty data.
type EmptyStatefulSetLister struct{}
// List returns nil
func (f EmptyStatefulSetLister) List(labels.Selector) ([]*appsv1.StatefulSet, error) {
return nil, nil
}
// GetPodStatefulSets of EmptyStatefulSetLister returns nil.
func (f EmptyStatefulSetLister) GetPodStatefulSets(pod *v1.Pod) (sss []*appsv1.StatefulSet, err error) {
return nil, nil
}
// StatefulSets returns nil
func (f EmptyStatefulSetLister) StatefulSets(namespace string) appslisters.StatefulSetNamespaceLister {
return nil
}

View File

@ -0,0 +1,50 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"legacy_types.go",
"register.go",
"types.go",
"zz_generated.deepcopy.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/apis/config",
visibility = ["//visibility:public"],
deps = [
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/component-base/config:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/scheduler/apis/config/scheme:all-srcs",
"//pkg/scheduler/apis/config/testing:all-srcs",
"//pkg/scheduler/apis/config/v1:all-srcs",
"//pkg/scheduler/apis/config/v1alpha1:all-srcs",
"//pkg/scheduler/apis/config/v1alpha2:all-srcs",
"//pkg/scheduler/apis/config/validation:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = ["types_test.go"],
embed = [":go_default_library"],
deps = ["//vendor/github.com/google/go-cmp/cmp:go_default_library"],
)

View File

@ -0,0 +1,13 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- api-approvers
- sig-scheduling-maintainers
- sttts
- luxas
reviewers:
- sig-scheduling
- api-reviewers
- dixudx
- luxas
- sttts

View File

@ -1,5 +1,5 @@
/*
Copyright 2019 The Kubernetes Authors.
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -15,6 +15,6 @@ limitations under the License.
*/
// +k8s:deepcopy-gen=package
// +groupName=kubescheduler.config.k8s.io
// Package v1 contains scheduler API objects.
package v1 // import "k8s.io/kubernetes/pkg/scheduler/apis/extender/v1"
package config // import "k8s.io/kubernetes/pkg/scheduler/apis/config"

View File

@ -0,0 +1,234 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// Policy describes a struct of a policy resource in api.
type Policy struct {
metav1.TypeMeta
// Holds the information to configure the fit predicate functions.
// If unspecified, the default predicate functions will be applied.
// If empty list, all predicates (except the mandatory ones) will be
// bypassed.
Predicates []PredicatePolicy
// Holds the information to configure the priority functions.
// If unspecified, the default priority functions will be applied.
// If empty list, all priority functions will be bypassed.
Priorities []PriorityPolicy
// Holds the information to communicate with the extender(s)
Extenders []Extender
// RequiredDuringScheduling affinity is not symmetric, but there is an implicit PreferredDuringScheduling affinity rule
// corresponding to every RequiredDuringScheduling affinity rule.
// HardPodAffinitySymmetricWeight represents the weight of implicit PreferredDuringScheduling affinity rule, in the range 1-100.
HardPodAffinitySymmetricWeight int32
// When AlwaysCheckAllPredicates is set to true, scheduler checks all
// the configured predicates even after one or more of them fails.
// When the flag is set to false, scheduler skips checking the rest
// of the predicates after it finds one predicate that failed.
AlwaysCheckAllPredicates bool
}
// PredicatePolicy describes a struct of a predicate policy.
type PredicatePolicy struct {
// Identifier of the predicate policy
// For a custom predicate, the name can be user-defined
// For the Kubernetes provided predicates, the name is the identifier of the pre-defined predicate
Name string
// Holds the parameters to configure the given predicate
Argument *PredicateArgument
}
// PriorityPolicy describes a struct of a priority policy.
type PriorityPolicy struct {
// Identifier of the priority policy
// For a custom priority, the name can be user-defined
// For the Kubernetes provided priority functions, the name is the identifier of the pre-defined priority function
Name string
// The numeric multiplier for the node scores that the priority function generates
// The weight should be a positive integer
Weight int64
// Holds the parameters to configure the given priority function
Argument *PriorityArgument
}
// PredicateArgument represents the arguments to configure predicate functions in scheduler policy configuration.
// Only one of its members may be specified
type PredicateArgument struct {
// The predicate that provides affinity for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAffinity *ServiceAffinity
// The predicate that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelsPresence *LabelsPresence
}
// PriorityArgument represents the arguments to configure priority functions in scheduler policy configuration.
// Only one of its members may be specified
type PriorityArgument struct {
// The priority function that ensures a good spread (anti-affinity) for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAntiAffinity *ServiceAntiAffinity
// The priority function that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelPreference *LabelPreference
// The RequestedToCapacityRatio priority function is parametrized with function shape.
RequestedToCapacityRatioArguments *RequestedToCapacityRatioArguments
}
// ServiceAffinity holds the parameters that are used to configure the corresponding predicate in scheduler policy configuration.
type ServiceAffinity struct {
// The list of labels that identify node "groups"
// All of the labels should match for the node to be considered a fit for hosting the pod
Labels []string
}
// LabelsPresence holds the parameters that are used to configure the corresponding predicate in scheduler policy configuration.
type LabelsPresence struct {
// The list of labels that identify node "groups"
// All of the labels should be either present (or absent) for the node to be considered a fit for hosting the pod
Labels []string
// The boolean flag that indicates whether the labels should be present or absent from the node
Presence bool
}
// ServiceAntiAffinity holds the parameters that are used to configure the corresponding priority function
type ServiceAntiAffinity struct {
// Used to identify node "groups"
Label string
}
// LabelPreference holds the parameters that are used to configure the corresponding priority function
type LabelPreference struct {
// Used to identify node "groups"
Label string
// This is a boolean flag
// If true, higher priority is given to nodes that have the label
// If false, higher priority is given to nodes that do not have the label
Presence bool
}
// RequestedToCapacityRatioArguments holds arguments specific to RequestedToCapacityRatio priority function.
type RequestedToCapacityRatioArguments struct {
// Array of points defining the priority function shape.
Shape []UtilizationShapePoint `json:"shape"`
Resources []ResourceSpec `json:"resources,omitempty"`
}
// UtilizationShapePoint represents a single point of the priority function shape.
type UtilizationShapePoint struct {
// Utilization (x axis). Valid values are 0 to 100. Fully utilized node maps to 100.
Utilization int32
// Score assigned to given utilization (y axis). Valid values are 0 to 10.
Score int32
}
// ResourceSpec represents a single resource for the RequestedToCapacityRatio priority function's bin packing.
type ResourceSpec struct {
// Name of the resource to be managed by RequestedToCapacityRatio function.
Name string
// Weight of the resource.
Weight int64
}
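// Example (minimal sketch): a shape that gives empty nodes the top score and
// fully-utilized nodes a zero score, i.e. "least requested" spreading over
// CPU and memory with equal weights.
var exampleRequestedToCapacityRatioArguments = RequestedToCapacityRatioArguments{
	Shape: []UtilizationShapePoint{
		{Utilization: 0, Score: 10},
		{Utilization: 100, Score: 0},
	},
	Resources: []ResourceSpec{
		{Name: "cpu", Weight: 1},
		{Name: "memory", Weight: 1},
	},
}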
// ExtenderManagedResource describes the arguments of extended resources
// managed by an extender.
type ExtenderManagedResource struct {
// Name is the extended resource name.
Name string
// IgnoredByScheduler indicates whether kube-scheduler should ignore this
// resource when applying predicates.
IgnoredByScheduler bool
}
// ExtenderTLSConfig contains settings to enable TLS with extender
type ExtenderTLSConfig struct {
// Server should be accessed without verifying the TLS certificate. For testing only.
Insecure bool
// ServerName is passed to the server for SNI and is used in the client to check server
// certificates against. If ServerName is empty, the hostname used to contact the
// server is used.
ServerName string
// Server requires TLS client certificate authentication
CertFile string
// Server requires TLS client certificate authentication
KeyFile string
// Trusted root certificates for server
CAFile string
// CertData holds PEM-encoded bytes (typically read from a client certificate file).
// CertData takes precedence over CertFile
CertData []byte
// KeyData holds PEM-encoded bytes (typically read from a client certificate key file).
// KeyData takes precedence over KeyFile
KeyData []byte
// CAData holds PEM-encoded bytes (typically read from a root certificates bundle).
// CAData takes precedence over CAFile
CAData []byte
}
// Extender holds the parameters used to communicate with the extender. If a verb is unspecified/empty,
// it is assumed that the extender chose not to provide that extension.
type Extender struct {
// URLPrefix at which the extender is available
URLPrefix string
// Verb for the filter call, empty if not supported. This verb is appended to the URLPrefix when issuing the filter call to extender.
FilterVerb string
// Verb for the preempt call, empty if not supported. This verb is appended to the URLPrefix when issuing the preempt call to extender.
PreemptVerb string
// Verb for the prioritize call, empty if not supported. This verb is appended to the URLPrefix when issuing the prioritize call to extender.
PrioritizeVerb string
// The numeric multiplier for the node scores that the prioritize call generates.
// The weight should be a positive integer
Weight int64
// Verb for the bind call, empty if not supported. This verb is appended to the URLPrefix when issuing the bind call to extender.
// If this method is implemented by the extender, it is the extender's responsibility to bind the pod to apiserver. Only one extender
// can implement this function.
BindVerb string
// EnableHTTPS specifies whether https should be used to communicate with the extender
EnableHTTPS bool
// TLSConfig specifies the transport layer security config
TLSConfig *ExtenderTLSConfig
// HTTPTimeout specifies the timeout duration for a call to the extender. A filter timeout fails the scheduling of the pod. A prioritize
// timeout is ignored; k8s/other extenders' priorities are used to select the node.
HTTPTimeout time.Duration
// NodeCacheCapable specifies that the extender is capable of caching node information,
// so the scheduler should only send minimal information about the eligible nodes
// assuming that the extender already cached full details of all nodes in the cluster
NodeCacheCapable bool
// ManagedResources is a list of extended resources that are managed by
// this extender.
// - A pod will be sent to the extender on the Filter, Prioritize and Bind
// (if the extender is the binder) phases iff the pod requests at least
// one of the extended resources in this list. If empty or unspecified,
// all pods will be sent to this extender.
// - If IgnoredByScheduler is set to true for a resource, kube-scheduler
// will skip checking the resource in predicates.
// +optional
ManagedResources []ExtenderManagedResource
// Ignorable specifies if the extender is ignorable, i.e. scheduling should not
// fail when the extender returns an error or is not reachable.
Ignorable bool
}
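// Example (minimal sketch, hypothetical endpoint): a filter-only HTTP extender
// that the scheduler may ignore when it is unreachable.
var exampleExtender = Extender{
	URLPrefix:   "http://127.0.0.1:12345/scheduler",
	FilterVerb:  "filter",
	Weight:      1,
	HTTPTimeout: 30 * time.Second,
	Ignorable:   true,
}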

View File

@ -0,0 +1,45 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
)
// GroupName is the group name used in this package
const GroupName = "kubescheduler.config.k8s.io"
// SchemeGroupVersion is group version used to register these objects
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal}
var (
// SchemeBuilder is the scheme builder with scheme init functions to run for this API package
SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
// AddToScheme is a global function that registers this API group & version to a scheme
AddToScheme = SchemeBuilder.AddToScheme
)
// addKnownTypes registers known types to the given scheme
func addKnownTypes(scheme *runtime.Scheme) error {
scheme.AddKnownTypes(SchemeGroupVersion,
&KubeSchedulerConfiguration{},
&Policy{},
)
scheme.AddKnownTypes(schema.GroupVersion{Group: "", Version: runtime.APIVersionInternal}, &Policy{})
return nil
}

View File

@ -0,0 +1,339 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"math"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
componentbaseconfig "k8s.io/component-base/config"
)
const (
// SchedulerDefaultLockObjectNamespace defines default scheduler lock object namespace ("kube-system")
SchedulerDefaultLockObjectNamespace string = metav1.NamespaceSystem
// SchedulerDefaultLockObjectName defines default scheduler lock object name ("kube-scheduler")
SchedulerDefaultLockObjectName = "kube-scheduler"
// SchedulerPolicyConfigMapKey defines the key of the element in the
// scheduler's policy ConfigMap that contains scheduler's policy config.
SchedulerPolicyConfigMapKey = "policy.cfg"
// SchedulerDefaultProviderName defines the default provider names
SchedulerDefaultProviderName = "DefaultProvider"
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// KubeSchedulerConfiguration configures a scheduler
type KubeSchedulerConfiguration struct {
metav1.TypeMeta
// AlgorithmSource specifies the scheduler algorithm source.
// TODO(#87526): Remove AlgorithmSource from this package
// DEPRECATED: AlgorithmSource is removed in the v1alpha2 ComponentConfig
AlgorithmSource SchedulerAlgorithmSource
// LeaderElection defines the configuration of leader election client.
LeaderElection KubeSchedulerLeaderElectionConfiguration
// ClientConnection specifies the kubeconfig file and client connection
// settings for the proxy server to use when communicating with the apiserver.
ClientConnection componentbaseconfig.ClientConnectionConfiguration
// HealthzBindAddress is the IP address and port for the health check server to serve on,
// defaulting to 0.0.0.0:10251
HealthzBindAddress string
// MetricsBindAddress is the IP address and port for the metrics server to
// serve on, defaulting to 0.0.0.0:10251.
MetricsBindAddress string
// DebuggingConfiguration holds configuration for Debugging related features
// TODO: We might want to make this a substruct, e.g. Debugging componentbaseconfig.DebuggingConfiguration
componentbaseconfig.DebuggingConfiguration
// DisablePreemption disables the pod preemption feature.
DisablePreemption bool
// PercentageOfNodesToScore is the percentage of all nodes that, once found feasible
// for running a pod, causes the scheduler to stop its search for more feasible nodes in
// the cluster. This helps improve the scheduler's performance. The scheduler always tries
// to find at least "minFeasibleNodesToFind" feasible nodes no matter what the value of this flag is.
// Example: if the cluster size is 500 nodes and the value of this flag is 30,
// then scheduler stops finding further feasible nodes once it finds 150 feasible ones.
// When the value is 0, default percentage (5%--50% based on the size of the cluster) of the
// nodes will be scored.
PercentageOfNodesToScore int32
// BindTimeoutSeconds is the duration to wait for a binding operation to complete before timing out.
// The value must be a non-negative integer; zero indicates no waiting.
// If this value is unset, the default value will be used.
BindTimeoutSeconds int64
// PodInitialBackoffSeconds is the initial backoff for unschedulable pods.
// If specified, it must be greater than 0. If this value is null, the default value (1s)
// will be used.
PodInitialBackoffSeconds int64
// PodMaxBackoffSeconds is the max backoff for unschedulable pods.
// If specified, it must be greater than or equal to podInitialBackoffSeconds. If this value is null,
// the default value (10s) will be used.
PodMaxBackoffSeconds int64
// Profiles are scheduling profiles that kube-scheduler supports. Pods can
// choose to be scheduled under a particular profile by setting its associated
// scheduler name. Pods that don't specify any scheduler name are scheduled
// with the "default-scheduler" profile, if present here.
Profiles []KubeSchedulerProfile
// Extenders are the list of scheduler extenders, each holding the values of how to communicate
// with the extender. These extenders are shared by all scheduler profiles.
Extenders []Extender
}
// KubeSchedulerProfile is a scheduling profile.
type KubeSchedulerProfile struct {
// SchedulerName is the name of the scheduler associated to this profile.
// If SchedulerName matches with the pod's "spec.schedulerName", then the pod
// is scheduled with this profile.
SchedulerName string
// Plugins specify the set of plugins that should be enabled or disabled.
// Enabled plugins are the ones that should be enabled in addition to the
// default plugins. Disabled plugins are any of the default plugins that
// should be disabled.
// When no enabled or disabled plugin is specified for an extension point,
// default plugins for that extension point will be used if there is any.
// If a QueueSort plugin is specified, the same QueueSort Plugin and
// PluginConfig must be specified for all profiles.
Plugins *Plugins
// PluginConfig is an optional set of custom plugin arguments for each plugin.
// Omitting config args for a plugin is equivalent to using the default config
// for that plugin.
PluginConfig []PluginConfig
}
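// Example (minimal sketch, hypothetical plugin name): a profile that enables
// an extra score plugin on top of the defaults for the default scheduler name.
var exampleProfile = KubeSchedulerProfile{
	SchedulerName: "default-scheduler",
	Plugins: &Plugins{
		Score: &PluginSet{Enabled: []Plugin{{Name: "MyScore", Weight: 2}}},
	},
}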
// SchedulerAlgorithmSource is the source of a scheduler algorithm. One source
// field must be specified, and source fields are mutually exclusive.
type SchedulerAlgorithmSource struct {
// Policy is a policy based algorithm source.
Policy *SchedulerPolicySource
// Provider is the name of a scheduling algorithm provider to use.
Provider *string
}
// SchedulerPolicySource configures a means to obtain a scheduler Policy. One
// source field must be specified, and source fields are mutually exclusive.
type SchedulerPolicySource struct {
// File is a file policy source.
File *SchedulerPolicyFileSource
// ConfigMap is a config map policy source.
ConfigMap *SchedulerPolicyConfigMapSource
}
// SchedulerPolicyFileSource is a policy serialized to disk and accessed via
// path.
type SchedulerPolicyFileSource struct {
// Path is the location of a serialized policy.
Path string
}
// SchedulerPolicyConfigMapSource is a policy serialized into a config map value
// under the SchedulerPolicyConfigMapKey key.
type SchedulerPolicyConfigMapSource struct {
// Namespace is the namespace of the policy config map.
Namespace string
// Name is the name of the policy config map.
Name string
}
// KubeSchedulerLeaderElectionConfiguration expands LeaderElectionConfiguration
// to include scheduler specific configuration.
type KubeSchedulerLeaderElectionConfiguration struct {
componentbaseconfig.LeaderElectionConfiguration
}
// Plugins include multiple extension points. When specified, the list of plugins for
// a particular extension point are the only ones enabled. If an extension point is
// omitted from the config, then the default set of plugins is used for that extension point.
// Enabled plugins are called in the order specified here, after default plugins. If they need to
// be invoked before default plugins, default plugins must be disabled and re-enabled here in desired order.
type Plugins struct {
// QueueSort is a list of plugins that should be invoked when sorting pods in the scheduling queue.
QueueSort *PluginSet
// PreFilter is a list of plugins that should be invoked at "PreFilter" extension point of the scheduling framework.
PreFilter *PluginSet
// Filter is a list of plugins that should be invoked when filtering out nodes that cannot run the Pod.
Filter *PluginSet
// PreScore is a list of plugins that are invoked before scoring.
PreScore *PluginSet
// Score is a list of plugins that should be invoked when ranking nodes that have passed the filtering phase.
Score *PluginSet
// Reserve is a list of plugins invoked when reserving a node to run the pod.
Reserve *PluginSet
// Permit is a list of plugins that control binding of a Pod. These plugins can prevent or delay binding of a Pod.
Permit *PluginSet
// PreBind is a list of plugins that should be invoked before a pod is bound.
PreBind *PluginSet
// Bind is a list of plugins that should be invoked at "Bind" extension point of the scheduling framework.
// The scheduler calls these plugins in order. It skips the rest of these plugins as soon as one returns success.
Bind *PluginSet
// PostBind is a list of plugins that should be invoked after a pod is successfully bound.
PostBind *PluginSet
// Unreserve is a list of plugins invoked when a pod that was previously reserved is rejected in a later phase.
Unreserve *PluginSet
}
// PluginSet specifies enabled and disabled plugins for an extension point.
// If an array is empty, missing, or nil, default plugins at that extension point will be used.
type PluginSet struct {
// Enabled specifies plugins that should be enabled in addition to default plugins.
// These are called after default plugins and in the same order specified here.
Enabled []Plugin
// Disabled specifies default plugins that should be disabled.
// When all default plugins need to be disabled, an array containing only one "*" should be provided.
Disabled []Plugin
}
// Plugin specifies a plugin name and its weight when applicable. Weight is used only for Score plugins.
type Plugin struct {
// Name defines the name of plugin
Name string
// Weight defines the weight of plugin, only used for Score plugins.
Weight int32
}
// PluginConfig specifies arguments that should be passed to a plugin at the time of initialization.
// A plugin that is invoked at multiple extension points is initialized once. Args can have arbitrary structure.
// It is up to the plugin to process these Args.
type PluginConfig struct {
// Name defines the name of plugin being configured
Name string
// Args defines the arguments passed to the plugins at the time of initialization. Args can have arbitrary structure.
Args runtime.Unknown
}
/*
* NOTE: The following variables and methods are intentionally left out of the staging mirror.
*/
const (
// DefaultPercentageOfNodesToScore defines the percentage of all nodes that, once
// found feasible, causes the scheduler to stop looking for more nodes.
// A value of 0 means adaptive: the scheduler figures out a proper default.
DefaultPercentageOfNodesToScore = 0
// MaxCustomPriorityScore is the max score UtilizationShapePoint expects.
MaxCustomPriorityScore int64 = 10
// MaxTotalScore is the maximum total score.
MaxTotalScore int64 = math.MaxInt64
// MaxWeight defines the max weight value allowed for custom PriorityPolicy
MaxWeight = MaxTotalScore / MaxCustomPriorityScore
)
func appendPluginSet(dst *PluginSet, src *PluginSet) *PluginSet {
if dst == nil {
dst = &PluginSet{}
}
if src != nil {
dst.Enabled = append(dst.Enabled, src.Enabled...)
dst.Disabled = append(dst.Disabled, src.Disabled...)
}
return dst
}
// Append appends src Plugins to current Plugins. If a PluginSet is nil, it will
// be created.
func (p *Plugins) Append(src *Plugins) {
if p == nil || src == nil {
return
}
p.QueueSort = appendPluginSet(p.QueueSort, src.QueueSort)
p.PreFilter = appendPluginSet(p.PreFilter, src.PreFilter)
p.Filter = appendPluginSet(p.Filter, src.Filter)
p.PreScore = appendPluginSet(p.PreScore, src.PreScore)
p.Score = appendPluginSet(p.Score, src.Score)
p.Reserve = appendPluginSet(p.Reserve, src.Reserve)
p.Permit = appendPluginSet(p.Permit, src.Permit)
p.PreBind = appendPluginSet(p.PreBind, src.PreBind)
p.Bind = appendPluginSet(p.Bind, src.Bind)
p.PostBind = appendPluginSet(p.PostBind, src.PostBind)
p.Unreserve = appendPluginSet(p.Unreserve, src.Unreserve)
}
// Apply merges the plugin configuration from custom plugins, handling disabled sets.
func (p *Plugins) Apply(customPlugins *Plugins) {
if customPlugins == nil {
return
}
p.QueueSort = mergePluginSets(p.QueueSort, customPlugins.QueueSort)
p.PreFilter = mergePluginSets(p.PreFilter, customPlugins.PreFilter)
p.Filter = mergePluginSets(p.Filter, customPlugins.Filter)
p.PreScore = mergePluginSets(p.PreScore, customPlugins.PreScore)
p.Score = mergePluginSets(p.Score, customPlugins.Score)
p.Reserve = mergePluginSets(p.Reserve, customPlugins.Reserve)
p.Permit = mergePluginSets(p.Permit, customPlugins.Permit)
p.PreBind = mergePluginSets(p.PreBind, customPlugins.PreBind)
p.Bind = mergePluginSets(p.Bind, customPlugins.Bind)
p.PostBind = mergePluginSets(p.PostBind, customPlugins.PostBind)
p.Unreserve = mergePluginSets(p.Unreserve, customPlugins.Unreserve)
}
func mergePluginSets(defaultPluginSet, customPluginSet *PluginSet) *PluginSet {
if customPluginSet == nil {
customPluginSet = &PluginSet{}
}
if defaultPluginSet == nil {
defaultPluginSet = &PluginSet{}
}
disabledPlugins := sets.NewString()
for _, disabledPlugin := range customPluginSet.Disabled {
disabledPlugins.Insert(disabledPlugin.Name)
}
enabledPlugins := []Plugin{}
if !disabledPlugins.Has("*") {
for _, defaultEnabledPlugin := range defaultPluginSet.Enabled {
if disabledPlugins.Has(defaultEnabledPlugin.Name) {
continue
}
enabledPlugins = append(enabledPlugins, defaultEnabledPlugin)
}
}
enabledPlugins = append(enabledPlugins, customPluginSet.Enabled...)
return &PluginSet{Enabled: enabledPlugins}
}
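// Example (minimal sketch, hypothetical plugin names): disabling "*" drops all
// default plugins at that extension point before appending the custom ones.
func exampleMergePluginSets() *PluginSet {
	defaults := &PluginSet{Enabled: []Plugin{{Name: "DefaultScore", Weight: 1}}}
	custom := &PluginSet{
		Disabled: []Plugin{{Name: "*"}},
		Enabled:  []Plugin{{Name: "CustomScore", Weight: 5}},
	}
	return mergePluginSets(defaults, custom) // Enabled == [{CustomScore 5}]
}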

View File

@ -0,0 +1,677 @@
// +build !ignore_autogenerated
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by deepcopy-gen. DO NOT EDIT.
package config
import (
runtime "k8s.io/apimachinery/pkg/runtime"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Extender) DeepCopyInto(out *Extender) {
*out = *in
if in.TLSConfig != nil {
in, out := &in.TLSConfig, &out.TLSConfig
*out = new(ExtenderTLSConfig)
(*in).DeepCopyInto(*out)
}
if in.ManagedResources != nil {
in, out := &in.ManagedResources, &out.ManagedResources
*out = make([]ExtenderManagedResource, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Extender.
func (in *Extender) DeepCopy() *Extender {
if in == nil {
return nil
}
out := new(Extender)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderManagedResource) DeepCopyInto(out *ExtenderManagedResource) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderManagedResource.
func (in *ExtenderManagedResource) DeepCopy() *ExtenderManagedResource {
if in == nil {
return nil
}
out := new(ExtenderManagedResource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderTLSConfig) DeepCopyInto(out *ExtenderTLSConfig) {
*out = *in
if in.CertData != nil {
in, out := &in.CertData, &out.CertData
*out = make([]byte, len(*in))
copy(*out, *in)
}
if in.KeyData != nil {
in, out := &in.KeyData, &out.KeyData
*out = make([]byte, len(*in))
copy(*out, *in)
}
if in.CAData != nil {
in, out := &in.CAData, &out.CAData
*out = make([]byte, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderTLSConfig.
func (in *ExtenderTLSConfig) DeepCopy() *ExtenderTLSConfig {
if in == nil {
return nil
}
out := new(ExtenderTLSConfig)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *KubeSchedulerConfiguration) DeepCopyInto(out *KubeSchedulerConfiguration) {
*out = *in
out.TypeMeta = in.TypeMeta
in.AlgorithmSource.DeepCopyInto(&out.AlgorithmSource)
out.LeaderElection = in.LeaderElection
out.ClientConnection = in.ClientConnection
out.DebuggingConfiguration = in.DebuggingConfiguration
if in.Profiles != nil {
in, out := &in.Profiles, &out.Profiles
*out = make([]KubeSchedulerProfile, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Extenders != nil {
in, out := &in.Extenders, &out.Extenders
*out = make([]Extender, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeSchedulerConfiguration.
func (in *KubeSchedulerConfiguration) DeepCopy() *KubeSchedulerConfiguration {
if in == nil {
return nil
}
out := new(KubeSchedulerConfiguration)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *KubeSchedulerConfiguration) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *KubeSchedulerLeaderElectionConfiguration) DeepCopyInto(out *KubeSchedulerLeaderElectionConfiguration) {
*out = *in
out.LeaderElectionConfiguration = in.LeaderElectionConfiguration
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeSchedulerLeaderElectionConfiguration.
func (in *KubeSchedulerLeaderElectionConfiguration) DeepCopy() *KubeSchedulerLeaderElectionConfiguration {
if in == nil {
return nil
}
out := new(KubeSchedulerLeaderElectionConfiguration)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *KubeSchedulerProfile) DeepCopyInto(out *KubeSchedulerProfile) {
*out = *in
if in.Plugins != nil {
in, out := &in.Plugins, &out.Plugins
*out = new(Plugins)
(*in).DeepCopyInto(*out)
}
if in.PluginConfig != nil {
in, out := &in.PluginConfig, &out.PluginConfig
*out = make([]PluginConfig, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeSchedulerProfile.
func (in *KubeSchedulerProfile) DeepCopy() *KubeSchedulerProfile {
if in == nil {
return nil
}
out := new(KubeSchedulerProfile)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LabelPreference) DeepCopyInto(out *LabelPreference) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelPreference.
func (in *LabelPreference) DeepCopy() *LabelPreference {
if in == nil {
return nil
}
out := new(LabelPreference)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LabelsPresence) DeepCopyInto(out *LabelsPresence) {
*out = *in
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelsPresence.
func (in *LabelsPresence) DeepCopy() *LabelsPresence {
if in == nil {
return nil
}
out := new(LabelsPresence)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Plugin) DeepCopyInto(out *Plugin) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Plugin.
func (in *Plugin) DeepCopy() *Plugin {
if in == nil {
return nil
}
out := new(Plugin)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PluginConfig) DeepCopyInto(out *PluginConfig) {
*out = *in
in.Args.DeepCopyInto(&out.Args)
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PluginConfig.
func (in *PluginConfig) DeepCopy() *PluginConfig {
if in == nil {
return nil
}
out := new(PluginConfig)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PluginSet) DeepCopyInto(out *PluginSet) {
*out = *in
if in.Enabled != nil {
in, out := &in.Enabled, &out.Enabled
*out = make([]Plugin, len(*in))
copy(*out, *in)
}
if in.Disabled != nil {
in, out := &in.Disabled, &out.Disabled
*out = make([]Plugin, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PluginSet.
func (in *PluginSet) DeepCopy() *PluginSet {
if in == nil {
return nil
}
out := new(PluginSet)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Plugins) DeepCopyInto(out *Plugins) {
*out = *in
if in.QueueSort != nil {
in, out := &in.QueueSort, &out.QueueSort
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.PreFilter != nil {
in, out := &in.PreFilter, &out.PreFilter
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.Filter != nil {
in, out := &in.Filter, &out.Filter
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.PreScore != nil {
in, out := &in.PreScore, &out.PreScore
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.Score != nil {
in, out := &in.Score, &out.Score
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.Reserve != nil {
in, out := &in.Reserve, &out.Reserve
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.Permit != nil {
in, out := &in.Permit, &out.Permit
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.PreBind != nil {
in, out := &in.PreBind, &out.PreBind
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.Bind != nil {
in, out := &in.Bind, &out.Bind
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.PostBind != nil {
in, out := &in.PostBind, &out.PostBind
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
if in.Unreserve != nil {
in, out := &in.Unreserve, &out.Unreserve
*out = new(PluginSet)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Plugins.
func (in *Plugins) DeepCopy() *Plugins {
if in == nil {
return nil
}
out := new(Plugins)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Policy) DeepCopyInto(out *Policy) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Predicates != nil {
in, out := &in.Predicates, &out.Predicates
*out = make([]PredicatePolicy, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Priorities != nil {
in, out := &in.Priorities, &out.Priorities
*out = make([]PriorityPolicy, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Extenders != nil {
in, out := &in.Extenders, &out.Extenders
*out = make([]Extender, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Policy.
func (in *Policy) DeepCopy() *Policy {
if in == nil {
return nil
}
out := new(Policy)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *Policy) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PredicateArgument) DeepCopyInto(out *PredicateArgument) {
*out = *in
if in.ServiceAffinity != nil {
in, out := &in.ServiceAffinity, &out.ServiceAffinity
*out = new(ServiceAffinity)
(*in).DeepCopyInto(*out)
}
if in.LabelsPresence != nil {
in, out := &in.LabelsPresence, &out.LabelsPresence
*out = new(LabelsPresence)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredicateArgument.
func (in *PredicateArgument) DeepCopy() *PredicateArgument {
if in == nil {
return nil
}
out := new(PredicateArgument)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PredicatePolicy) DeepCopyInto(out *PredicatePolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
*out = new(PredicateArgument)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PredicatePolicy.
func (in *PredicatePolicy) DeepCopy() *PredicatePolicy {
if in == nil {
return nil
}
out := new(PredicatePolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PriorityArgument) DeepCopyInto(out *PriorityArgument) {
*out = *in
if in.ServiceAntiAffinity != nil {
in, out := &in.ServiceAntiAffinity, &out.ServiceAntiAffinity
*out = new(ServiceAntiAffinity)
**out = **in
}
if in.LabelPreference != nil {
in, out := &in.LabelPreference, &out.LabelPreference
*out = new(LabelPreference)
**out = **in
}
if in.RequestedToCapacityRatioArguments != nil {
in, out := &in.RequestedToCapacityRatioArguments, &out.RequestedToCapacityRatioArguments
*out = new(RequestedToCapacityRatioArguments)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PriorityArgument.
func (in *PriorityArgument) DeepCopy() *PriorityArgument {
if in == nil {
return nil
}
out := new(PriorityArgument)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PriorityPolicy) DeepCopyInto(out *PriorityPolicy) {
*out = *in
if in.Argument != nil {
in, out := &in.Argument, &out.Argument
*out = new(PriorityArgument)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PriorityPolicy.
func (in *PriorityPolicy) DeepCopy() *PriorityPolicy {
if in == nil {
return nil
}
out := new(PriorityPolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RequestedToCapacityRatioArguments) DeepCopyInto(out *RequestedToCapacityRatioArguments) {
*out = *in
if in.Shape != nil {
in, out := &in.Shape, &out.Shape
*out = make([]UtilizationShapePoint, len(*in))
copy(*out, *in)
}
if in.Resources != nil {
in, out := &in.Resources, &out.Resources
*out = make([]ResourceSpec, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RequestedToCapacityRatioArguments.
func (in *RequestedToCapacityRatioArguments) DeepCopy() *RequestedToCapacityRatioArguments {
if in == nil {
return nil
}
out := new(RequestedToCapacityRatioArguments)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ResourceSpec) DeepCopyInto(out *ResourceSpec) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceSpec.
func (in *ResourceSpec) DeepCopy() *ResourceSpec {
if in == nil {
return nil
}
out := new(ResourceSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SchedulerAlgorithmSource) DeepCopyInto(out *SchedulerAlgorithmSource) {
*out = *in
if in.Policy != nil {
in, out := &in.Policy, &out.Policy
*out = new(SchedulerPolicySource)
(*in).DeepCopyInto(*out)
}
if in.Provider != nil {
in, out := &in.Provider, &out.Provider
*out = new(string)
**out = **in
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulerAlgorithmSource.
func (in *SchedulerAlgorithmSource) DeepCopy() *SchedulerAlgorithmSource {
if in == nil {
return nil
}
out := new(SchedulerAlgorithmSource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SchedulerPolicyConfigMapSource) DeepCopyInto(out *SchedulerPolicyConfigMapSource) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulerPolicyConfigMapSource.
func (in *SchedulerPolicyConfigMapSource) DeepCopy() *SchedulerPolicyConfigMapSource {
if in == nil {
return nil
}
out := new(SchedulerPolicyConfigMapSource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SchedulerPolicyFileSource) DeepCopyInto(out *SchedulerPolicyFileSource) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulerPolicyFileSource.
func (in *SchedulerPolicyFileSource) DeepCopy() *SchedulerPolicyFileSource {
if in == nil {
return nil
}
out := new(SchedulerPolicyFileSource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SchedulerPolicySource) DeepCopyInto(out *SchedulerPolicySource) {
*out = *in
if in.File != nil {
in, out := &in.File, &out.File
*out = new(SchedulerPolicyFileSource)
**out = **in
}
if in.ConfigMap != nil {
in, out := &in.ConfigMap, &out.ConfigMap
*out = new(SchedulerPolicyConfigMapSource)
**out = **in
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulerPolicySource.
func (in *SchedulerPolicySource) DeepCopy() *SchedulerPolicySource {
if in == nil {
return nil
}
out := new(SchedulerPolicySource)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServiceAffinity) DeepCopyInto(out *ServiceAffinity) {
*out = *in
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceAffinity.
func (in *ServiceAffinity) DeepCopy() *ServiceAffinity {
if in == nil {
return nil
}
out := new(ServiceAffinity)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServiceAntiAffinity) DeepCopyInto(out *ServiceAntiAffinity) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceAntiAffinity.
func (in *ServiceAntiAffinity) DeepCopy() *ServiceAntiAffinity {
if in == nil {
return nil
}
out := new(ServiceAntiAffinity)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *UtilizationShapePoint) DeepCopyInto(out *UtilizationShapePoint) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UtilizationShapePoint.
func (in *UtilizationShapePoint) DeepCopy() *UtilizationShapePoint {
if in == nil {
return nil
}
out := new(UtilizationShapePoint)
in.DeepCopyInto(out)
return out
}
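
These generated helpers follow the usual deepcopy-gen contract: DeepCopyInto writes into a caller-supplied value, DeepCopy allocates a fresh one, and nested slices, maps, and pointers are re-allocated rather than aliased. A minimal usage sketch, not part of this commit (the label values are made up, and the import path assumes the pkg/scheduler/apis/config package shown here):

package main

import (
	"fmt"

	config "k8s.io/kubernetes/pkg/scheduler/apis/config"
)

func main() {
	in := &config.ServiceAffinity{Labels: []string{"region"}}

	// DeepCopy re-allocates the Labels slice, so the copy shares no memory
	// with the original.
	out := in.DeepCopy()
	out.Labels[0] = "zone"

	fmt.Println(in.Labels[0], out.Labels[0]) // prints: region zone
}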


@ -1,30 +0,0 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"types.go",
"zz_generated.deepcopy.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/apis/extender/v1",
visibility = ["//visibility:public"],
deps = [
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)


@ -1,126 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1
import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
)
const (
// MinExtenderPriority defines the min priority value for extender.
MinExtenderPriority int64 = 0
// MaxExtenderPriority defines the max priority value for extender.
MaxExtenderPriority int64 = 10
)
// ExtenderPreemptionResult represents the result returned by the preemption phase of an extender.
type ExtenderPreemptionResult struct {
NodeNameToMetaVictims map[string]*MetaVictims
}
// ExtenderPreemptionArgs represents the arguments needed by the extender to preempt pods on nodes.
type ExtenderPreemptionArgs struct {
// Pod being scheduled
Pod *v1.Pod
// Victims map generated by scheduler preemption phase
// Only set NodeNameToMetaVictims if ExtenderConfig.NodeCacheCapable == true. Otherwise, only set NodeNameToVictims.
NodeNameToVictims map[string]*Victims
NodeNameToMetaVictims map[string]*MetaVictims
}
// Victims represents:
// pods: a group of pods expected to be preempted.
// numPDBViolations: the count of violations of PodDisruptionBudget
type Victims struct {
Pods []*v1.Pod
NumPDBViolations int64
}
// MetaPod represents the identifier of a v1.Pod.
type MetaPod struct {
UID string
}
// MetaVictims represents:
// pods: a group of pods expected to be preempted.
// Only Pod identifiers are sent; users are expected to fetch the corresponding v1.Pod objects themselves.
// numPDBViolations: the count of violations of PodDisruptionBudget
type MetaVictims struct {
Pods []*MetaPod
NumPDBViolations int64
}
// ExtenderArgs represents the arguments needed by the extender to filter/prioritize
// nodes for a pod.
type ExtenderArgs struct {
// Pod being scheduled
Pod *v1.Pod
// List of candidate nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == false
Nodes *v1.NodeList
// List of candidate node names where the pod can be scheduled; to be
// populated only if ExtenderConfig.NodeCacheCapable == true
NodeNames *[]string
}
// FailedNodesMap represents the filtered out nodes, with node names and failure messages
type FailedNodesMap map[string]string
// ExtenderFilterResult represents the results of a filter call to an extender
type ExtenderFilterResult struct {
// Filtered set of nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == false
Nodes *v1.NodeList
// Filtered set of nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == true
NodeNames *[]string
// Filtered out nodes where the pod can't be scheduled and the failure messages
FailedNodes FailedNodesMap
// Error message indicating failure
Error string
}
// ExtenderBindingArgs represents the arguments to an extender for binding a pod to a node.
type ExtenderBindingArgs struct {
// PodName is the name of the pod being bound
PodName string
// PodNamespace is the namespace of the pod being bound
PodNamespace string
// PodUID is the UID of the pod being bound
PodUID types.UID
// Node selected by the scheduler
Node string
}
// ExtenderBindingResult represents the result of binding of a pod to a node from an extender.
type ExtenderBindingResult struct {
// Error message indicating failure
Error string
}
// HostPriority represents the priority of scheduling to a particular host; a higher priority is better.
type HostPriority struct {
// Name of the host
Host string
// Score associated with the host
Score int64
}
// HostPriorityList declares a []HostPriority type.
type HostPriorityList []HostPriority


@ -1,339 +0,0 @@
// +build !ignore_autogenerated
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by deepcopy-gen. DO NOT EDIT.
package v1
import (
corev1 "k8s.io/api/core/v1"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderArgs) DeepCopyInto(out *ExtenderArgs) {
*out = *in
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
*out = new(corev1.Pod)
(*in).DeepCopyInto(*out)
}
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
*out = new(corev1.NodeList)
(*in).DeepCopyInto(*out)
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderArgs.
func (in *ExtenderArgs) DeepCopy() *ExtenderArgs {
if in == nil {
return nil
}
out := new(ExtenderArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderBindingArgs) DeepCopyInto(out *ExtenderBindingArgs) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderBindingArgs.
func (in *ExtenderBindingArgs) DeepCopy() *ExtenderBindingArgs {
if in == nil {
return nil
}
out := new(ExtenderBindingArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderBindingResult) DeepCopyInto(out *ExtenderBindingResult) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderBindingResult.
func (in *ExtenderBindingResult) DeepCopy() *ExtenderBindingResult {
if in == nil {
return nil
}
out := new(ExtenderBindingResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderFilterResult) DeepCopyInto(out *ExtenderFilterResult) {
*out = *in
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
*out = new(corev1.NodeList)
(*in).DeepCopyInto(*out)
}
if in.NodeNames != nil {
in, out := &in.NodeNames, &out.NodeNames
*out = new([]string)
if **in != nil {
in, out := *in, *out
*out = make([]string, len(*in))
copy(*out, *in)
}
}
if in.FailedNodes != nil {
in, out := &in.FailedNodes, &out.FailedNodes
*out = make(FailedNodesMap, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderFilterResult.
func (in *ExtenderFilterResult) DeepCopy() *ExtenderFilterResult {
if in == nil {
return nil
}
out := new(ExtenderFilterResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderPreemptionArgs) DeepCopyInto(out *ExtenderPreemptionArgs) {
*out = *in
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
*out = new(corev1.Pod)
(*in).DeepCopyInto(*out)
}
if in.NodeNameToVictims != nil {
in, out := &in.NodeNameToVictims, &out.NodeNameToVictims
*out = make(map[string]*Victims, len(*in))
for key, val := range *in {
var outVal *Victims
if val == nil {
(*out)[key] = nil
} else {
in, out := &val, &outVal
*out = new(Victims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
if in.NodeNameToMetaVictims != nil {
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
var outVal *MetaVictims
if val == nil {
(*out)[key] = nil
} else {
in, out := &val, &outVal
*out = new(MetaVictims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderPreemptionArgs.
func (in *ExtenderPreemptionArgs) DeepCopy() *ExtenderPreemptionArgs {
if in == nil {
return nil
}
out := new(ExtenderPreemptionArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderPreemptionResult) DeepCopyInto(out *ExtenderPreemptionResult) {
*out = *in
if in.NodeNameToMetaVictims != nil {
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
var outVal *MetaVictims
if val == nil {
(*out)[key] = nil
} else {
in, out := &val, &outVal
*out = new(MetaVictims)
(*in).DeepCopyInto(*out)
}
(*out)[key] = outVal
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderPreemptionResult.
func (in *ExtenderPreemptionResult) DeepCopy() *ExtenderPreemptionResult {
if in == nil {
return nil
}
out := new(ExtenderPreemptionResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in FailedNodesMap) DeepCopyInto(out *FailedNodesMap) {
{
in := &in
*out = make(FailedNodesMap, len(*in))
for key, val := range *in {
(*out)[key] = val
}
return
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedNodesMap.
func (in FailedNodesMap) DeepCopy() FailedNodesMap {
if in == nil {
return nil
}
out := new(FailedNodesMap)
in.DeepCopyInto(out)
return *out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HostPriority) DeepCopyInto(out *HostPriority) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostPriority.
func (in *HostPriority) DeepCopy() *HostPriority {
if in == nil {
return nil
}
out := new(HostPriority)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in HostPriorityList) DeepCopyInto(out *HostPriorityList) {
{
in := &in
*out = make(HostPriorityList, len(*in))
copy(*out, *in)
return
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostPriorityList.
func (in HostPriorityList) DeepCopy() HostPriorityList {
if in == nil {
return nil
}
out := new(HostPriorityList)
in.DeepCopyInto(out)
return *out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetaPod) DeepCopyInto(out *MetaPod) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaPod.
func (in *MetaPod) DeepCopy() *MetaPod {
if in == nil {
return nil
}
out := new(MetaPod)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetaVictims) DeepCopyInto(out *MetaVictims) {
*out = *in
if in.Pods != nil {
in, out := &in.Pods, &out.Pods
*out = make([]*MetaPod, len(*in))
for i := range *in {
if (*in)[i] != nil {
in, out := &(*in)[i], &(*out)[i]
*out = new(MetaPod)
**out = **in
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaVictims.
func (in *MetaVictims) DeepCopy() *MetaVictims {
if in == nil {
return nil
}
out := new(MetaVictims)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Victims) DeepCopyInto(out *Victims) {
*out = *in
if in.Pods != nil {
in, out := &in.Pods, &out.Pods
*out = make([]*corev1.Pod, len(*in))
for i := range *in {
if (*in)[i] != nil {
in, out := &(*in)[i], &(*out)[i]
*out = new(corev1.Pod)
(*in).DeepCopyInto(*out)
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Victims.
func (in *Victims) DeepCopy() *Victims {
if in == nil {
return nil
}
out := new(Victims)
in.DeepCopyInto(out)
return out
}


@ -0,0 +1,54 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = [
"node_affinity.go",
"normalize_score.go",
"spread.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper",
visibility = ["//visibility:public"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/client-go/listers/apps/v1:go_default_library",
"//staging/src/k8s.io/client-go/listers/core/v1:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"node_affinity_test.go",
"normalize_score_test.go",
"spread_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/client-go/informers:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)


@ -0,0 +1,78 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package helper
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
)
// PodMatchesNodeSelectorAndAffinityTerms checks whether the pod is schedulable onto nodes according to
// the requirements in both NodeAffinity and nodeSelector.
func PodMatchesNodeSelectorAndAffinityTerms(pod *v1.Pod, node *v1.Node) bool {
// Check if node.Labels match pod.Spec.NodeSelector.
if len(pod.Spec.NodeSelector) > 0 {
selector := labels.SelectorFromSet(pod.Spec.NodeSelector)
if !selector.Matches(labels.Set(node.Labels)) {
return false
}
}
// 1. nil NodeSelector matches all nodes (i.e. does not filter out any nodes)
// 2. nil []NodeSelectorTerm (equivalent to non-nil empty NodeSelector) matches no nodes
// 3. zero-length non-nil []NodeSelectorTerm matches no nodes also, just for simplicity
// 4. nil []NodeSelectorRequirement (equivalent to non-nil empty NodeSelectorTerm) matches no nodes
// 5. zero-length non-nil []NodeSelectorRequirement matches no nodes also, just for simplicity
// 6. non-nil empty NodeSelectorRequirement is not allowed
nodeAffinityMatches := true
affinity := pod.Spec.Affinity
if affinity != nil && affinity.NodeAffinity != nil {
nodeAffinity := affinity.NodeAffinity
// If there are no required NodeAffinity requirements, this is a no-op, i.e., all nodes match.
// TODO: Replace the next line with the commented-out block below when implementing RequiredDuringSchedulingRequiredDuringExecution.
if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
// if nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution == nil && nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
return true
}
// Match node selector for requiredDuringSchedulingRequiredDuringExecution.
// TODO: Uncomment this block when implementing RequiredDuringSchedulingRequiredDuringExecution.
// if nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution != nil {
// nodeSelectorTerms := nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution.NodeSelectorTerms
// klog.V(10).Infof("Match for RequiredDuringSchedulingRequiredDuringExecution node selector terms %+v", nodeSelectorTerms)
// nodeAffinityMatches = nodeMatchesNodeSelectorTerms(node, nodeSelectorTerms)
// }
// Match node selector for requiredDuringSchedulingIgnoredDuringExecution.
if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
nodeSelectorTerms := nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
nodeAffinityMatches = nodeAffinityMatches && nodeMatchesNodeSelectorTerms(node, nodeSelectorTerms)
}
}
return nodeAffinityMatches
}
// nodeMatchesNodeSelectorTerms checks whether a node's labels satisfy a list of node selector
// terms; the terms are ORed, and an empty list of terms matches nothing.
func nodeMatchesNodeSelectorTerms(node *v1.Node, nodeSelectorTerms []v1.NodeSelectorTerm) bool {
return v1helper.MatchNodeSelectorTerms(nodeSelectorTerms, node.Labels, fields.Set{
"metadata.name": node.Name,
})
}
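
A short usage sketch for PodMatchesNodeSelectorAndAffinityTerms, not part of this commit (the pod and node contents are made up; the import path comes from the BUILD file above):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	pluginhelper "k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
)

func main() {
	pod := &v1.Pod{}
	pod.Spec.NodeSelector = map[string]string{"disktype": "ssd"}

	node := &v1.Node{}
	node.Labels = map[string]string{"disktype": "ssd", "zone": "us-east-1a"}

	// true: the node carries every label required by the pod's nodeSelector,
	// and the pod declares no required node affinity.
	fmt.Println(pluginhelper.PodMatchesNodeSelectorAndAffinityTerms(pod, node))
}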


@ -0,0 +1,54 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package helper
import (
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
)
// DefaultNormalizeScore normalizes the given scores to the range [0, maxPriority].
// If reverse is set to true, it additionally reverses the scores by subtracting
// each of them from maxPriority.
func DefaultNormalizeScore(maxPriority int64, reverse bool, scores framework.NodeScoreList) *framework.Status {
var maxCount int64
for i := range scores {
if scores[i].Score > maxCount {
maxCount = scores[i].Score
}
}
if maxCount == 0 {
if reverse {
for i := range scores {
scores[i].Score = maxPriority
}
}
return nil
}
for i := range scores {
score := scores[i].Score
score = maxPriority * score / maxCount
if reverse {
score = maxPriority - score
}
scores[i].Score = score
}
return nil
}
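
A worked example of the normalization, not part of this commit: with raw scores 10, 20, and 40 and maxPriority = framework.MaxNodeScore (100), each score is multiplied by 100/40, so the best node ends up at 100.

package main

import (
	"fmt"

	pluginhelper "k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
	framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
)

func main() {
	scores := framework.NodeScoreList{
		{Name: "node-a", Score: 10},
		{Name: "node-b", Score: 20},
		{Name: "node-c", Score: 40},
	}
	if st := pluginhelper.DefaultNormalizeScore(framework.MaxNodeScore, false, scores); st != nil {
		panic(st.Message())
	}
	for _, s := range scores {
		fmt.Printf("%s=%d\n", s.Name, s.Score) // node-a=25, node-b=50, node-c=100
	}
}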


@ -0,0 +1,95 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package helper
import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
appslisters "k8s.io/client-go/listers/apps/v1"
corelisters "k8s.io/client-go/listers/core/v1"
)
// DefaultSelector returns a selector deduced from the Services, Replication
// Controllers, Replica Sets, and Stateful Sets matching the given pod.
func DefaultSelector(pod *v1.Pod, sl corelisters.ServiceLister, cl corelisters.ReplicationControllerLister, rsl appslisters.ReplicaSetLister, ssl appslisters.StatefulSetLister) labels.Selector {
labelSet := make(labels.Set)
// Since services, RCs, RSs and SSs match the pod, they won't have conflicting
// labels. Merging is safe.
if services, err := GetPodServices(sl, pod); err == nil {
for _, service := range services {
labelSet = labels.Merge(labelSet, service.Spec.Selector)
}
}
if rcs, err := cl.GetPodControllers(pod); err == nil {
for _, rc := range rcs {
labelSet = labels.Merge(labelSet, rc.Spec.Selector)
}
}
selector := labels.NewSelector()
if len(labelSet) != 0 {
selector = labelSet.AsSelector()
}
if rss, err := rsl.GetPodReplicaSets(pod); err == nil {
for _, rs := range rss {
if other, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector); err == nil {
if r, ok := other.Requirements(); ok {
selector = selector.Add(r...)
}
}
}
}
if sss, err := ssl.GetPodStatefulSets(pod); err == nil {
for _, ss := range sss {
if other, err := metav1.LabelSelectorAsSelector(ss.Spec.Selector); err == nil {
if r, ok := other.Requirements(); ok {
selector = selector.Add(r...)
}
}
}
}
return selector
}
// GetPodServices returns the services whose selectors match the labels of the given pod.
func GetPodServices(sl corelisters.ServiceLister, pod *v1.Pod) ([]*v1.Service, error) {
allServices, err := sl.Services(pod.Namespace).List(labels.Everything())
if err != nil {
return nil, err
}
var services []*v1.Service
for i := range allServices {
service := allServices[i]
if service.Spec.Selector == nil {
// services with nil selectors match nothing, not everything.
continue
}
selector := labels.Set(service.Spec.Selector).AsSelectorPreValidated()
if selector.Matches(labels.Set(pod.Labels)) {
services = append(services, service)
}
}
return services, nil
}
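
The selector construction above reduces to merging plain label maps and converting the result into a selector; a self-contained sketch of that step, not part of this commit (the selector contents are made up):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
)

func main() {
	// Hypothetical selectors collected from a Service and an RC matching the pod.
	svcSelector := labels.Set{"app": "web"}
	rcSelector := labels.Set{"app": "web", "tier": "frontend"}

	merged := labels.Merge(svcSelector, rcSelector)
	selector := merged.AsSelector()

	fmt.Println(selector.Matches(labels.Set{"app": "web", "tier": "frontend"})) // true
	fmt.Println(selector.Matches(labels.Set{"app": "web"}))                     // false: "tier" is missing
}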


@ -0,0 +1,45 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = ["node_affinity.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity",
visibility = ["//visibility:public"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/scheduler/framework/plugins/helper:go_default_library",
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = ["node_affinity_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/internal/cache:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
],
)


@ -0,0 +1,119 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeaffinity
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
pluginhelper "k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
"k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// NodeAffinity is a plugin that checks whether a pod's node selector and node affinity match the node's labels.
type NodeAffinity struct {
handle framework.FrameworkHandle
}
var _ framework.FilterPlugin = &NodeAffinity{}
var _ framework.ScorePlugin = &NodeAffinity{}
const (
// Name is the name of the plugin used in the plugin registry and configurations.
Name = "NodeAffinity"
// ErrReason for node affinity/selector not matching.
ErrReason = "node(s) didn't match node selector"
)
// Name returns name of the plugin. It is used in logs, etc.
func (pl *NodeAffinity) Name() string {
return Name
}
// Filter invoked at the filter extension point.
func (pl *NodeAffinity) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) *framework.Status {
node := nodeInfo.Node()
if node == nil {
return framework.NewStatus(framework.Error, "node not found")
}
if !pluginhelper.PodMatchesNodeSelectorAndAffinityTerms(pod, node) {
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason)
}
return nil
}
// Score invoked at the Score extension point.
func (pl *NodeAffinity) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
nodeInfo, err := pl.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
if err != nil {
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v", nodeName, err))
}
node := nodeInfo.Node()
if node == nil {
// err is guaranteed to be nil at this point, so report the actual problem.
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("node %q not found in Snapshot", nodeName))
}
affinity := pod.Spec.Affinity
var count int64
// A nil element of PreferredDuringSchedulingIgnoredDuringExecution matches no objects.
// An element of PreferredDuringSchedulingIgnoredDuringExecution that refers to an
// empty PreferredSchedulingTerm matches all objects.
if affinity != nil && affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil {
// Match PreferredDuringSchedulingIgnoredDuringExecution term by term.
for i := range affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution {
preferredSchedulingTerm := &affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution[i]
if preferredSchedulingTerm.Weight == 0 {
continue
}
// TODO: Avoid computing it for all nodes if this becomes a performance problem.
nodeSelector, err := v1helper.NodeSelectorRequirementsAsSelector(preferredSchedulingTerm.Preference.MatchExpressions)
if err != nil {
return 0, framework.NewStatus(framework.Error, err.Error())
}
if nodeSelector.Matches(labels.Set(node.Labels)) {
count += int64(preferredSchedulingTerm.Weight)
}
}
}
return count, nil
}
// NormalizeScore invoked after scoring all nodes.
func (pl *NodeAffinity) NormalizeScore(ctx context.Context, state *framework.CycleState, pod *v1.Pod, scores framework.NodeScoreList) *framework.Status {
return pluginhelper.DefaultNormalizeScore(framework.MaxNodeScore, false, scores)
}
// ScoreExtensions of the Score plugin.
func (pl *NodeAffinity) ScoreExtensions() framework.ScoreExtensions {
return pl
}
// New initializes a new plugin and returns it.
func New(_ *runtime.Unknown, h framework.FrameworkHandle) (framework.Plugin, error) {
return &NodeAffinity{handle: h}, nil
}
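
Score sums the weights of the matching preferred terms before NormalizeScore rescales them. A reduced sketch of that accumulation, not the vendored implementation: it handles only the In operator with a single value, whereas the plugin builds its selector via NodeSelectorRequirementsAsSelector.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/labels"
)

// scorePreferred mirrors the weight accumulation in NodeAffinity.Score for
// MatchExpressions of the form "key In [value]".
func scorePreferred(terms []v1.PreferredSchedulingTerm, nodeLabels map[string]string) int64 {
	var count int64
	for _, t := range terms {
		if t.Weight == 0 {
			continue
		}
		set := labels.Set{}
		for _, req := range t.Preference.MatchExpressions {
			if req.Operator == v1.NodeSelectorOpIn && len(req.Values) == 1 {
				set[req.Key] = req.Values[0]
			}
		}
		if labels.SelectorFromSet(set).Matches(labels.Set(nodeLabels)) {
			count += int64(t.Weight)
		}
	}
	return count
}

func main() {
	terms := []v1.PreferredSchedulingTerm{
		{Weight: 2, Preference: v1.NodeSelectorTerm{MatchExpressions: []v1.NodeSelectorRequirement{
			{Key: "disktype", Operator: v1.NodeSelectorOpIn, Values: []string{"ssd"}},
		}}},
		{Weight: 5, Preference: v1.NodeSelectorTerm{MatchExpressions: []v1.NodeSelectorRequirement{
			{Key: "zone", Operator: v1.NodeSelectorOpIn, Values: []string{"us-east-1a"}},
		}}},
	}
	// The node matches only the first term, so the raw score is 2.
	fmt.Println(scorePreferred(terms, map[string]string{"disktype": "ssd"}))
}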


@ -0,0 +1,40 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = ["node_name.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodename",
visibility = ["//visibility:public"],
deps = [
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = ["node_name_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
],
)


@ -0,0 +1,65 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodename
import (
"context"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
"k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// NodeName is a plugin that checks if a pod spec node name matches the current node.
type NodeName struct{}
var _ framework.FilterPlugin = &NodeName{}
const (
// Name is the name of the plugin used in the plugin registry and configurations.
Name = "NodeName"
// ErrReason returned when node name doesn't match.
ErrReason = "node(s) didn't match the requested hostname"
)
// Name returns name of the plugin. It is used in logs, etc.
func (pl *NodeName) Name() string {
return Name
}
// Filter invoked at the filter extension point.
func (pl *NodeName) Filter(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) *framework.Status {
if nodeInfo.Node() == nil {
return framework.NewStatus(framework.Error, "node not found")
}
if !Fits(pod, nodeInfo) {
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason)
}
return nil
}
// Fits actually checks if the pod fits the node.
func Fits(pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) bool {
return len(pod.Spec.NodeName) == 0 || pod.Spec.NodeName == nodeInfo.Node().Name
}
// New initializes a new plugin and returns it.
func New(_ *runtime.Unknown, _ framework.FrameworkHandle) (framework.Plugin, error) {
return &NodeName{}, nil
}
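
A usage sketch for Fits, not part of this commit (node and pod names are made up): an empty spec.nodeName fits any node, while a non-matching one does not.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodename"
	"k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)

func main() {
	node := &v1.Node{}
	node.Name = "worker-1"
	ni := nodeinfo.NewNodeInfo()
	if err := ni.SetNode(node); err != nil {
		panic(err)
	}

	unpinned := &v1.Pod{} // empty spec.nodeName fits any node
	pinned := &v1.Pod{}
	pinned.Spec.NodeName = "worker-2"

	fmt.Println(nodename.Fits(unpinned, ni)) // true
	fmt.Println(nodename.Fits(pinned, ni))   // false: names differ
}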


@ -0,0 +1,40 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = ["node_ports.go"],
importpath = "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeports",
visibility = ["//visibility:public"],
deps = [
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = ["node_ports_test.go"],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
],
)


@ -0,0 +1,134 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeports
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
"k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// NodePorts is a plugin that checks if a node has free ports for the requested pod ports.
type NodePorts struct{}
var _ framework.FilterPlugin = &NodePorts{}
const (
// Name is the name of the plugin used in the plugin registry and configurations.
Name = "NodePorts"
// preFilterStateKey is the key in CycleState to NodePorts pre-computed data.
// Using the name of the plugin will likely help us avoid collisions with other plugins.
preFilterStateKey = "PreFilter" + Name
// ErrReason when node ports aren't available.
ErrReason = "node(s) didn't have free ports for the requested pod ports"
)
type preFilterState []*v1.ContainerPort
// Clone the prefilter state.
func (s preFilterState) Clone() framework.StateData {
// The state is not impacted by adding/removing existing pods, hence we don't need to make a deep copy.
return s
}
// Name returns name of the plugin. It is used in logs, etc.
func (pl *NodePorts) Name() string {
return Name
}
// getContainerPorts returns the container ports declared by the given Pods' containers.
// It only collects the ports; it does not detect or resolve conflicts between them.
func getContainerPorts(pods ...*v1.Pod) []*v1.ContainerPort {
ports := []*v1.ContainerPort{}
for _, pod := range pods {
for j := range pod.Spec.Containers {
container := &pod.Spec.Containers[j]
for k := range container.Ports {
ports = append(ports, &container.Ports[k])
}
}
}
return ports
}
// PreFilter invoked at the prefilter extension point.
func (pl *NodePorts) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) *framework.Status {
s := getContainerPorts(pod)
cycleState.Write(preFilterStateKey, preFilterState(s))
return nil
}
// PreFilterExtensions do not exist for this plugin.
func (pl *NodePorts) PreFilterExtensions() framework.PreFilterExtensions {
return nil
}
func getPreFilterState(cycleState *framework.CycleState) (preFilterState, error) {
c, err := cycleState.Read(preFilterStateKey)
if err != nil {
// preFilterState doesn't exist, likely PreFilter wasn't invoked.
return nil, fmt.Errorf("error reading %q from cycleState: %v", preFilterStateKey, err)
}
s, ok := c.(preFilterState)
if !ok {
return nil, fmt.Errorf("%+v convert to nodeports.preFilterState error", c)
}
return s, nil
}
// Filter invoked at the filter extension point.
func (pl *NodePorts) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) *framework.Status {
wantPorts, err := getPreFilterState(cycleState)
if err != nil {
return framework.NewStatus(framework.Error, err.Error())
}
fits := fitsPorts(wantPorts, nodeInfo)
if !fits {
return framework.NewStatus(framework.Unschedulable, ErrReason)
}
return nil
}
// Fits checks if the pod fits the node.
func Fits(pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) bool {
return fitsPorts(getContainerPorts(pod), nodeInfo)
}
func fitsPorts(wantPorts []*v1.ContainerPort, nodeInfo *nodeinfo.NodeInfo) bool {
// check whether any of the requested ports conflicts with a port already in use on the node
existingPorts := nodeInfo.UsedPorts()
for _, cp := range wantPorts {
if existingPorts.CheckConflict(cp.HostIP, string(cp.Protocol), cp.HostPort) {
return false
}
}
return true
}
// New initializes a new plugin and returns it.
func New(_ *runtime.Unknown, _ framework.FrameworkHandle) (framework.Plugin, error) {
return &NodePorts{}, nil
}
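
The conflict check delegates to nodeinfo.HostPortInfo, which treats 0.0.0.0 as overlapping every concrete host IP for the same protocol and port. A small sketch of that behavior, not part of this commit (addresses and ports are made up):

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)

func main() {
	used := make(nodeinfo.HostPortInfo)
	used.Add("0.0.0.0", "TCP", 8080) // a pod on the node already binds *:8080/TCP

	fmt.Println(used.CheckConflict("127.0.0.1", "TCP", 8080)) // true: 0.0.0.0 overlaps every IP
	fmt.Println(used.CheckConflict("0.0.0.0", "TCP", 9090))   // false: the port is free
}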


@ -0,0 +1,73 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = [
"balanced_allocation.go",
"fit.go",
"least_allocated.go",
"most_allocated.go",
"requested_to_capacity_ratio.go",
"resource_allocation.go",
"resource_limits.go",
"test_util.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/framework/plugins/noderesources",
visibility = ["//visibility:public"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler/apis/config:go_default_library",
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = [
"balanced_allocation_test.go",
"fit_test.go",
"least_allocated_test.go",
"most_allocated_test.go",
"requested_to_capacity_ratio_test.go",
"resource_limits_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/internal/cache:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
],
)


@ -0,0 +1,120 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
"context"
"fmt"
"math"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
)
// BalancedAllocation is a score plugin that calculates the difference between the cpu and memory fraction
// of capacity, and prioritizes the host based on how close the two metrics are to each other.
type BalancedAllocation struct {
handle framework.FrameworkHandle
resourceAllocationScorer
}
var _ = framework.ScorePlugin(&BalancedAllocation{})
// BalancedAllocationName is the name of the plugin used in the plugin registry and configurations.
const BalancedAllocationName = "NodeResourcesBalancedAllocation"
// Name returns name of the plugin. It is used in logs, etc.
func (ba *BalancedAllocation) Name() string {
return BalancedAllocationName
}
// Score invoked at the score extension point.
func (ba *BalancedAllocation) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
nodeInfo, err := ba.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
if err != nil {
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v", nodeName, err))
}
// ba.score favors nodes with balanced resource usage rate.
// It should **NOT** be used alone, and **MUST** be used together
// with NodeResourcesLeastAllocated plugin. It calculates the difference between the cpu and memory fraction
// of capacity, and prioritizes the host based on how close the two metrics are to each other.
// Detail: score = (1 - variance(cpuFraction, memoryFraction, volumeFraction)) * MaxNodeScore. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced
// Resource Utilization"
return ba.score(pod, nodeInfo)
}
// ScoreExtensions of the Score plugin.
func (ba *BalancedAllocation) ScoreExtensions() framework.ScoreExtensions {
return nil
}
// NewBalancedAllocation initializes a new plugin and returns it.
func NewBalancedAllocation(_ *runtime.Unknown, h framework.FrameworkHandle) (framework.Plugin, error) {
return &BalancedAllocation{
handle: h,
resourceAllocationScorer: resourceAllocationScorer{
BalancedAllocationName,
balancedResourceScorer,
defaultRequestedRatioResources,
},
}, nil
}
// todo: use resource weights in the scorer function
func balancedResourceScorer(requested, allocable resourceToValueMap, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
cpuFraction := fractionOfCapacity(requested[v1.ResourceCPU], allocable[v1.ResourceCPU])
memoryFraction := fractionOfCapacity(requested[v1.ResourceMemory], allocable[v1.ResourceMemory])
// The goal is to find a node with the most balanced CPU, memory, and volume usage.
if cpuFraction >= 1 || memoryFraction >= 1 {
// if requested >= capacity, the corresponding host should never be preferred.
return 0
}
if includeVolumes && utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && allocatableVolumes > 0 {
volumeFraction := float64(requestedVolumes) / float64(allocatableVolumes)
if volumeFraction >= 1 {
// if requested >= capacity, the corresponding host should never be preferred.
return 0
}
// Compute variance for all the three fractions.
mean := (cpuFraction + memoryFraction + volumeFraction) / float64(3)
variance := float64((((cpuFraction - mean) * (cpuFraction - mean)) + ((memoryFraction - mean) * (memoryFraction - mean)) + ((volumeFraction - mean) * (volumeFraction - mean))) / float64(3))
// Since the variance is computed over fractions in [0, 1], it is itself a value in [0, 1].
// Using 1 - variance makes the score higher for the node with the least variance, and
// multiplying by MaxNodeScore scales it to the score range.
return int64((1 - variance) * float64(framework.MaxNodeScore))
}
// The difference between cpuFraction and memoryFraction lies in [-1, 1], so its absolute
// value lies in [0, 1], with 0 representing a perfectly balanced allocation. Taking
// 1 - diff and multiplying by MaxNodeScore yields a score in [0, MaxNodeScore], where
// MaxNodeScore represents a perfectly balanced allocation.
diff := math.Abs(cpuFraction - memoryFraction)
return int64((1 - diff) * float64(framework.MaxNodeScore))
}
func fractionOfCapacity(requested, capacity int64) float64 {
if capacity == 0 {
return 1
}
return float64(requested) / float64(capacity)
}
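// A minimal worked sketch (illustrative only, not part of the plugin): with the
// volume feature disabled, a node at 50% CPU and 30% memory utilization scores
// (1 - |0.5-0.3|) * MaxNodeScore = 80, while a node at 50%/50% would score the
// full MaxNodeScore. The request and capacity values below are arbitrary.
func exampleBalancedScore() int64 {
requested := resourceToValueMap{v1.ResourceCPU: 500, v1.ResourceMemory: 300}
allocable := resourceToValueMap{v1.ResourceCPU: 1000, v1.ResourceMemory: 1000}
return balancedResourceScorer(requested, allocable, false, 0, 0) // 80
}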

View File

@ -0,0 +1,267 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
"context"
"fmt"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/features"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
var _ framework.PreFilterPlugin = &Fit{}
var _ framework.FilterPlugin = &Fit{}
const (
// FitName is the name of the plugin used in the plugin registry and configurations.
FitName = "NodeResourcesFit"
// preFilterStateKey is the key in CycleState to NodeResourcesFit pre-computed data.
// Using the name of the plugin will likely help us avoid collisions with other plugins.
preFilterStateKey = "PreFilter" + FitName
)
// Fit is a plugin that checks if a node has sufficient resources.
type Fit struct {
ignoredResources sets.String
}
// FitArgs holds the args that are used to configure the plugin.
type FitArgs struct {
// IgnoredResources is the list of resources that NodeResources fit filter
// should ignore.
IgnoredResources []string `json:"ignoredResources,omitempty"`
}
// preFilterState computed at PreFilter and used at Filter.
type preFilterState struct {
schedulernodeinfo.Resource
}
// Clone the prefilter state.
func (s *preFilterState) Clone() framework.StateData {
return s
}
// Name returns name of the plugin. It is used in logs, etc.
func (f *Fit) Name() string {
return FitName
}
// computePodResourceRequest returns a schedulernodeinfo.Resource that covers the largest
// width in each resource dimension. Because init-containers run sequentially, we collect
// the max in each dimension iteratively. In contrast, we sum the resource vectors for
// regular containers since they run simultaneously.
//
// If Pod Overhead is specified and the feature gate is set, the resources defined for Overhead
// are added to the calculated Resource request sum.
//
// Example:
//
// Pod:
// InitContainers
// IC1:
// CPU: 2
// Memory: 1G
// IC2:
// CPU: 2
// Memory: 3G
// Containers
// C1:
// CPU: 2
// Memory: 1G
// C2:
// CPU: 1
// Memory: 1G
//
// Result: CPU: 3, Memory: 3G
func computePodResourceRequest(pod *v1.Pod) *preFilterState {
result := &preFilterState{}
for _, container := range pod.Spec.Containers {
result.Add(container.Resources.Requests)
}
// take max_resource(sum_pod, any_init_container)
for _, container := range pod.Spec.InitContainers {
result.SetMaxResource(container.Resources.Requests)
}
// If Overhead is being utilized, add to the total requests for the pod
if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(features.PodOverhead) {
result.Add(pod.Spec.Overhead)
}
return result
}
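// Worked sketch of the rule above (illustrative only): the init containers in the
// example contribute max(2,2) CPU and max(1G,3G) memory, the regular containers
// contribute 2+1 CPU and 1G+1G memory, and the final request is the element-wise
// max of the two, i.e. CPU: 3, Memory: 3G. The values below mirror that
// arithmetic directly on schedulernodeinfo.Resource.
func examplePodResourceRequest() schedulernodeinfo.Resource {
sumContainers := schedulernodeinfo.Resource{MilliCPU: 3000, Memory: 2 << 30} // sum over regular containers
maxInit := schedulernodeinfo.Resource{MilliCPU: 2000, Memory: 3 << 30} // max over init containers
result := sumContainers
if maxInit.MilliCPU > result.MilliCPU {
result.MilliCPU = maxInit.MilliCPU
}
if maxInit.Memory > result.Memory {
result.Memory = maxInit.Memory
}
return result // MilliCPU: 3000, Memory: 3Gi
}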
// PreFilter invoked at the prefilter extension point.
func (f *Fit) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) *framework.Status {
cycleState.Write(preFilterStateKey, computePodResourceRequest(pod))
return nil
}
// PreFilterExtensions returns prefilter extensions, pod add and remove.
func (f *Fit) PreFilterExtensions() framework.PreFilterExtensions {
return nil
}
func getPreFilterState(cycleState *framework.CycleState) (*preFilterState, error) {
c, err := cycleState.Read(preFilterStateKey)
if err != nil {
// preFilterState doesn't exist, likely PreFilter wasn't invoked.
return nil, fmt.Errorf("error reading %q from cycleState: %v", preFilterStateKey, err)
}
s, ok := c.(*preFilterState)
if !ok {
return nil, fmt.Errorf("%+v convert to NodeResourcesFit.preFilterState error", c)
}
return s, nil
}
// Filter invoked at the filter extension point.
// Checks if a node has sufficient resources, such as cpu, memory, gpu, and extended resources, to run a pod.
// It returns a list of insufficient resources, if empty, then the node has all the resources requested by the pod.
func (f *Fit) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *framework.Status {
s, err := getPreFilterState(cycleState)
if err != nil {
return framework.NewStatus(framework.Error, err.Error())
}
insufficientResources := fitsRequest(s, nodeInfo, f.ignoredResources)
if len(insufficientResources) != 0 {
// We will keep all failure reasons.
failureReasons := make([]string, 0, len(insufficientResources))
for _, r := range insufficientResources {
failureReasons = append(failureReasons, r.Reason)
}
return framework.NewStatus(framework.Unschedulable, failureReasons...)
}
return nil
}
// InsufficientResource describes what kind of resource limit is hit and caused the pod to not fit the node.
type InsufficientResource struct {
ResourceName v1.ResourceName
// We explicitly have a parameter for reason to avoid formatting a message on the fly
// for common resources, which is expensive for cluster autoscaler simulations.
Reason string
Requested int64
Used int64
Capacity int64
}
// Fits checks if a node has enough resources to host the pod.
func Fits(pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo, ignoredExtendedResources sets.String) []InsufficientResource {
return fitsRequest(computePodResourceRequest(pod), nodeInfo, ignoredExtendedResources)
}
func fitsRequest(podRequest *preFilterState, nodeInfo *schedulernodeinfo.NodeInfo, ignoredExtendedResources sets.String) []InsufficientResource {
insufficientResources := make([]InsufficientResource, 0, 4)
allowedPodNumber := nodeInfo.AllowedPodNumber()
if len(nodeInfo.Pods())+1 > allowedPodNumber {
insufficientResources = append(insufficientResources, InsufficientResource{
v1.ResourcePods,
"Too many pods",
1,
int64(len(nodeInfo.Pods())),
int64(allowedPodNumber),
})
}
if ignoredExtendedResources == nil {
ignoredExtendedResources = sets.NewString()
}
if podRequest.MilliCPU == 0 &&
podRequest.Memory == 0 &&
podRequest.EphemeralStorage == 0 &&
len(podRequest.ScalarResources) == 0 {
return insufficientResources
}
allocatable := nodeInfo.AllocatableResource()
if allocatable.MilliCPU < podRequest.MilliCPU+nodeInfo.RequestedResource().MilliCPU {
insufficientResources = append(insufficientResources, InsufficientResource{
v1.ResourceCPU,
"Insufficient cpu",
podRequest.MilliCPU,
nodeInfo.RequestedResource().MilliCPU,
allocatable.MilliCPU,
})
}
if allocatable.Memory < podRequest.Memory+nodeInfo.RequestedResource().Memory {
insufficientResources = append(insufficientResources, InsufficientResource{
v1.ResourceMemory,
"Insufficient memory",
podRequest.Memory,
nodeInfo.RequestedResource().Memory,
allocatable.Memory,
})
}
if allocatable.EphemeralStorage < podRequest.EphemeralStorage+nodeInfo.RequestedResource().EphemeralStorage {
insufficientResources = append(insufficientResources, InsufficientResource{
v1.ResourceEphemeralStorage,
"Insufficient ephemeral-storage",
podRequest.EphemeralStorage,
nodeInfo.RequestedResource().EphemeralStorage,
allocatable.EphemeralStorage,
})
}
for rName, rQuant := range podRequest.ScalarResources {
if v1helper.IsExtendedResourceName(rName) {
// If this resource is one of the extended resources that should be
// ignored, we will skip checking it.
if ignoredExtendedResources.Has(string(rName)) {
continue
}
}
if allocatable.ScalarResources[rName] < rQuant+nodeInfo.RequestedResource().ScalarResources[rName] {
insufficientResources = append(insufficientResources, InsufficientResource{
rName,
fmt.Sprintf("Insufficient %v", rName),
podRequest.ScalarResources[rName],
nodeInfo.RequestedResource().ScalarResources[rName],
allocatable.ScalarResources[rName],
})
}
}
return insufficientResources
}
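// Illustrative helper (not part of the plugin): Fits can be called directly, as
// cluster autoscaler simulations do, to list what a node lacks; an empty result
// means the pod fits.
func exampleFitReasons(pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) []string {
var reasons []string
for _, r := range Fits(pod, nodeInfo, nil) {
reasons = append(reasons, fmt.Sprintf("%s: %s", r.ResourceName, r.Reason))
}
return reasons
}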
// NewFit initializes a new plugin and returns it.
func NewFit(plArgs *runtime.Unknown, _ framework.FrameworkHandle) (framework.Plugin, error) {
args := &FitArgs{}
if err := framework.DecodeInto(plArgs, args); err != nil {
return nil, err
}
fit := &Fit{}
fit.ignoredResources = sets.NewString(args.IgnoredResources...)
return fit, nil
}
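// Illustrative only: constructing the plugin with an ignoredResources argument,
// the way a scheduler configuration would. "example.com/gpu" is a placeholder
// extended resource name, and the nil FrameworkHandle is acceptable here only
// because NewFit does not use it.
func exampleNewFitWithArgs() (framework.Plugin, error) {
args := &runtime.Unknown{
ContentType: runtime.ContentTypeJSON,
Raw: []byte(`{"ignoredResources":["example.com/gpu"]}`),
}
return NewFit(args, nil)
}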

View File

@ -0,0 +1,99 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
)
// LeastAllocated is a score plugin that favors nodes with fewer requested resources relative to capacity.
type LeastAllocated struct {
handle framework.FrameworkHandle
resourceAllocationScorer
}
var _ = framework.ScorePlugin(&LeastAllocated{})
// LeastAllocatedName is the name of the plugin used in the plugin registry and configurations.
const LeastAllocatedName = "NodeResourcesLeastAllocated"
// Name returns name of the plugin. It is used in logs, etc.
func (la *LeastAllocated) Name() string {
return LeastAllocatedName
}
// Score invoked at the score extension point.
func (la *LeastAllocated) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
nodeInfo, err := la.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
if err != nil {
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v", nodeName, err))
}
// la.score favors nodes with fewer requested resources.
// It calculates the fraction of memory and CPU requested by pods scheduled on the node, and
// prioritizes based on the average of the fraction of capacity that remains unused.
//
// Details:
// (cpu((capacity-sum(requested))*MaxNodeScore/capacity) + memory((capacity-sum(requested))*MaxNodeScore/capacity))/2
return la.score(pod, nodeInfo)
}
// ScoreExtensions of the Score plugin.
func (la *LeastAllocated) ScoreExtensions() framework.ScoreExtensions {
return nil
}
// NewLeastAllocated initializes a new plugin and returns it.
func NewLeastAllocated(_ *runtime.Unknown, h framework.FrameworkHandle) (framework.Plugin, error) {
return &LeastAllocated{
handle: h,
resourceAllocationScorer: resourceAllocationScorer{
LeastAllocatedName,
leastResourceScorer,
defaultRequestedRatioResources,
},
}, nil
}
func leastResourceScorer(requested, allocable resourceToValueMap, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
var nodeScore, weightSum int64
for resource, weight := range defaultRequestedRatioResources {
resourceScore := leastRequestedScore(requested[resource], allocable[resource])
nodeScore += resourceScore * weight
weightSum += weight
}
return nodeScore / weightSum
}
// The unused capacity is calculated on a scale of 0 to MaxNodeScore,
// 0 being the lowest priority and MaxNodeScore being the highest.
// The more unused resources, the higher the score.
func leastRequestedScore(requested, capacity int64) int64 {
if capacity == 0 {
return 0
}
if requested > capacity {
return 0
}
return ((capacity - requested) * int64(framework.MaxNodeScore)) / capacity
}
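// Numeric sketch (illustrative only): a node with 1000m CPU allocatable and 250m
// requested keeps 75% unused, so it scores (1000-250)*100/1000 = 75 out of
// framework.MaxNodeScore.
func exampleLeastRequestedScore() int64 {
return leastRequestedScore(250, 1000) // 75
}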

View File

@ -0,0 +1,102 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
)
// MostAllocated is a score plugin that favors nodes with more requested resources relative to capacity.
type MostAllocated struct {
handle framework.FrameworkHandle
resourceAllocationScorer
}
var _ = framework.ScorePlugin(&MostAllocated{})
// MostAllocatedName is the name of the plugin used in the plugin registry and configurations.
const MostAllocatedName = "NodeResourcesMostAllocated"
// Name returns name of the plugin. It is used in logs, etc.
func (ma *MostAllocated) Name() string {
return MostAllocatedName
}
// Score invoked at the Score extension point.
func (ma *MostAllocated) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
nodeInfo, err := ma.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
if err != nil || nodeInfo.Node() == nil {
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v, node is nil: %v", nodeName, err, nodeInfo.Node() == nil))
}
// ma.score favors nodes with most requested resources.
// It calculates the fraction of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the average of the fraction of requested to capacity.
// Details: (cpu(MaxNodeScore * sum(requested) / capacity) + memory(MaxNodeScore * sum(requested) / capacity)) / 2
return ma.score(pod, nodeInfo)
}
// ScoreExtensions of the Score plugin.
func (ma *MostAllocated) ScoreExtensions() framework.ScoreExtensions {
return nil
}
// NewMostAllocated initializes a new plugin and returns it.
func NewMostAllocated(_ *runtime.Unknown, h framework.FrameworkHandle) (framework.Plugin, error) {
return &MostAllocated{
handle: h,
resourceAllocationScorer: resourceAllocationScorer{
MostAllocatedName,
mostResourceScorer,
defaultRequestedRatioResources,
},
}, nil
}
func mostResourceScorer(requested, allocable resourceToValueMap, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
var nodeScore, weightSum int64
for resource, weight := range defaultRequestedRatioResources {
resourceScore := mostRequestedScore(requested[resource], allocable[resource])
nodeScore += resourceScore * weight
weightSum += weight
}
return (nodeScore / weightSum)
}
// The used capacity is calculated on a scale of 0 to MaxNodeScore,
// 0 being the lowest priority and MaxNodeScore being the highest.
// The more resources are used, the higher the score. This function
// is almost a reversed version of leastRequestedScore
// (MaxNodeScore - leastRequestedScore); the main difference is in rounding.
// It is kept separate so that the formula stays clean and the widely used
// leastRequestedScore (relied on by users in their default scheduling
// policies) is not modified.
func mostRequestedScore(requested, capacity int64) int64 {
if capacity == 0 {
return 0
}
if requested > capacity {
return 0
}
return (requested * framework.MaxNodeScore) / capacity
}
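// Illustrative only: mostRequestedScore mirrors leastRequestedScore, so for the
// same inputs the two scores sum to MaxNodeScore up to integer rounding:
// mostRequestedScore(250, 1000) is 25 where leastRequestedScore(250, 1000) is 75.
func exampleMostRequestedScore() int64 {
return mostRequestedScore(250, 1000) // 25
}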

View File

@ -0,0 +1,219 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
"context"
"fmt"
"math"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
)
const (
// RequestedToCapacityRatioName is the name of this plugin.
RequestedToCapacityRatioName = "RequestedToCapacityRatio"
minUtilization = 0
maxUtilization = 100
minScore = 0
maxScore = framework.MaxNodeScore
)
// RequestedToCapacityRatioArgs holds the args that are used to configure the plugin.
type RequestedToCapacityRatioArgs struct {
config.RequestedToCapacityRatioArguments
}
type functionShape []functionShapePoint
type functionShapePoint struct {
// utilization is the function argument (x axis).
utilization int64
// score is the function value (y axis).
score int64
}
// NewRequestedToCapacityRatio initializes a new plugin and returns it.
func NewRequestedToCapacityRatio(plArgs *runtime.Unknown, handle framework.FrameworkHandle) (framework.Plugin, error) {
args := &config.RequestedToCapacityRatioArguments{}
if err := framework.DecodeInto(plArgs, args); err != nil {
return nil, err
}
shape := make([]functionShapePoint, 0, len(args.Shape))
for _, point := range args.Shape {
shape = append(shape, functionShapePoint{
utilization: int64(point.Utilization),
// MaxCustomPriorityScore may diverge from the max score used in the scheduler and defined by MaxNodeScore,
// therefore we need to scale the score returned by requested to capacity ratio to the score range
// used by the scheduler.
score: int64(point.Score) * (framework.MaxNodeScore / config.MaxCustomPriorityScore),
})
}
if err := validateFunctionShape(shape); err != nil {
return nil, err
}
resourceToWeightMap := make(resourceToWeightMap)
for _, resource := range args.Resources {
resourceToWeightMap[v1.ResourceName(resource.Name)] = resource.Weight
if resource.Weight == 0 {
// Apply the default weight.
resourceToWeightMap[v1.ResourceName(resource.Name)] = 1
}
}
if len(args.Resources) == 0 {
// If no resources are specified, use the default set.
resourceToWeightMap = defaultRequestedRatioResources
}
return &RequestedToCapacityRatio{
handle: handle,
resourceAllocationScorer: resourceAllocationScorer{
RequestedToCapacityRatioName,
buildRequestedToCapacityRatioScorerFunction(shape, resourceToWeightMap),
resourceToWeightMap,
},
}, nil
}
// RequestedToCapacityRatio is a score plugin that allows users to apply bin packing
// on core resources like CPU and memory, as well as extended resources like accelerators.
type RequestedToCapacityRatio struct {
handle framework.FrameworkHandle
resourceAllocationScorer
}
var _ framework.ScorePlugin = &RequestedToCapacityRatio{}
// Name returns name of the plugin. It is used in logs, etc.
func (pl *RequestedToCapacityRatio) Name() string {
return RequestedToCapacityRatioName
}
// Score invoked at the score extension point.
func (pl *RequestedToCapacityRatio) Score(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
nodeInfo, err := pl.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
if err != nil {
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v", nodeName, err))
}
return pl.score(pod, nodeInfo)
}
// ScoreExtensions of the Score plugin.
func (pl *RequestedToCapacityRatio) ScoreExtensions() framework.ScoreExtensions {
return nil
}
func validateFunctionShape(shape functionShape) error {
if len(shape) == 0 {
return fmt.Errorf("at least one point must be specified")
}
for i := 1; i < len(shape); i++ {
if shape[i-1].utilization >= shape[i].utilization {
return fmt.Errorf("utilization values must be sorted. Utilization[%d]==%d >= Utilization[%d]==%d", i-1, shape[i-1].utilization, i, shape[i].utilization)
}
}
for i, point := range shape {
if point.utilization < minUtilization {
return fmt.Errorf("utilization values must not be less than %d. Utilization[%d]==%d", minUtilization, i, point.utilization)
}
if point.utilization > maxUtilization {
return fmt.Errorf("utilization values must not be greater than %d. Utilization[%d]==%d", maxUtilization, i, point.utilization)
}
if point.score < minScore {
return fmt.Errorf("score values must not be less than %d. Score[%d]==%d", minScore, i, point.score)
}
if point.score > maxScore {
return fmt.Errorf("score values must not be greater than %d. Score[%d]==%d", maxScore, i, point.score)
}
}
return nil
}
func validateResourceWeightMap(resourceToWeightMap resourceToWeightMap) error {
if len(resourceToWeightMap) == 0 {
return fmt.Errorf("resourceToWeightMap cannot be nil")
}
for resource, weight := range resourceToWeightMap {
if weight < 1 {
return fmt.Errorf("resource %s weight %d must not be less than 1", string(resource), weight)
}
}
return nil
}
func buildRequestedToCapacityRatioScorerFunction(scoringFunctionShape functionShape, resourceToWeightMap resourceToWeightMap) func(resourceToValueMap, resourceToValueMap, bool, int, int) int64 {
rawScoringFunction := buildBrokenLinearFunction(scoringFunctionShape)
err := validateResourceWeightMap(resourceToWeightMap)
if err != nil {
klog.Error(err)
}
resourceScoringFunction := func(requested, capacity int64) int64 {
if capacity == 0 || requested > capacity {
return rawScoringFunction(maxUtilization)
}
return rawScoringFunction(maxUtilization - (capacity-requested)*maxUtilization/capacity)
}
return func(requested, allocable resourceToValueMap, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
var nodeScore, weightSum int64
for resource, weight := range resourceToWeightMap {
resourceScore := resourceScoringFunction(requested[resource], allocable[resource])
if resourceScore > 0 {
nodeScore += resourceScore * weight
weightSum += weight
}
}
if weightSum == 0 {
return 0
}
return int64(math.Round(float64(nodeScore) / float64(weightSum)))
}
}
// Creates a function which is built using linear segments. Segments are defined via shape array.
// Shape[i].utilization values represent points on the "utilization" axis where adjacent segments meet.
// Shape[i].score represents function values at meeting points.
//
// function f(p) is defined as:
// shape[0].score for p < shape[0].utilization
// shape[i].score for p == shape[i].utilization
// shape[n-1].score for p > shape[n-1].utilization
// and linear between points (shape[i-1].utilization < p < shape[i].utilization)
func buildBrokenLinearFunction(shape functionShape) func(int64) int64 {
return func(p int64) int64 {
for i := 0; i < len(shape); i++ {
if p <= shape[i].utilization {
if i == 0 {
return shape[0].score
}
return shape[i-1].score + (shape[i].score-shape[i-1].score)*(p-shape[i-1].utilization)/(shape[i].utilization-shape[i-1].utilization)
}
}
return shape[len(shape)-1].score
}
}
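// Illustrative only: a two-point shape {0 -> 0, 100 -> 100} yields a linear ramp
// that rewards packed nodes; reversing the scores would spread pods instead.
// Inside the interval the function interpolates, outside it clamps.
func exampleBrokenLinearFunction() []int64 {
f := buildBrokenLinearFunction(functionShape{
{utilization: 0, score: 0},
{utilization: 100, score: 100},
})
return []int64{f(0), f(25), f(100), f(130)} // 0, 25, 100, 100
}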

View File

@ -0,0 +1,135 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
v1 "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/features"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
// resourceToWeightMap contains resource name and weight.
type resourceToWeightMap map[v1.ResourceName]int64
// defaultRequestedRatioResources is the default resourceToWeightMap, covering CPU and memory.
var defaultRequestedRatioResources = resourceToWeightMap{v1.ResourceMemory: 1, v1.ResourceCPU: 1}
// resourceAllocationScorer contains information to calculate resource allocation score.
type resourceAllocationScorer struct {
Name string
scorer func(requested, allocable resourceToValueMap, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64
resourceToWeightMap resourceToWeightMap
}
// resourceToValueMap contains resource name and score.
type resourceToValueMap map[v1.ResourceName]int64
// score will use `scorer` function to calculate the score.
func (r *resourceAllocationScorer) score(
pod *v1.Pod,
nodeInfo *schedulernodeinfo.NodeInfo) (int64, *framework.Status) {
node := nodeInfo.Node()
if node == nil {
return 0, framework.NewStatus(framework.Error, "node not found")
}
if r.resourceToWeightMap == nil {
return 0, framework.NewStatus(framework.Error, "resources not found")
}
requested := make(resourceToValueMap, len(r.resourceToWeightMap))
allocatable := make(resourceToValueMap, len(r.resourceToWeightMap))
for resource := range r.resourceToWeightMap {
allocatable[resource], requested[resource] = calculateResourceAllocatableRequest(nodeInfo, pod, resource)
}
var score int64
// If the pod has volumes, the volume counts can be fed to the scorer function for balanced resource allocation.
if len(pod.Spec.Volumes) > 0 && utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && nodeInfo.TransientInfo != nil {
score = r.scorer(requested, allocatable, true, nodeInfo.TransientInfo.TransNodeInfo.RequestedVolumes, nodeInfo.TransientInfo.TransNodeInfo.AllocatableVolumesCount)
} else {
score = r.scorer(requested, allocatable, false, 0, 0)
}
if klog.V(10) {
if len(pod.Spec.Volumes) > 0 && utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && nodeInfo.TransientInfo != nil {
klog.Infof(
"%v -> %v: %v, map of allocatable resources %v, map of requested resources %v , allocatable volumes %d, requested volumes %d, score %d",
pod.Name, node.Name, r.Name,
allocatable, requested, nodeInfo.TransientInfo.TransNodeInfo.AllocatableVolumesCount,
nodeInfo.TransientInfo.TransNodeInfo.RequestedVolumes,
score,
)
} else {
klog.Infof(
"%v -> %v: %v, map of allocatable resources %v, map of requested resources %v ,score %d,",
pod.Name, node.Name, r.Name,
allocatable, requested, score,
)
}
}
return score, nil
}
// calculateResourceAllocatableRequest returns the allocatable and requested values for the given resource.
func calculateResourceAllocatableRequest(nodeInfo *schedulernodeinfo.NodeInfo, pod *v1.Pod, resource v1.ResourceName) (int64, int64) {
allocatable := nodeInfo.AllocatableResource()
requested := nodeInfo.RequestedResource()
podRequest := calculatePodResourceRequest(pod, resource)
switch resource {
case v1.ResourceCPU:
return allocatable.MilliCPU, (nodeInfo.NonZeroRequest().MilliCPU + podRequest)
case v1.ResourceMemory:
return allocatable.Memory, (nodeInfo.NonZeroRequest().Memory + podRequest)
case v1.ResourceEphemeralStorage:
return allocatable.EphemeralStorage, (requested.EphemeralStorage + podRequest)
default:
if v1helper.IsScalarResourceName(resource) {
return allocatable.ScalarResources[resource], (requested.ScalarResources[resource] + podRequest)
}
}
if klog.V(10) {
klog.Infof("requested resource %v not considered for node score calculation",
resource,
)
}
return 0, 0
}
// calculatePodResourceRequest returns the total non-zero requests. If Overhead is defined for the pod and the
// PodOverhead feature is enabled, the Overhead is added to the result.
func calculatePodResourceRequest(pod *v1.Pod, resource v1.ResourceName) int64 {
var podRequest int64
for i := range pod.Spec.Containers {
container := &pod.Spec.Containers[i]
value := schedutil.GetNonzeroRequestForResource(resource, &container.Resources.Requests)
podRequest += value
}
// If Overhead is being utilized, add to the total requests for the pod
if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(features.PodOverhead) {
if quantity, found := pod.Spec.Overhead[resource]; found {
podRequest += quantity.Value()
}
}
return podRequest
}
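// Illustrative only: containers that declare no requests still count toward the
// score through schedutil's non-zero defaults (100m CPU and 200Mi memory in this
// release), so a pod with two empty containers contributes 200 millicores here.
// The pod below is a throwaway fixture.
func exampleNonZeroPodRequest() int64 {
pod := &v1.Pod{Spec: v1.PodSpec{Containers: []v1.Container{{}, {}}}}
return calculatePodResourceRequest(pod, v1.ResourceCPU) // 200
}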

View File

@ -0,0 +1,161 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// ResourceLimits is a score plugin that increases the score of a node by 1 if the node
// satisfies the pod's resource limits.
type ResourceLimits struct {
handle framework.FrameworkHandle
}
var _ = framework.PreScorePlugin(&ResourceLimits{})
var _ = framework.ScorePlugin(&ResourceLimits{})
const (
// ResourceLimitsName is the name of the plugin used in the plugin registry and configurations.
ResourceLimitsName = "NodeResourceLimits"
// preScoreStateKey is the key in CycleState to NodeResourceLimits pre-computed data.
// Using the name of the plugin will likely help us avoid collisions with other plugins.
preScoreStateKey = "PreScore" + ResourceLimitsName
)
// preScoreState computed at PreScore and used at Score.
type preScoreState struct {
podResourceRequest *schedulernodeinfo.Resource
}
// Clone the preScore state.
func (s *preScoreState) Clone() framework.StateData {
return s
}
// Name returns name of the plugin. It is used in logs, etc.
func (rl *ResourceLimits) Name() string {
return ResourceLimitsName
}
// PreScore builds and writes cycle state used by Score and NormalizeScore.
func (rl *ResourceLimits) PreScore(
pCtx context.Context,
cycleState *framework.CycleState,
pod *v1.Pod,
nodes []*v1.Node,
) *framework.Status {
if len(nodes) == 0 {
// No nodes to score.
return nil
}
if rl.handle.SnapshotSharedLister() == nil {
return framework.NewStatus(framework.Error, fmt.Sprintf("empty shared lister"))
}
s := &preScoreState{
podResourceRequest: getResourceLimits(pod),
}
cycleState.Write(preScoreStateKey, s)
return nil
}
func getPodResource(cycleState *framework.CycleState) (*schedulernodeinfo.Resource, error) {
c, err := cycleState.Read(preScoreStateKey)
if err != nil {
return nil, fmt.Errorf("Error reading %q from cycleState: %v", preScoreStateKey, err)
}
s, ok := c.(*preScoreState)
if !ok {
return nil, fmt.Errorf("%+v convert to ResourceLimits.preScoreState error", c)
}
return s.podResourceRequest, nil
}
// Score invoked at the Score extension point.
// The "score" returned in this function is the matching number of pods on the `nodeName`.
// Currently works as follows:
// If a node does not publish its allocatable resources (cpu and memory both), the node score is not affected.
// If a pod does not specify its cpu and memory limits both, the node score is not affected.
// If one or both of cpu and memory limits of the pod are satisfied, the node is assigned a score of 1.
// Rationale of choosing the lowest score of 1 is that this is mainly selected to break ties between nodes that have
// same scores assigned by one of least and most requested priority functions.
func (rl *ResourceLimits) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
nodeInfo, err := rl.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
if err != nil || nodeInfo.Node() == nil {
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v, node is nil: %v", nodeName, err, nodeInfo.Node() == nil))
}
allocatableResources := nodeInfo.AllocatableResource()
podLimits, err := getPodResource(state)
if err != nil {
return 0, framework.NewStatus(framework.Error, err.Error())
}
cpuScore := computeScore(podLimits.MilliCPU, allocatableResources.MilliCPU)
memScore := computeScore(podLimits.Memory, allocatableResources.Memory)
score := int64(0)
if cpuScore == 1 || memScore == 1 {
score = 1
}
return score, nil
}
// ScoreExtensions of the Score plugin.
func (rl *ResourceLimits) ScoreExtensions() framework.ScoreExtensions {
return nil
}
// NewResourceLimits initializes a new plugin and returns it.
func NewResourceLimits(_ *runtime.Unknown, h framework.FrameworkHandle) (framework.Plugin, error) {
return &ResourceLimits{handle: h}, nil
}
// getResourceLimits computes resource limits for the input pod.
// This function exists for consistency with other priority functions,
// most (or perhaps all) of which work with schedulernodeinfo.Resource.
func getResourceLimits(pod *v1.Pod) *schedulernodeinfo.Resource {
result := &schedulernodeinfo.Resource{}
for _, container := range pod.Spec.Containers {
result.Add(container.Resources.Limits)
}
// take max_resource(sum_pod, any_init_container)
for _, container := range pod.Spec.InitContainers {
result.SetMaxResource(container.Resources.Limits)
}
return result
}
// computeScore returns 1 if limit value is less than or equal to allocatable
// value, otherwise it returns 0.
func computeScore(limit, allocatable int64) int64 {
if limit != 0 && allocatable != 0 && limit <= allocatable {
return 1
}
return 0
}

View File

@ -0,0 +1,55 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesources
import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
func makeNode(node string, milliCPU, memory int64) *v1.Node {
return &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: node},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
},
},
}
}
func makeNodeWithExtendedResource(node string, milliCPU, memory int64, extendedResource map[string]int64) *v1.Node {
resourceList := make(map[v1.ResourceName]resource.Quantity)
for res, quantity := range extendedResource {
resourceList[v1.ResourceName(res)] = *resource.NewQuantity(quantity, resource.DecimalSI)
}
resourceList[v1.ResourceCPU] = *resource.NewMilliQuantity(milliCPU, resource.DecimalSI)
resourceList[v1.ResourceMemory] = *resource.NewQuantity(memory, resource.BinarySI)
return &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: node},
Status: v1.NodeStatus{
Capacity: resourceList,
Allocatable: resourceList,
},
}
}

View File

@ -0,0 +1,70 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = [
"cycle_state.go",
"framework.go",
"interface.go",
"metrics_recorder.go",
"registry.go",
"waiting_pods_map.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1",
visibility = ["//visibility:public"],
deps = [
"//pkg/controller/volume/scheduling:go_default_library",
"//pkg/scheduler/apis/config:go_default_library",
"//pkg/scheduler/listers:go_default_library",
"//pkg/scheduler/metrics:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/json:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/client-go/informers:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
"//staging/src/k8s.io/component-base/metrics:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/sigs.k8s.io/yaml:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = [
"cycle_state_test.go",
"framework_test.go",
"interface_test.go",
"registry_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/apis/config:go_default_library",
"//pkg/scheduler/metrics:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
"//vendor/github.com/prometheus/client_model/go:go_default_library",
],
)

View File

@ -0,0 +1,130 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"errors"
"sync"
)
const (
// NotFound is the not found error message.
NotFound = "not found"
)
// StateData is a generic type for arbitrary data stored in CycleState.
type StateData interface {
// Clone is an interface to make a copy of StateData. For performance reasons,
// clone should make shallow copies for members (e.g., slices or maps) that are not
// impacted by PreFilter's optional AddPod/RemovePod methods.
Clone() StateData
}
// StateKey is the type of keys stored in CycleState.
type StateKey string
// CycleState provides a mechanism for plugins to store and retrieve arbitrary data.
// StateData stored by one plugin can be read, altered, or deleted by another plugin.
// CycleState does not provide any data protection, as all plugins are assumed to be
// trusted.
type CycleState struct {
mx sync.RWMutex
storage map[StateKey]StateData
// if recordPluginMetrics is true, PluginExecutionDuration will be recorded for this cycle.
recordPluginMetrics bool
}
// NewCycleState initializes a new CycleState and returns its pointer.
func NewCycleState() *CycleState {
return &CycleState{
storage: make(map[StateKey]StateData),
}
}
// ShouldRecordPluginMetrics returns whether PluginExecutionDuration metrics should be recorded.
func (c *CycleState) ShouldRecordPluginMetrics() bool {
if c == nil {
return false
}
return c.recordPluginMetrics
}
// SetRecordPluginMetrics sets recordPluginMetrics to the given value.
func (c *CycleState) SetRecordPluginMetrics(flag bool) {
if c == nil {
return
}
c.recordPluginMetrics = flag
}
// Clone creates a copy of CycleState and returns its pointer. Clone returns
// nil if the state being cloned is nil.
func (c *CycleState) Clone() *CycleState {
if c == nil {
return nil
}
stateCopy := NewCycleState()
for k, v := range c.storage {
stateCopy.Write(k, v.Clone())
}
return stateCopy
}
// Read retrieves data with the given "key" from CycleState. If the key is not
// present an error is returned.
// This function is not thread safe. In multi-threaded code, lock should be
// acquired first.
func (c *CycleState) Read(key StateKey) (StateData, error) {
if v, ok := c.storage[key]; ok {
return v, nil
}
return nil, errors.New(NotFound)
}
// Write stores the given "val" in CycleState with the given "key".
// This function is not thread safe. In multi-threaded code, lock should be
// acquired first.
func (c *CycleState) Write(key StateKey, val StateData) {
c.storage[key] = val
}
// Delete deletes data with the given key from CycleState.
// This function is not thread safe. In multi-threaded code, lock should be
// acquired first.
func (c *CycleState) Delete(key StateKey) {
delete(c.storage, key)
}
// Lock acquires CycleState lock.
func (c *CycleState) Lock() {
c.mx.Lock()
}
// Unlock releases CycleState lock.
func (c *CycleState) Unlock() {
c.mx.Unlock()
}
// RLock acquires CycleState read lock.
func (c *CycleState) RLock() {
c.mx.RLock()
}
// RUnlock releases CycleState read lock.
func (c *CycleState) RUnlock() {
c.mx.RUnlock()
}
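// Illustrative usage (not part of the framework): a plugin writes its
// pre-computed data once and reads it back later in the same scheduling cycle.
// The key and payload types below are hypothetical.
type exampleState struct{ replicas int }

func (s *exampleState) Clone() StateData { return s }

func exampleCycleStateRoundTrip() (int, error) {
cs := NewCycleState()
cs.Write("example.com/replicas", &exampleState{replicas: 3})
data, err := cs.Read("example.com/replicas")
if err != nil {
return 0, err
}
return data.(*exampleState).replicas, nil
}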

View File

@ -0,0 +1,918 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"context"
"fmt"
"reflect"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/controller/volume/scheduling"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
schedulerlisters "k8s.io/kubernetes/pkg/scheduler/listers"
"k8s.io/kubernetes/pkg/scheduler/metrics"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
const (
// Filter is the name of the filter extension point.
Filter = "Filter"
// Specifies the maximum timeout a permit plugin can return.
maxTimeout time.Duration = 15 * time.Minute
preFilter = "PreFilter"
preFilterExtensionAddPod = "PreFilterExtensionAddPod"
preFilterExtensionRemovePod = "PreFilterExtensionRemovePod"
preScore = "PreScore"
score = "Score"
scoreExtensionNormalize = "ScoreExtensionNormalize"
preBind = "PreBind"
bind = "Bind"
postBind = "PostBind"
reserve = "Reserve"
unreserve = "Unreserve"
permit = "Permit"
)
// framework is the component responsible for initializing and running scheduler
// plugins.
type framework struct {
registry Registry
snapshotSharedLister schedulerlisters.SharedLister
waitingPods *waitingPodsMap
pluginNameToWeightMap map[string]int
queueSortPlugins []QueueSortPlugin
preFilterPlugins []PreFilterPlugin
filterPlugins []FilterPlugin
preScorePlugins []PreScorePlugin
scorePlugins []ScorePlugin
reservePlugins []ReservePlugin
preBindPlugins []PreBindPlugin
bindPlugins []BindPlugin
postBindPlugins []PostBindPlugin
unreservePlugins []UnreservePlugin
permitPlugins []PermitPlugin
clientSet clientset.Interface
informerFactory informers.SharedInformerFactory
volumeBinder scheduling.SchedulerVolumeBinder
metricsRecorder *metricsRecorder
// Indicates that RunFilterPlugins should accumulate all failed statuses and not return
// after the first failure.
runAllFilters bool
}
// extensionPoint encapsulates desired and applied set of plugins at a specific extension
// point. This is used to simplify iterating over all extension points supported by the
// framework.
type extensionPoint struct {
// the set of plugins to be configured at this extension point.
plugins *config.PluginSet
// a pointer to the slice storing plugins implementations that will run at this
// extension point.
slicePtr interface{}
}
func (f *framework) getExtensionPoints(plugins *config.Plugins) []extensionPoint {
return []extensionPoint{
{plugins.PreFilter, &f.preFilterPlugins},
{plugins.Filter, &f.filterPlugins},
{plugins.Reserve, &f.reservePlugins},
{plugins.PreScore, &f.preScorePlugins},
{plugins.Score, &f.scorePlugins},
{plugins.PreBind, &f.preBindPlugins},
{plugins.Bind, &f.bindPlugins},
{plugins.PostBind, &f.postBindPlugins},
{plugins.Unreserve, &f.unreservePlugins},
{plugins.Permit, &f.permitPlugins},
{plugins.QueueSort, &f.queueSortPlugins},
}
}
type frameworkOptions struct {
clientSet clientset.Interface
informerFactory informers.SharedInformerFactory
snapshotSharedLister schedulerlisters.SharedLister
metricsRecorder *metricsRecorder
volumeBinder scheduling.SchedulerVolumeBinder
runAllFilters bool
}
// Option for the framework.
type Option func(*frameworkOptions)
// WithClientSet sets clientSet for the scheduling framework.
func WithClientSet(clientSet clientset.Interface) Option {
return func(o *frameworkOptions) {
o.clientSet = clientSet
}
}
// WithInformerFactory sets informer factory for the scheduling framework.
func WithInformerFactory(informerFactory informers.SharedInformerFactory) Option {
return func(o *frameworkOptions) {
o.informerFactory = informerFactory
}
}
// WithSnapshotSharedLister sets the SharedLister of the snapshot.
func WithSnapshotSharedLister(snapshotSharedLister schedulerlisters.SharedLister) Option {
return func(o *frameworkOptions) {
o.snapshotSharedLister = snapshotSharedLister
}
}
// WithRunAllFilters sets the runAllFilters flag, which means RunFilterPlugins accumulates
// all failure Statuses.
func WithRunAllFilters(runAllFilters bool) Option {
return func(o *frameworkOptions) {
o.runAllFilters = runAllFilters
}
}
// withMetricsRecorder is only used in tests.
func withMetricsRecorder(recorder *metricsRecorder) Option {
return func(o *frameworkOptions) {
o.metricsRecorder = recorder
}
}
// WithVolumeBinder sets volume binder for the scheduling framework.
func WithVolumeBinder(binder scheduling.SchedulerVolumeBinder) Option {
return func(o *frameworkOptions) {
o.volumeBinder = binder
}
}
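// Illustrative only: options compose at construction time, and any field left
// unset keeps the defaults declared just below. The registry, plugin set and
// client passed in are placeholders supplied by the caller.
func exampleNewFramework(r Registry, plugins *config.Plugins, cs clientset.Interface) (Framework, error) {
return NewFramework(r, plugins, nil,
WithClientSet(cs),
WithRunAllFilters(true),
)
}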
var defaultFrameworkOptions = frameworkOptions{
metricsRecorder: newMetricsRecorder(1000, time.Second),
}
var _ Framework = &framework{}
// NewFramework initializes plugins given the configuration and the registry.
func NewFramework(r Registry, plugins *config.Plugins, args []config.PluginConfig, opts ...Option) (Framework, error) {
options := defaultFrameworkOptions
for _, opt := range opts {
opt(&options)
}
f := &framework{
registry: r,
snapshotSharedLister: options.snapshotSharedLister,
pluginNameToWeightMap: make(map[string]int),
waitingPods: newWaitingPodsMap(),
clientSet: options.clientSet,
informerFactory: options.informerFactory,
volumeBinder: options.volumeBinder,
metricsRecorder: options.metricsRecorder,
runAllFilters: options.runAllFilters,
}
if plugins == nil {
return f, nil
}
// get needed plugins from config
pg := f.pluginsNeeded(plugins)
pluginConfig := make(map[string]*runtime.Unknown)
for i := range args {
name := args[i].Name
if _, ok := pluginConfig[name]; ok {
return nil, fmt.Errorf("repeated config for plugin %s", name)
}
pluginConfig[name] = &args[i].Args
}
pluginsMap := make(map[string]Plugin)
var totalPriority int64
for name, factory := range r {
// initialize only needed plugins.
if _, ok := pg[name]; !ok {
continue
}
p, err := factory(pluginConfig[name], f)
if err != nil {
return nil, fmt.Errorf("error initializing plugin %q: %v", name, err)
}
pluginsMap[name] = p
// A weight of zero is not permitted; plugins can be disabled explicitly
// when configured.
f.pluginNameToWeightMap[name] = int(pg[name].Weight)
if f.pluginNameToWeightMap[name] == 0 {
f.pluginNameToWeightMap[name] = 1
}
// Checks totalPriority against MaxTotalScore to avoid overflow
if int64(f.pluginNameToWeightMap[name])*MaxNodeScore > MaxTotalScore-totalPriority {
return nil, fmt.Errorf("total score of Score plugins could overflow")
}
totalPriority += int64(f.pluginNameToWeightMap[name]) * MaxNodeScore
}
for _, e := range f.getExtensionPoints(plugins) {
if err := updatePluginList(e.slicePtr, e.plugins, pluginsMap); err != nil {
return nil, err
}
}
// Verifying the score weights again since Plugin.Name() could return a different
// value from the one used in the configuration.
for _, scorePlugin := range f.scorePlugins {
if f.pluginNameToWeightMap[scorePlugin.Name()] == 0 {
return nil, fmt.Errorf("score plugin %q is not configured with weight", scorePlugin.Name())
}
}
if len(f.queueSortPlugins) == 0 {
return nil, fmt.Errorf("no queue sort plugin is enabled")
}
if len(f.queueSortPlugins) > 1 {
return nil, fmt.Errorf("only one queue sort plugin can be enabled")
}
if len(f.bindPlugins) == 0 {
return nil, fmt.Errorf("at least one bind plugin is needed")
}
return f, nil
}
func updatePluginList(pluginList interface{}, pluginSet *config.PluginSet, pluginsMap map[string]Plugin) error {
if pluginSet == nil {
return nil
}
plugins := reflect.ValueOf(pluginList).Elem()
pluginType := plugins.Type().Elem()
set := sets.NewString()
for _, ep := range pluginSet.Enabled {
pg, ok := pluginsMap[ep.Name]
if !ok {
return fmt.Errorf("%s %q does not exist", pluginType.Name(), ep.Name)
}
if !reflect.TypeOf(pg).Implements(pluginType) {
return fmt.Errorf("plugin %q does not extend %s plugin", ep.Name, pluginType.Name())
}
if set.Has(ep.Name) {
return fmt.Errorf("plugin %q already registered as %q", ep.Name, pluginType.Name())
}
set.Insert(ep.Name)
newPlugins := reflect.Append(plugins, reflect.ValueOf(pg))
plugins.Set(newPlugins)
}
return nil
}
// QueueSortFunc returns the function to sort pods in the scheduling queue.
func (f *framework) QueueSortFunc() LessFunc {
if f == nil {
// If framework is nil, simply keep their order unchanged.
// NOTE: this is primarily for tests.
return func(_, _ *PodInfo) bool { return false }
}
if len(f.queueSortPlugins) == 0 {
panic("No QueueSort plugin is registered in the framework.")
}
// Only one QueueSort plugin can be enabled.
return f.queueSortPlugins[0].Less
}
// RunPreFilterPlugins runs the set of configured PreFilter plugins. It returns
// *Status and its code is set to non-success if any of the plugins returns
// anything but Success. If a non-success status is returned, then the scheduling
// cycle is aborted.
func (f *framework) RunPreFilterPlugins(ctx context.Context, state *CycleState, pod *v1.Pod) (status *Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(preFilter, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.preFilterPlugins {
status = f.runPreFilterPlugin(ctx, pl, state, pod)
if !status.IsSuccess() {
if status.IsUnschedulable() {
msg := fmt.Sprintf("rejected by %q at prefilter: %v", pl.Name(), status.Message())
klog.V(4).Info(msg)
return NewStatus(status.Code(), msg)
}
msg := fmt.Sprintf("error while running %q prefilter plugin for pod %q: %v", pl.Name(), pod.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
}
return nil
}
func (f *framework) runPreFilterPlugin(ctx context.Context, pl PreFilterPlugin, state *CycleState, pod *v1.Pod) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.PreFilter(ctx, state, pod)
}
startTime := time.Now()
status := pl.PreFilter(ctx, state, pod)
f.metricsRecorder.observePluginDurationAsync(preFilter, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunPreFilterExtensionAddPod calls the AddPod interface for the set of configured
// PreFilter plugins. It returns directly if any of the plugins return any
// status other than Success.
func (f *framework) RunPreFilterExtensionAddPod(
ctx context.Context,
state *CycleState,
podToSchedule *v1.Pod,
podToAdd *v1.Pod,
nodeInfo *schedulernodeinfo.NodeInfo,
) (status *Status) {
for _, pl := range f.preFilterPlugins {
if pl.PreFilterExtensions() == nil {
continue
}
status = f.runPreFilterExtensionAddPod(ctx, pl, state, podToSchedule, podToAdd, nodeInfo)
if !status.IsSuccess() {
msg := fmt.Sprintf("error while running AddPod for plugin %q while scheduling pod %q: %v",
pl.Name(), podToSchedule.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
}
return nil
}
func (f *framework) runPreFilterExtensionAddPod(ctx context.Context, pl PreFilterPlugin, state *CycleState, podToSchedule *v1.Pod, podToAdd *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.PreFilterExtensions().AddPod(ctx, state, podToSchedule, podToAdd, nodeInfo)
}
startTime := time.Now()
status := pl.PreFilterExtensions().AddPod(ctx, state, podToSchedule, podToAdd, nodeInfo)
f.metricsRecorder.observePluginDurationAsync(preFilterExtensionAddPod, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunPreFilterExtensionRemovePod calls the RemovePod interface for the set of configured
// PreFilter plugins. It returns directly if any of the plugins return any
// status other than Success.
func (f *framework) RunPreFilterExtensionRemovePod(
ctx context.Context,
state *CycleState,
podToSchedule *v1.Pod,
podToRemove *v1.Pod,
nodeInfo *schedulernodeinfo.NodeInfo,
) (status *Status) {
for _, pl := range f.preFilterPlugins {
if pl.PreFilterExtensions() == nil {
continue
}
status = f.runPreFilterExtensionRemovePod(ctx, pl, state, podToSchedule, podToRemove, nodeInfo)
if !status.IsSuccess() {
msg := fmt.Sprintf("error while running RemovePod for plugin %q while scheduling pod %q: %v",
pl.Name(), podToSchedule.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
}
return nil
}
func (f *framework) runPreFilterExtensionRemovePod(ctx context.Context, pl PreFilterPlugin, state *CycleState, podToSchedule *v1.Pod, podToAdd *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.PreFilterExtensions().RemovePod(ctx, state, podToSchedule, podToAdd, nodeInfo)
}
startTime := time.Now()
status := pl.PreFilterExtensions().RemovePod(ctx, state, podToSchedule, podToAdd, nodeInfo)
f.metricsRecorder.observePluginDurationAsync(preFilterExtensionRemovePod, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunFilterPlugins runs the set of configured Filter plugins for the pod on
// the given node. If any of these plugins doesn't return "Success", the
// given node is not suitable for running the pod.
// Meanwhile, the failure message and status are set for the given node.
func (f *framework) RunFilterPlugins(
ctx context.Context,
state *CycleState,
pod *v1.Pod,
nodeInfo *schedulernodeinfo.NodeInfo,
) PluginToStatus {
var firstFailedStatus *Status
statuses := make(PluginToStatus)
for _, pl := range f.filterPlugins {
pluginStatus := f.runFilterPlugin(ctx, pl, state, pod, nodeInfo)
if len(statuses) == 0 {
firstFailedStatus = pluginStatus
}
if !pluginStatus.IsSuccess() {
if !pluginStatus.IsUnschedulable() {
// Filter plugins are not supposed to return any status other than
// Success or Unschedulable.
firstFailedStatus = NewStatus(Error, fmt.Sprintf("running %q filter plugin for pod %q: %v", pl.Name(), pod.Name, pluginStatus.Message()))
return map[string]*Status{pl.Name(): firstFailedStatus}
}
statuses[pl.Name()] = pluginStatus
if !f.runAllFilters {
// Exit early if we don't need to run all filters.
return statuses
}
}
}
return statuses
}
func (f *framework) runFilterPlugin(ctx context.Context, pl FilterPlugin, state *CycleState, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.Filter(ctx, state, pod, nodeInfo)
}
startTime := time.Now()
status := pl.Filter(ctx, state, pod, nodeInfo)
f.metricsRecorder.observePluginDurationAsync(Filter, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunPreScorePlugins runs the set of configured pre-score plugins. If any
// of these plugins returns any status other than "Success", the given pod is rejected.
func (f *framework) RunPreScorePlugins(
ctx context.Context,
state *CycleState,
pod *v1.Pod,
nodes []*v1.Node,
) (status *Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(preScore, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.preScorePlugins {
status = f.runPreScorePlugin(ctx, pl, state, pod, nodes)
if !status.IsSuccess() {
msg := fmt.Sprintf("error while running %q prescore plugin for pod %q: %v", pl.Name(), pod.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
}
return nil
}
func (f *framework) runPreScorePlugin(ctx context.Context, pl PreScorePlugin, state *CycleState, pod *v1.Pod, nodes []*v1.Node) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.PreScore(ctx, state, pod, nodes)
}
startTime := time.Now()
status := pl.PreScore(ctx, state, pod, nodes)
f.metricsRecorder.observePluginDurationAsync(preScore, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunScorePlugins runs the set of configured scoring plugins. It returns a map that
// stores for each scoring plugin name the corresponding NodeScoreList(s).
// It also returns *Status, which is set to non-success if any of the plugins returns
// a non-success status.
func (f *framework) RunScorePlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodes []*v1.Node) (ps PluginToNodeScores, status *Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(score, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
}()
pluginToNodeScores := make(PluginToNodeScores, len(f.scorePlugins))
for _, pl := range f.scorePlugins {
pluginToNodeScores[pl.Name()] = make(NodeScoreList, len(nodes))
}
ctx, cancel := context.WithCancel(ctx)
errCh := schedutil.NewErrorChannel()
// Run Score method for each node in parallel.
workqueue.ParallelizeUntil(ctx, 16, len(nodes), func(index int) {
for _, pl := range f.scorePlugins {
nodeName := nodes[index].Name
s, status := f.runScorePlugin(ctx, pl, state, pod, nodeName)
if !status.IsSuccess() {
errCh.SendErrorWithCancel(fmt.Errorf(status.Message()), cancel)
return
}
pluginToNodeScores[pl.Name()][index] = NodeScore{
Name: nodeName,
Score: int64(s),
}
}
})
if err := errCh.ReceiveError(); err != nil {
msg := fmt.Sprintf("error while running score plugin for pod %q: %v", pod.Name, err)
klog.Error(msg)
return nil, NewStatus(Error, msg)
}
// Run NormalizeScore method for each ScorePlugin in parallel.
workqueue.ParallelizeUntil(ctx, 16, len(f.scorePlugins), func(index int) {
pl := f.scorePlugins[index]
nodeScoreList := pluginToNodeScores[pl.Name()]
if pl.ScoreExtensions() == nil {
return
}
status := f.runScoreExtension(ctx, pl, state, pod, nodeScoreList)
if !status.IsSuccess() {
err := fmt.Errorf("normalize score plugin %q failed with error %v", pl.Name(), status.Message())
errCh.SendErrorWithCancel(err, cancel)
return
}
})
if err := errCh.ReceiveError(); err != nil {
msg := fmt.Sprintf("error while running normalize score plugin for pod %q: %v", pod.Name, err)
klog.Error(msg)
return nil, NewStatus(Error, msg)
}
// Apply score weights for each ScorePlugin in parallel.
workqueue.ParallelizeUntil(ctx, 16, len(f.scorePlugins), func(index int) {
pl := f.scorePlugins[index]
// Score plugins' weight has been checked when they are initialized.
weight := f.pluginNameToWeightMap[pl.Name()]
nodeScoreList := pluginToNodeScores[pl.Name()]
for i, nodeScore := range nodeScoreList {
// return error if score plugin returns invalid score.
if nodeScore.Score > int64(MaxNodeScore) || nodeScore.Score < int64(MinNodeScore) {
err := fmt.Errorf("score plugin %q returns an invalid score %v, it should be in the range of [%v, %v] after normalizing", pl.Name(), nodeScore.Score, MinNodeScore, MaxNodeScore)
errCh.SendErrorWithCancel(err, cancel)
return
}
nodeScoreList[i].Score = nodeScore.Score * int64(weight)
}
})
if err := errCh.ReceiveError(); err != nil {
msg := fmt.Sprintf("error while applying score defaultWeights for pod %q: %v", pod.Name, err)
klog.Error(msg)
return nil, NewStatus(Error, msg)
}
return pluginToNodeScores, nil
}
func (f *framework) runScorePlugin(ctx context.Context, pl ScorePlugin, state *CycleState, pod *v1.Pod, nodeName string) (int64, *Status) {
if !state.ShouldRecordPluginMetrics() {
return pl.Score(ctx, state, pod, nodeName)
}
startTime := time.Now()
s, status := pl.Score(ctx, state, pod, nodeName)
f.metricsRecorder.observePluginDurationAsync(score, pl.Name(), status, metrics.SinceInSeconds(startTime))
return s, status
}
func (f *framework) runScoreExtension(ctx context.Context, pl ScorePlugin, state *CycleState, pod *v1.Pod, nodeScoreList NodeScoreList) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.ScoreExtensions().NormalizeScore(ctx, state, pod, nodeScoreList)
}
startTime := time.Now()
status := pl.ScoreExtensions().NormalizeScore(ctx, state, pod, nodeScoreList)
f.metricsRecorder.observePluginDurationAsync(scoreExtensionNormalize, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunPreBindPlugins runs the set of configured prebind plugins. It returns a
// non-success *Status if any of the plugins returns an error. The status
// message contains the rejection message or the error that occurred in the plugin.
func (f *framework) RunPreBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) (status *Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(preBind, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.preBindPlugins {
status = f.runPreBindPlugin(ctx, pl, state, pod, nodeName)
if !status.IsSuccess() {
msg := fmt.Sprintf("error while running %q prebind plugin for pod %q: %v", pl.Name(), pod.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
}
return nil
}
func (f *framework) runPreBindPlugin(ctx context.Context, pl PreBindPlugin, state *CycleState, pod *v1.Pod, nodeName string) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.PreBind(ctx, state, pod, nodeName)
}
startTime := time.Now()
status := pl.PreBind(ctx, state, pod, nodeName)
f.metricsRecorder.observePluginDurationAsync(preBind, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunBindPlugins runs the set of configured bind plugins until one returns a non `Skip` status.
func (f *framework) RunBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) (status *Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(bind, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
}()
if len(f.bindPlugins) == 0 {
return NewStatus(Skip, "")
}
for _, bp := range f.bindPlugins {
status = f.runBindPlugin(ctx, bp, state, pod, nodeName)
if status != nil && status.Code() == Skip {
continue
}
if !status.IsSuccess() {
msg := fmt.Sprintf("plugin %q failed to bind pod \"%v/%v\": %v", bp.Name(), pod.Namespace, pod.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
return status
}
return status
}
func (f *framework) runBindPlugin(ctx context.Context, bp BindPlugin, state *CycleState, pod *v1.Pod, nodeName string) *Status {
if !state.ShouldRecordPluginMetrics() {
return bp.Bind(ctx, state, pod, nodeName)
}
startTime := time.Now()
status := bp.Bind(ctx, state, pod, nodeName)
f.metricsRecorder.observePluginDurationAsync(bind, bp.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunPostBindPlugins runs the set of configured postbind plugins.
func (f *framework) RunPostBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(postBind, Success.String()).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.postBindPlugins {
f.runPostBindPlugin(ctx, pl, state, pod, nodeName)
}
}
func (f *framework) runPostBindPlugin(ctx context.Context, pl PostBindPlugin, state *CycleState, pod *v1.Pod, nodeName string) {
if !state.ShouldRecordPluginMetrics() {
pl.PostBind(ctx, state, pod, nodeName)
return
}
startTime := time.Now()
pl.PostBind(ctx, state, pod, nodeName)
f.metricsRecorder.observePluginDurationAsync(postBind, pl.Name(), nil, metrics.SinceInSeconds(startTime))
}
// RunReservePlugins runs the set of configured reserve plugins. If any of these
// plugins returns an error, it does not continue running the remaining ones and
// returns the error. In such a case, the pod will not be scheduled.
func (f *framework) RunReservePlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) (status *Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(reserve, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.reservePlugins {
status = f.runReservePlugin(ctx, pl, state, pod, nodeName)
if !status.IsSuccess() {
msg := fmt.Sprintf("error while running %q reserve plugin for pod %q: %v", pl.Name(), pod.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
}
return nil
}
func (f *framework) runReservePlugin(ctx context.Context, pl ReservePlugin, state *CycleState, pod *v1.Pod, nodeName string) *Status {
if !state.ShouldRecordPluginMetrics() {
return pl.Reserve(ctx, state, pod, nodeName)
}
startTime := time.Now()
status := pl.Reserve(ctx, state, pod, nodeName)
f.metricsRecorder.observePluginDurationAsync(reserve, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status
}
// RunUnreservePlugins runs the set of configured unreserve plugins.
func (f *framework) RunUnreservePlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(unreserve, Success.String()).Observe(metrics.SinceInSeconds(startTime))
}()
for _, pl := range f.unreservePlugins {
f.runUnreservePlugin(ctx, pl, state, pod, nodeName)
}
}
func (f *framework) runUnreservePlugin(ctx context.Context, pl UnreservePlugin, state *CycleState, pod *v1.Pod, nodeName string) {
if !state.ShouldRecordPluginMetrics() {
pl.Unreserve(ctx, state, pod, nodeName)
return
}
startTime := time.Now()
pl.Unreserve(ctx, state, pod, nodeName)
f.metricsRecorder.observePluginDurationAsync(unreserve, pl.Name(), nil, metrics.SinceInSeconds(startTime))
}
// RunPermitPlugins runs the set of configured permit plugins. If any of these
// plugins returns a status other than "Success" or "Wait", it does not continue
// running the remaining plugins and returns an error. Otherwise, if any of the
// plugins returns "Wait", then this function will create and add waiting pod
// to a map of currently waiting pods and return status with "Wait" code.
// The pod will remain a waiting pod for the minimum duration returned by the permit plugins.
func (f *framework) RunPermitPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) (status *Status) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(permit, status.Code().String()).Observe(metrics.SinceInSeconds(startTime))
}()
pluginsWaitTime := make(map[string]time.Duration)
statusCode := Success
for _, pl := range f.permitPlugins {
status, timeout := f.runPermitPlugin(ctx, pl, state, pod, nodeName)
if !status.IsSuccess() {
if status.IsUnschedulable() {
msg := fmt.Sprintf("rejected pod %q by permit plugin %q: %v", pod.Name, pl.Name(), status.Message())
klog.V(4).Infof(msg)
return NewStatus(status.Code(), msg)
}
if status.Code() == Wait {
// Not allowed to be greater than maxTimeout.
if timeout > maxTimeout {
timeout = maxTimeout
}
pluginsWaitTime[pl.Name()] = timeout
statusCode = Wait
} else {
msg := fmt.Sprintf("error while running %q permit plugin for pod %q: %v", pl.Name(), pod.Name, status.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
}
}
if statusCode == Wait {
waitingPod := newWaitingPod(pod, pluginsWaitTime)
f.waitingPods.add(waitingPod)
msg := fmt.Sprintf("one or more plugins asked to wait and no plugin rejected pod %q", pod.Name)
klog.V(4).Infof(msg)
return NewStatus(Wait, msg)
}
return nil
}
func (f *framework) runPermitPlugin(ctx context.Context, pl PermitPlugin, state *CycleState, pod *v1.Pod, nodeName string) (*Status, time.Duration) {
if !state.ShouldRecordPluginMetrics() {
return pl.Permit(ctx, state, pod, nodeName)
}
startTime := time.Now()
status, timeout := pl.Permit(ctx, state, pod, nodeName)
f.metricsRecorder.observePluginDurationAsync(permit, pl.Name(), status, metrics.SinceInSeconds(startTime))
return status, timeout
}
// WaitOnPermit will block, if the pod is a waiting pod, until the waiting pod is rejected or allowed.
func (f *framework) WaitOnPermit(ctx context.Context, pod *v1.Pod) (status *Status) {
waitingPod := f.waitingPods.get(pod.UID)
if waitingPod == nil {
return nil
}
defer f.waitingPods.remove(pod.UID)
klog.V(4).Infof("pod %q waiting on permit", pod.Name)
startTime := time.Now()
s := <-waitingPod.s
metrics.PermitWaitDuration.WithLabelValues(s.Code().String()).Observe(metrics.SinceInSeconds(startTime))
if !s.IsSuccess() {
if s.IsUnschedulable() {
msg := fmt.Sprintf("pod %q rejected while waiting on permit: %v", pod.Name, s.Message())
klog.V(4).Infof(msg)
return NewStatus(s.Code(), msg)
}
msg := fmt.Sprintf("error received while waiting on permit for pod %q: %v", pod.Name, s.Message())
klog.Error(msg)
return NewStatus(Error, msg)
}
return nil
}
// SnapshotSharedLister returns the scheduler's SharedLister of the latest NodeInfo
// snapshot. The snapshot is taken at the beginning of a scheduling cycle and remains
// unchanged until a pod finishes "Reserve". There is no guarantee that the information
// remains unchanged after "Reserve".
func (f *framework) SnapshotSharedLister() schedulerlisters.SharedLister {
return f.snapshotSharedLister
}
// IterateOverWaitingPods acquires a read lock and iterates over the WaitingPods map.
func (f *framework) IterateOverWaitingPods(callback func(WaitingPod)) {
f.waitingPods.iterate(callback)
}
// GetWaitingPod returns a reference to a WaitingPod given its UID.
func (f *framework) GetWaitingPod(uid types.UID) WaitingPod {
if wp := f.waitingPods.get(uid); wp != nil {
return wp
}
return nil // Returning nil instead of *waitingPod(nil).
}
// RejectWaitingPod rejects a WaitingPod given its UID.
func (f *framework) RejectWaitingPod(uid types.UID) {
waitingPod := f.waitingPods.get(uid)
if waitingPod != nil {
waitingPod.Reject("removed")
}
}
// HasFilterPlugins returns true if at least one filter plugin is defined.
func (f *framework) HasFilterPlugins() bool {
return len(f.filterPlugins) > 0
}
// HasScorePlugins returns true if at least one score plugin is defined.
func (f *framework) HasScorePlugins() bool {
return len(f.scorePlugins) > 0
}
// ListPlugins returns a map of extension point name to plugin names configured at each extension
// point. Returns nil if no plugins were configured.
func (f *framework) ListPlugins() map[string][]config.Plugin {
m := make(map[string][]config.Plugin)
for _, e := range f.getExtensionPoints(&config.Plugins{}) {
plugins := reflect.ValueOf(e.slicePtr).Elem()
extName := plugins.Type().Elem().Name()
var cfgs []config.Plugin
for i := 0; i < plugins.Len(); i++ {
name := plugins.Index(i).Interface().(Plugin).Name()
p := config.Plugin{Name: name}
if extName == "ScorePlugin" {
// Weights apply only to score plugins.
p.Weight = int32(f.pluginNameToWeightMap[name])
}
cfgs = append(cfgs, p)
}
if len(cfgs) > 0 {
m[extName] = cfgs
}
}
if len(m) > 0 {
return m
}
return nil
}
// ClientSet returns a kubernetes clientset.
func (f *framework) ClientSet() clientset.Interface {
return f.clientSet
}
// SharedInformerFactory returns a shared informer factory.
func (f *framework) SharedInformerFactory() informers.SharedInformerFactory {
return f.informerFactory
}
// VolumeBinder returns the volume binder used by scheduler.
func (f *framework) VolumeBinder() scheduling.SchedulerVolumeBinder {
return f.volumeBinder
}
func (f *framework) pluginsNeeded(plugins *config.Plugins) map[string]config.Plugin {
pgMap := make(map[string]config.Plugin)
if plugins == nil {
return pgMap
}
find := func(pgs *config.PluginSet) {
if pgs == nil {
return
}
for _, pg := range pgs.Enabled {
pgMap[pg.Name] = pg
}
}
for _, e := range f.getExtensionPoints(plugins) {
find(e.plugins)
}
return pgMap
}

View File

@ -0,0 +1,525 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// This file defines the scheduling framework plugin interfaces.
package v1alpha1
import (
"context"
"errors"
"math"
"strings"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/pkg/controller/volume/scheduling"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
schedulerlisters "k8s.io/kubernetes/pkg/scheduler/listers"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// NodeScoreList declares a list of nodes and their scores.
type NodeScoreList []NodeScore
// NodeScore is a struct with node name and score.
type NodeScore struct {
Name string
Score int64
}
// PluginToNodeScores declares a map from plugin name to its NodeScoreList.
type PluginToNodeScores map[string]NodeScoreList
// NodeToStatusMap declares map from node name to its status.
type NodeToStatusMap map[string]*Status
// Code is the Status code/type which is returned from plugins.
type Code int
// These are predefined codes used in a Status.
const (
// Success means that plugin ran correctly and found pod schedulable.
// NOTE: A nil status is also considered as "Success".
Success Code = iota
// Error is used for internal plugin errors, unexpected input, etc.
Error
// Unschedulable is used when a plugin finds a pod unschedulable. The scheduler might attempt to
// preempt other pods to get this pod scheduled. Use UnschedulableAndUnresolvable to make the
// scheduler skip preemption.
// The accompanying status message should explain why the pod is unschedulable.
Unschedulable
// UnschedulableAndUnresolvable is used when a (pre-)filter plugin finds a pod unschedulable and
// preemption would not change anything. Plugins should return Unschedulable if it is possible
// that the pod can get scheduled with preemption.
// The accompanying status message should explain why the pod is unschedulable.
UnschedulableAndUnresolvable
// Wait is used when a permit plugin finds a pod scheduling should wait.
Wait
// Skip is used when a bind plugin chooses to skip binding.
Skip
)
// This list must be kept in sync with the Code constants defined above, in the same order.
var codes = []string{"Success", "Error", "Unschedulable", "UnschedulableAndUnresolvable", "Wait", "Skip"}
func (c Code) String() string {
return codes[c]
}
const (
// MaxNodeScore is the maximum score a Score plugin is expected to return.
MaxNodeScore int64 = 100
// MinNodeScore is the minimum score a Score plugin is expected to return.
MinNodeScore int64 = 0
// MaxTotalScore is the maximum total score.
MaxTotalScore int64 = math.MaxInt64
)
// Status indicates the result of running a plugin. It consists of a code and a
// message. When the status code is not `Success`, the reasons should explain why.
// NOTE: A nil Status is also considered as Success.
type Status struct {
code Code
reasons []string
}
// Code returns code of the Status.
func (s *Status) Code() Code {
if s == nil {
return Success
}
return s.code
}
// Message returns a concatenated message on reasons of the Status.
func (s *Status) Message() string {
if s == nil {
return ""
}
return strings.Join(s.reasons, ", ")
}
// Reasons returns reasons of the Status.
func (s *Status) Reasons() []string {
return s.reasons
}
// AppendReason appends given reason to the Status.
func (s *Status) AppendReason(reason string) {
s.reasons = append(s.reasons, reason)
}
// IsSuccess returns true if and only if "Status" is nil or Code is "Success".
func (s *Status) IsSuccess() bool {
return s.Code() == Success
}
// IsUnschedulable returns true if "Status" is Unschedulable (Unschedulable or UnschedulableAndUnresolvable).
func (s *Status) IsUnschedulable() bool {
code := s.Code()
return code == Unschedulable || code == UnschedulableAndUnresolvable
}
// AsError returns nil if the status is a success; otherwise returns an "error" object
// with a concatenated message on reasons of the Status.
func (s *Status) AsError() error {
if s.IsSuccess() {
return nil
}
return errors.New(s.Message())
}
// NewStatus makes a Status out of the given arguments and returns its pointer.
func NewStatus(code Code, reasons ...string) *Status {
return &Status{
code: code,
reasons: reasons,
}
}
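// Illustrative sketch (not part of the upstream file): because a nil *Status
// is treated as Success, plugin code can return nil on the happy path and
// construct a Status only for failures.
func exampleStatusUsage(ok bool) *Status {
	if ok {
		// A nil Status is read back as Success by Code()/IsSuccess().
		return nil
	}
	return NewStatus(Unschedulable, "example: node does not fit the pod")
}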
// PluginToStatus maps plugin name to status. Currently used to identify which Filter plugin
// returned which status.
type PluginToStatus map[string]*Status
// Merge merges the statuses in the map into one. The resulting status code has the following
// precedence: Error, UnschedulableAndUnresolvable, Unschedulable.
func (p PluginToStatus) Merge() *Status {
if len(p) == 0 {
return nil
}
finalStatus := NewStatus(Success)
var hasError, hasUnschedulableAndUnresolvable, hasUnschedulable bool
for _, s := range p {
if s.Code() == Error {
hasError = true
} else if s.Code() == UnschedulableAndUnresolvable {
hasUnschedulableAndUnresolvable = true
} else if s.Code() == Unschedulable {
hasUnschedulable = true
}
finalStatus.code = s.Code()
for _, r := range s.reasons {
finalStatus.AppendReason(r)
}
}
if hasError {
finalStatus.code = Error
} else if hasUnschedulableAndUnresolvable {
finalStatus.code = UnschedulableAndUnresolvable
} else if hasUnschedulable {
finalStatus.code = Unschedulable
}
return finalStatus
}
// WaitingPod represents a pod currently waiting in the permit phase.
type WaitingPod interface {
// GetPod returns a reference to the waiting pod.
GetPod() *v1.Pod
// GetPendingPlugins returns the names of the pending permit plugins.
GetPendingPlugins() []string
// Allow declares the waiting pod is allowed to be scheduled by plugin pluginName.
// If this is the last remaining plugin to allow, then a success signal is delivered
// to unblock the pod.
Allow(pluginName string)
// Reject declares the waiting pod unschedulable.
Reject(msg string)
}
// Plugin is the parent type for all the scheduling framework plugins.
type Plugin interface {
Name() string
}
// PodInfo is a wrapper to a Pod with additional information for purposes such as tracking
// the timestamp when it's added to the queue or recording per-pod metrics.
type PodInfo struct {
Pod *v1.Pod
// The time pod added to the scheduling queue.
Timestamp time.Time
// Number of schedule attempts before successfully scheduled.
// It's used to record the # attempts metric.
Attempts int
// The time when the pod is added to the queue for the first time. The pod may be added
// back to the queue multiple times before it's successfully scheduled.
// It shouldn't be updated once initialized. It's used to record the e2e scheduling
// latency for a pod.
InitialAttemptTimestamp time.Time
}
// DeepCopy returns a deep copy of the PodInfo object.
func (podInfo *PodInfo) DeepCopy() *PodInfo {
return &PodInfo{
Pod: podInfo.Pod.DeepCopy(),
Timestamp: podInfo.Timestamp,
Attempts: podInfo.Attempts,
InitialAttemptTimestamp: podInfo.InitialAttemptTimestamp,
}
}
// LessFunc is the function used to sort pod infos in the scheduling queue.
type LessFunc func(podInfo1, podInfo2 *PodInfo) bool
// QueueSortPlugin is an interface that must be implemented by "QueueSort" plugins.
// These plugins are used to sort pods in the scheduling queue. Only one queue sort
// plugin may be enabled at a time.
type QueueSortPlugin interface {
Plugin
// Less is used to sort pods in the scheduling queue.
Less(*PodInfo, *PodInfo) bool
}
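// Illustrative sketch (not part of the upstream file): a minimal QueueSort
// plugin that orders pods by the time they were added to the queue, so older
// pods are tried first. The type name is hypothetical.
type fifoSortExample struct{}

func (fifoSortExample) Name() string { return "FIFOSortExample" }

// Less reports whether podInfo1 should be scheduled before podInfo2.
func (fifoSortExample) Less(podInfo1, podInfo2 *PodInfo) bool {
	return podInfo1.Timestamp.Before(podInfo2.Timestamp)
}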
// PreFilterExtensions is an interface that is included in plugins that allow specifying
// callbacks to make incremental updates to its supposedly pre-calculated
// state.
type PreFilterExtensions interface {
// AddPod is called by the framework while trying to evaluate the impact
// of adding podToAdd to the node while scheduling podToSchedule.
AddPod(ctx context.Context, state *CycleState, podToSchedule *v1.Pod, podToAdd *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status
// RemovePod is called by the framework while trying to evaluate the impact
// of removing podToRemove from the node while scheduling podToSchedule.
RemovePod(ctx context.Context, state *CycleState, podToSchedule *v1.Pod, podToRemove *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status
}
// PreFilterPlugin is an interface that must be implemented by "prefilter" plugins.
// These plugins are called at the beginning of the scheduling cycle.
type PreFilterPlugin interface {
Plugin
// PreFilter is called at the beginning of the scheduling cycle. All PreFilter
// plugins must return success or the pod will be rejected.
PreFilter(ctx context.Context, state *CycleState, p *v1.Pod) *Status
// PreFilterExtensions returns a PreFilterExtensions interface if the plugin implements one,
// or nil if it does not. A Pre-filter plugin can provide extensions to incrementally
// modify its pre-processed info. The framework guarantees that the extensions
// AddPod/RemovePod will only be called after PreFilter, possibly on a cloned
// CycleState, and may call those functions more than once before calling
// Filter again on a specific node.
PreFilterExtensions() PreFilterExtensions
}
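// Illustrative sketch (not part of the upstream file): a PreFilter plugin that
// would pre-compute state once per scheduling cycle and store it in CycleState
// for later Filter calls. It declines the incremental AddPod/RemovePod
// callbacks by returning a nil PreFilterExtensions. Names are hypothetical.
type preFilterExample struct{}

func (preFilterExample) Name() string { return "PreFilterExample" }

func (preFilterExample) PreFilter(ctx context.Context, state *CycleState, p *v1.Pod) *Status {
	// An expensive, pod-wide computation would go here; the result would be
	// written into state so that Filter can read it per node.
	return nil
}

func (preFilterExample) PreFilterExtensions() PreFilterExtensions {
	// nil tells the framework this plugin has no incremental updates.
	return nil
}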
// FilterPlugin is an interface for Filter plugins. These plugins are called at the
// filter extension point for filtering out hosts that cannot run a pod.
// This concept used to be called 'predicate' in the original scheduler.
// These plugins should return "Success", "Unschedulable" or "Error" in Status.code.
// However, the scheduler accepts other valid codes as well.
// Anything other than "Success" will lead to exclusion of the given host from
// running the pod.
type FilterPlugin interface {
Plugin
// Filter is called by the scheduling framework.
// All FilterPlugins should return "Success" to declare that
// the given node fits the pod. If Filter doesn't return "Success",
// refer to scheduler/algorithm/predicates/error.go for how to set the
// error message.
// For the node being evaluated, Filter plugins should look at the passed
// nodeInfo reference for this particular node's information (e.g., pods
// considered to be running on the node) instead of looking it up in the
// NodeInfoSnapshot because we don't guarantee that they will be the same.
// For example, during preemption, we may pass a copy of the original
// nodeInfo object that has some pods removed from it to evaluate the
// possibility of preempting them to schedule the target pod.
Filter(ctx context.Context, state *CycleState, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status
}
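// Illustrative sketch (not part of the upstream file): a Filter plugin that
// rejects nodes missing a hypothetical label. Note that it reads node data
// from the passed nodeInfo, as the contract above requires, rather than from
// the NodeInfoSnapshot.
type labelFilterExample struct{}

func (labelFilterExample) Name() string { return "LabelFilterExample" }

func (labelFilterExample) Filter(ctx context.Context, state *CycleState, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status {
	node := nodeInfo.Node()
	if node == nil {
		return NewStatus(Error, "node not found")
	}
	if _, ok := node.Labels["example.com/schedulable"]; !ok {
		// Unschedulable (not Error) signals a scheduling failure that
		// preemption might resolve.
		return NewStatus(Unschedulable, "node lacks example.com/schedulable label")
	}
	return nil
}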
// PreScorePlugin is an interface for Pre-score plugin. Pre-score is an
// informational extension point. Plugins will be called with a list of nodes
// that passed the filtering phase. A plugin may use this data to update internal
// state or to generate logs/metrics.
type PreScorePlugin interface {
Plugin
// PreScore is called by the scheduling framework after a list of nodes
// passed the filtering phase. All prescore plugins must return success or
// the pod will be rejected.
PreScore(ctx context.Context, state *CycleState, pod *v1.Pod, nodes []*v1.Node) *Status
}
// ScoreExtensions is an interface for Score extended functionality.
type ScoreExtensions interface {
// NormalizeScore is called for all node scores produced by the same plugin's "Score"
// method. A successful run of NormalizeScore will update the scores list and return
// a success status.
NormalizeScore(ctx context.Context, state *CycleState, p *v1.Pod, scores NodeScoreList) *Status
}
// ScorePlugin is an interface that must be implemented by "score" plugins to rank
// nodes that passed the filtering phase.
type ScorePlugin interface {
Plugin
// Score is called on each filtered node. It must return success and an integer
// indicating the rank of the node. All scoring plugins must return success or
// the pod will be rejected.
Score(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) (int64, *Status)
// ScoreExtensions returns a ScoreExtensions interface if it implements one, or nil if it does not.
ScoreExtensions() ScoreExtensions
}
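// Illustrative sketch (not part of the upstream file): a Score plugin that
// scores nodes by a raw count and then normalizes the counts into the
// required [MinNodeScore, MaxNodeScore] range. Names are hypothetical.
type podCountScoreExample struct{}

func (podCountScoreExample) Name() string { return "PodCountScoreExample" }

func (s podCountScoreExample) Score(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) (int64, *Status) {
	// A real plugin would look the node up via the FrameworkHandle's
	// SnapshotSharedLister; a raw count stands in here.
	var podCount int64 // assume this was obtained from the snapshot
	return podCount, nil
}

func (s podCountScoreExample) ScoreExtensions() ScoreExtensions { return s }

// NormalizeScore rescales raw scores so the highest raw score maps to MaxNodeScore.
func (podCountScoreExample) NormalizeScore(ctx context.Context, state *CycleState, p *v1.Pod, scores NodeScoreList) *Status {
	var max int64
	for _, nodeScore := range scores {
		if nodeScore.Score > max {
			max = nodeScore.Score
		}
	}
	if max == 0 {
		return nil
	}
	for i := range scores {
		scores[i].Score = scores[i].Score * MaxNodeScore / max
	}
	return nil
}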
// ReservePlugin is an interface for Reserve plugins. These plugins are called
// at the reservation point. These are meant to update the state of the plugin.
// This concept used to be called 'assume' in the original scheduler.
// These plugins should return only Success or Error in Status.code. However,
// the scheduler accepts other valid codes as well. Anything other than Success
// will lead to rejection of the pod.
type ReservePlugin interface {
Plugin
// Reserve is called by the scheduling framework when the scheduler cache is
// updated.
Reserve(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) *Status
}
// PreBindPlugin is an interface that must be implemented by "prebind" plugins.
// These plugins are called before a pod is bound.
type PreBindPlugin interface {
Plugin
// PreBind is called before binding a pod. All prebind plugins must return
// success or the pod will be rejected and won't be sent for binding.
PreBind(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) *Status
}
// PostBindPlugin is an interface that must be implemented by "postbind" plugins.
// These plugins are called after a pod is successfully bound to a node.
type PostBindPlugin interface {
Plugin
// PostBind is called after a pod is successfully bound. These plugins are
// informational. A common application of this extension point is for cleaning
// up. If a plugin needs to clean up its state after a pod is scheduled and
// bound, PostBind is the extension point that it should register.
PostBind(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string)
}
// UnreservePlugin is an interface for Unreserve plugins. This is an informational
// extension point. If a pod was reserved and then rejected in a later phase, then
// un-reserve plugins will be notified. Un-reserve plugins should clean up state
// associated with the reserved Pod.
type UnreservePlugin interface {
Plugin
// Unreserve is called by the scheduling framework when a reserved pod was
// rejected in a later phase.
Unreserve(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string)
}
// PermitPlugin is an interface that must be implemented by "permit" plugins.
// These plugins are called before a pod is bound to a node.
type PermitPlugin interface {
Plugin
// Permit is called before binding a pod (and before prebind plugins). Permit
// plugins are used to prevent or delay the binding of a Pod. A permit plugin
// must return success or wait with timeout duration, or the pod will be rejected.
// The pod will also be rejected if the wait timeout or the pod is rejected while
// waiting. Note that if the plugin returns "wait", the framework will wait only
// after running the remaining plugins given that no other plugin rejects the pod.
Permit(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) (*Status, time.Duration)
}
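// Illustrative sketch (not part of the upstream file): a Permit plugin for
// hypothetical gang scheduling that makes a pod wait until its group is
// complete, relying on the framework's timeout handling described above.
type gangPermitExample struct {
	groupComplete func(*v1.Pod) bool // hypothetical membership check
}

func (gangPermitExample) Name() string { return "GangPermitExample" }

func (g gangPermitExample) Permit(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) (*Status, time.Duration) {
	if g.groupComplete(p) {
		return nil, 0 // Success: no waiting needed.
	}
	// Wait up to 10 seconds; the framework rejects the pod if the timeout
	// elapses before some other component calls Allow on the waiting pod.
	return NewStatus(Wait, ""), 10 * time.Second
}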
// BindPlugin is an interface that must be implemented by "bind" plugins. Bind
// plugins are used to bind a pod to a Node.
type BindPlugin interface {
Plugin
// Bind plugins will not be called until all pre-bind plugins have completed. Each
// bind plugin is called in the configured order. A bind plugin may choose whether
// or not to handle the given Pod. If a bind plugin chooses to handle a Pod, the
// remaining bind plugins are skipped. When a bind plugin does not handle a pod,
// it must return Skip in its Status code. If a bind plugin returns an Error, the
// pod is rejected and will not be bound.
Bind(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) *Status
}
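// Illustrative sketch (not part of the upstream file): a Bind plugin that only
// handles pods carrying a hypothetical annotation and returns Skip for all
// others, so the next configured bind plugin (or the default binder) takes over.
type annotatedBindExample struct{}

func (annotatedBindExample) Name() string { return "AnnotatedBindExample" }

func (annotatedBindExample) Bind(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) *Status {
	if _, ok := p.Annotations["example.com/custom-bind"]; !ok {
		// Skip: this plugin chooses not to handle the pod.
		return NewStatus(Skip, "")
	}
	// A real plugin would create a v1.Binding via the clientset here.
	return nil
}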
// Framework manages the set of plugins in use by the scheduling framework.
// Configured plugins are called at specified points in a scheduling context.
type Framework interface {
FrameworkHandle
// QueueSortFunc returns the function to sort pods in scheduling queue
QueueSortFunc() LessFunc
// RunPreFilterPlugins runs the set of configured prefilter plugins. It returns
// *Status and its code is set to non-success if any of the plugins returns
// anything but Success. If a non-success status is returned, then the scheduling
// cycle is aborted.
RunPreFilterPlugins(ctx context.Context, state *CycleState, pod *v1.Pod) *Status
// RunFilterPlugins runs the set of configured filter plugins for pod on
// the given node. Note that for the node being evaluated, the passed nodeInfo
// reference could be different from the one in NodeInfoSnapshot map (e.g., pods
// considered to be running on the node could be different). For example, during
// preemption, we may pass a copy of the original nodeInfo object that has some pods
// removed from it to evaluate the possibility of preempting them to
// schedule the target pod.
RunFilterPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) PluginToStatus
// RunPreFilterExtensionAddPod calls the AddPod interface for the set of configured
// PreFilter plugins. It returns directly if any of the plugins return any
// status other than Success.
RunPreFilterExtensionAddPod(ctx context.Context, state *CycleState, podToSchedule *v1.Pod, podToAdd *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status
// RunPreFilterExtensionRemovePod calls the RemovePod interface for the set of configured
// PreFilter plugins. It returns directly if any of the plugins return any
// status other than Success.
RunPreFilterExtensionRemovePod(ctx context.Context, state *CycleState, podToSchedule *v1.Pod, podToAdd *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) *Status
// RunPreScorePlugins runs the set of configured pre-score plugins. If any
// of these plugins returns any status other than "Success", the given pod is rejected.
RunPreScorePlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodes []*v1.Node) *Status
// RunScorePlugins runs the set of configured scoring plugins. It returns a map that
// stores for each scoring plugin name the corresponding NodeScoreList(s).
// It also returns *Status, which is set to non-success if any of the plugins returns
// a non-success status.
RunScorePlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodes []*v1.Node) (PluginToNodeScores, *Status)
// RunPreBindPlugins runs the set of configured prebind plugins. It returns
// *Status and its code is set to non-success if any of the plugins returns
// anything but Success. If the Status code is "Unschedulable", it is
// considered as a scheduling check failure, otherwise, it is considered as an
// internal error. In either case the pod is not going to be bound.
RunPreBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) *Status
// RunPostBindPlugins runs the set of configured postbind plugins.
RunPostBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string)
// RunReservePlugins runs the set of configured reserve plugins. If any of these
// plugins returns an error, it does not continue running the remaining ones and
// returns the error. In such a case, the pod will not be scheduled.
RunReservePlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) *Status
// RunUnreservePlugins runs the set of configured unreserve plugins.
RunUnreservePlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string)
// RunPermitPlugins runs the set of configured permit plugins. If any of these
// plugins returns a status other than "Success" or "Wait", it does not continue
// running the remaining plugins and returns an error. Otherwise, if any of the
// plugins returns "Wait", then this function will create and add waiting pod
// to a map of currently waiting pods and return status with "Wait" code.
// The pod will remain a waiting pod for the minimum duration returned by the permit plugins.
RunPermitPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) *Status
// WaitOnPermit will block, if the pod is a waiting pod, until the waiting pod is rejected or allowed.
WaitOnPermit(ctx context.Context, pod *v1.Pod) *Status
// RunBindPlugins runs the set of configured bind plugins. A bind plugin may choose
// whether or not to handle the given Pod. If a bind plugin chooses to skip the
// binding, it should return code=5("skip") status. Otherwise, it should return "Error"
// or "Success". If none of the plugins handled binding, RunBindPlugins returns
// code=5("skip") status.
RunBindPlugins(ctx context.Context, state *CycleState, pod *v1.Pod, nodeName string) *Status
// HasFilterPlugins returns true if at least one filter plugin is defined.
HasFilterPlugins() bool
// HasScorePlugins returns true if at least one score plugin is defined.
HasScorePlugins() bool
// ListPlugins returns a map of extension point name to list of configured Plugins.
ListPlugins() map[string][]config.Plugin
}
// FrameworkHandle provides data and some tools that plugins can use. It is
// passed to the plugin factories at the time of plugin initialization. Plugins
// must store and use this handle to call framework functions.
type FrameworkHandle interface {
// SnapshotSharedLister returns listers from the latest NodeInfo Snapshot. The snapshot
// is taken at the beginning of a scheduling cycle and remains unchanged until
// a pod finishes "Permit" point. There is no guarantee that the information
// remains unchanged in the binding phase of scheduling, so plugins in the binding
// cycle (pre-bind/bind/post-bind/un-reserve plugin) should not use it,
// otherwise a concurrent read/write error might occur, they should use scheduler
// cache instead.
SnapshotSharedLister() schedulerlisters.SharedLister
// IterateOverWaitingPods acquires a read lock and iterates over the WaitingPods map.
IterateOverWaitingPods(callback func(WaitingPod))
// GetWaitingPod returns a waiting pod given its UID.
GetWaitingPod(uid types.UID) WaitingPod
// RejectWaitingPod rejects a waiting pod given its UID.
RejectWaitingPod(uid types.UID)
// ClientSet returns a kubernetes clientSet.
ClientSet() clientset.Interface
SharedInformerFactory() informers.SharedInformerFactory
// VolumeBinder returns the volume binder used by scheduler.
VolumeBinder() scheduling.SchedulerVolumeBinder
}

View File

@ -0,0 +1,101 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"time"
k8smetrics "k8s.io/component-base/metrics"
"k8s.io/kubernetes/pkg/scheduler/metrics"
)
// frameworkMetric is the data structure passed in the buffer channel between the main framework thread
// and the metricsRecorder goroutine.
type frameworkMetric struct {
metric *k8smetrics.HistogramVec
labelValues []string
value float64
}
// metricsRecorder records framework metrics in a separate goroutine to avoid overhead in the critical path.
type metricsRecorder struct {
// bufferCh is a channel that serves as a metrics buffer before the metricsRecorder goroutine reports it.
bufferCh chan *frameworkMetric
// if bufferSize is reached, incoming metrics will be discarded.
bufferSize int
// how often the recorder runs to flush the metrics.
interval time.Duration
// stopCh is used to stop the goroutine which periodically flushes metrics. It's currently only
// used in tests.
stopCh chan struct{}
// isStoppedCh indicates whether the goroutine is stopped. It's used in tests only to make sure
// the metric flushing goroutine is stopped so that tests can collect metrics for verification.
isStoppedCh chan struct{}
}
func newMetricsRecorder(bufferSize int, interval time.Duration) *metricsRecorder {
recorder := &metricsRecorder{
bufferCh: make(chan *frameworkMetric, bufferSize),
bufferSize: bufferSize,
interval: interval,
stopCh: make(chan struct{}),
isStoppedCh: make(chan struct{}),
}
go recorder.run()
return recorder
}
// observePluginDurationAsync observes the plugin_execution_duration_seconds metric.
// The metric will be flushed to Prometheus asynchronously.
func (r *metricsRecorder) observePluginDurationAsync(extensionPoint, pluginName string, status *Status, value float64) {
newMetric := &frameworkMetric{
metric: metrics.PluginExecutionDuration,
labelValues: []string{pluginName, extensionPoint, status.Code().String()},
value: value,
}
select {
case r.bufferCh <- newMetric:
default:
}
}
// run periodically flushes buffered metrics into Prometheus at the configured interval.
func (r *metricsRecorder) run() {
for {
select {
case <-r.stopCh:
close(r.isStoppedCh)
return
default:
}
r.flushMetrics()
time.Sleep(r.interval)
}
}
// flushMetrics tries to clean up the bufferCh by reading at most bufferSize metrics.
func (r *metricsRecorder) flushMetrics() {
for i := 0; i < r.bufferSize; i++ {
select {
case m := <-r.bufferCh:
m.metric.WithLabelValues(m.labelValues...).Observe(m.value)
default:
return
}
}
}
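// Illustrative sketch (not part of the upstream file): how the recorder is
// typically wired up. The buffer size and interval are hypothetical; the
// framework chooses its own values at construction time.
func exampleRecorderUsage() {
	r := newMetricsRecorder(1000, time.Second)
	// Record a plugin duration without blocking the scheduling hot path; if
	// the buffer is full, the observation is silently dropped.
	r.observePluginDurationAsync("Filter", "ExamplePlugin", nil, 0.002)
}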

View File

@ -0,0 +1,80 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"fmt"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/json"
"sigs.k8s.io/yaml"
)
// PluginFactory is a function that builds a plugin.
type PluginFactory = func(configuration *runtime.Unknown, f FrameworkHandle) (Plugin, error)
// DecodeInto decodes configuration whose type is *runtime.Unknown into the given interface value.
func DecodeInto(configuration *runtime.Unknown, into interface{}) error {
if configuration == nil || configuration.Raw == nil {
return nil
}
switch configuration.ContentType {
// If ContentType is empty, it means ContentTypeJSON by default.
case runtime.ContentTypeJSON, "":
return json.Unmarshal(configuration.Raw, into)
case runtime.ContentTypeYAML:
return yaml.Unmarshal(configuration.Raw, into)
default:
return fmt.Errorf("not supported content type %s", configuration.ContentType)
}
}
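// Illustrative sketch (not part of the upstream file): decoding per-plugin
// arguments carried as *runtime.Unknown into a typed struct. The struct and
// field are hypothetical.
type exampleArgs struct {
	MaxSkew int32 `json:"maxSkew,omitempty"`
}

func decodeExample(configuration *runtime.Unknown) (*exampleArgs, error) {
	args := &exampleArgs{MaxSkew: 1} // default applied before decoding
	if err := DecodeInto(configuration, args); err != nil {
		return nil, err
	}
	return args, nil
}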
// Registry is a collection of all available plugins. The framework uses a
// registry to enable and initialize configured plugins.
// All plugins must be in the registry before initializing the framework.
type Registry map[string]PluginFactory
// Register adds a new plugin to the registry. If a plugin with the same name
// exists, it returns an error.
func (r Registry) Register(name string, factory PluginFactory) error {
if _, ok := r[name]; ok {
return fmt.Errorf("a plugin named %v already exists", name)
}
r[name] = factory
return nil
}
// Unregister removes an existing plugin from the registry. If no plugin with
// the provided name exists, it returns an error.
func (r Registry) Unregister(name string) error {
if _, ok := r[name]; !ok {
return fmt.Errorf("no plugin named %v exists", name)
}
delete(r, name)
return nil
}
// Merge merges the provided registry into the current one.
func (r Registry) Merge(in Registry) error {
for name, factory := range in {
if err := r.Register(name, factory); err != nil {
return err
}
}
return nil
}
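// Illustrative sketch (not part of the upstream file): building a registry of
// out-of-tree plugins before framework initialization. The plugin and factory
// are hypothetical.
type noopPluginExample struct{}

func (noopPluginExample) Name() string { return "NoopPluginExample" }

func exampleRegistry() (Registry, error) {
	r := Registry{}
	factory := func(configuration *runtime.Unknown, f FrameworkHandle) (Plugin, error) {
		// A real factory would call DecodeInto(configuration, ...) and use
		// the FrameworkHandle to construct the plugin.
		return noopPluginExample{}, nil
	}
	if err := r.Register("NoopPluginExample", factory); err != nil {
		return nil, err
	}
	return r, nil
}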

View File

@ -0,0 +1,164 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"fmt"
"sync"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
)
// waitingPodsMap is a thread-safe map used to maintain pods waiting in the permit phase.
type waitingPodsMap struct {
pods map[types.UID]*waitingPod
mu sync.RWMutex
}
// newWaitingPodsMap returns a new waitingPodsMap.
func newWaitingPodsMap() *waitingPodsMap {
return &waitingPodsMap{
pods: make(map[types.UID]*waitingPod),
}
}
// add a new WaitingPod to the map.
func (m *waitingPodsMap) add(wp *waitingPod) {
m.mu.Lock()
defer m.mu.Unlock()
m.pods[wp.GetPod().UID] = wp
}
// remove a WaitingPod from the map.
func (m *waitingPodsMap) remove(uid types.UID) {
m.mu.Lock()
defer m.mu.Unlock()
delete(m.pods, uid)
}
// get a WaitingPod from the map.
func (m *waitingPodsMap) get(uid types.UID) *waitingPod {
m.mu.RLock()
defer m.mu.RUnlock()
return m.pods[uid]
}
// iterate acquires a read lock and iterates over the WaitingPods map.
func (m *waitingPodsMap) iterate(callback func(WaitingPod)) {
m.mu.RLock()
defer m.mu.RUnlock()
for _, v := range m.pods {
callback(v)
}
}
// waitingPod represents a pod waiting in the permit phase.
type waitingPod struct {
pod *v1.Pod
pendingPlugins map[string]*time.Timer
s chan *Status
mu sync.RWMutex
}
var _ WaitingPod = &waitingPod{}
// newWaitingPod returns a new waitingPod instance.
func newWaitingPod(pod *v1.Pod, pluginsMaxWaitTime map[string]time.Duration) *waitingPod {
wp := &waitingPod{
pod: pod,
// Allow() and Reject() calls are non-blocking. This property is guaranteed
// by using non-blocking send to this channel. This channel has a buffer of size 1
// to ensure that non-blocking send will not be ignored - possible situation when
// receiving from this channel happens after non-blocking send.
s: make(chan *Status, 1),
}
wp.pendingPlugins = make(map[string]*time.Timer, len(pluginsMaxWaitTime))
// The time.AfterFunc calls wp.Reject which iterates through pendingPlugins map. Acquire the
// lock here so that time.AfterFunc can only execute after newWaitingPod finishes.
wp.mu.Lock()
defer wp.mu.Unlock()
for k, v := range pluginsMaxWaitTime {
plugin, waitTime := k, v
wp.pendingPlugins[plugin] = time.AfterFunc(waitTime, func() {
msg := fmt.Sprintf("rejected due to timeout after waiting %v at plugin %v",
waitTime, plugin)
wp.Reject(msg)
})
}
return wp
}
// GetPod returns a reference to the waiting pod.
func (w *waitingPod) GetPod() *v1.Pod {
return w.pod
}
// GetPendingPlugins returns the names of the pending permit plugins.
func (w *waitingPod) GetPendingPlugins() []string {
w.mu.RLock()
defer w.mu.RUnlock()
plugins := make([]string, 0, len(w.pendingPlugins))
for p := range w.pendingPlugins {
plugins = append(plugins, p)
}
return plugins
}
// Allow declares the waiting pod is allowed to be scheduled by plugin pluginName.
// If this is the last remaining plugin to allow, then a success signal is delivered
// to unblock the pod.
func (w *waitingPod) Allow(pluginName string) {
w.mu.Lock()
defer w.mu.Unlock()
if timer, exist := w.pendingPlugins[pluginName]; exist {
timer.Stop()
delete(w.pendingPlugins, pluginName)
}
// Only signal success status after all plugins have allowed
if len(w.pendingPlugins) != 0 {
return
}
// The select clause works as a non-blocking send.
// If there is no receiver, it's a no-op (default case).
select {
case w.s <- NewStatus(Success, ""):
default:
}
}
// Reject declares the waiting pod unschedulable.
func (w *waitingPod) Reject(msg string) {
w.mu.RLock()
defer w.mu.RUnlock()
for _, timer := range w.pendingPlugins {
timer.Stop()
}
// The select clause works as a non-blocking send.
// If there is no receiver, it's a no-op (default case).
select {
case w.s <- NewStatus(Unschedulable, msg):
default:
}
}
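// Illustrative sketch (not part of the upstream file): the lifecycle of a
// waiting pod as driven by the framework. The timeout and the Allow call are
// hypothetical values for illustration.
func exampleWaitingPodLifecycle(pod *v1.Pod) *Status {
	wp := newWaitingPod(pod, map[string]time.Duration{
		"PermitPluginA": 5 * time.Second,
	})
	// Some other component (e.g., a controller watching group membership)
	// allows the pod; since PermitPluginA is the last pending plugin, this
	// sends Success on wp.s via the non-blocking buffered send.
	wp.Allow("PermitPluginA")
	// WaitOnPermit in the framework blocks on this receive.
	return <-wp.s
}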

Some files were not shown because too many files have changed in this diff.