Mirror of https://github.com/ceph/ceph-csi.git, synced 2025-06-13 18:43:34 +00:00

Commit: vendor files

vendor/k8s.io/kubernetes/pkg/kubelet/eviction/BUILD  (generated, vendored; new file, 95 lines)
@@ -0,0 +1,95 @@
package(default_visibility = ["//visibility:public"])

load(
    "@io_bazel_rules_go//go:def.bzl",
    "go_library",
    "go_test",
)

go_test(
    name = "go_default_test",
    srcs = [
        "eviction_manager_test.go",
        "helpers_test.go",
    ],
    importpath = "k8s.io/kubernetes/pkg/kubelet/eviction",
    library = ":go_default_library",
    deps = [
        "//pkg/apis/core:go_default_library",
        "//pkg/features:go_default_library",
        "//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
        "//pkg/kubelet/cm:go_default_library",
        "//pkg/kubelet/eviction/api:go_default_library",
        "//pkg/kubelet/lifecycle:go_default_library",
        "//pkg/kubelet/types:go_default_library",
        "//pkg/quota:go_default_library",
        "//vendor/k8s.io/api/core/v1:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/util/clock:go_default_library",
        "//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
        "//vendor/k8s.io/client-go/tools/record:go_default_library",
    ],
)

go_library(
    name = "go_default_library",
    srcs = [
        "doc.go",
        "eviction_manager.go",
        "helpers.go",
        "threshold_notifier_unsupported.go",
        "types.go",
    ] + select({
        "@io_bazel_rules_go//go/platform:linux_amd64": [
            "threshold_notifier_linux.go",
        ],
        "//conditions:default": [],
    }),
    importpath = "k8s.io/kubernetes/pkg/kubelet/eviction",
    deps = [
        "//pkg/api/v1/resource:go_default_library",
        "//pkg/apis/core/v1/helper/qos:go_default_library",
        "//pkg/features:go_default_library",
        "//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
        "//pkg/kubelet/cm:go_default_library",
        "//pkg/kubelet/eviction/api:go_default_library",
        "//pkg/kubelet/lifecycle:go_default_library",
        "//pkg/kubelet/metrics:go_default_library",
        "//pkg/kubelet/pod:go_default_library",
        "//pkg/kubelet/server/stats:go_default_library",
        "//pkg/kubelet/types:go_default_library",
        "//pkg/kubelet/util/format:go_default_library",
        "//plugin/pkg/scheduler/util:go_default_library",
        "//vendor/github.com/golang/glog:go_default_library",
        "//vendor/k8s.io/api/core/v1:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/util/clock:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
        "//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
        "//vendor/k8s.io/client-go/tools/record:go_default_library",
    ] + select({
        "@io_bazel_rules_go//go/platform:linux_amd64": [
            "//vendor/golang.org/x/sys/unix:go_default_library",
        ],
        "//conditions:default": [],
    }),
)

filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

filegroup(
    name = "all-srcs",
    srcs = [
        ":package-srcs",
        "//pkg/kubelet/eviction/api:all-srcs",
    ],
    tags = ["automanaged"],
)

vendor/k8s.io/kubernetes/pkg/kubelet/eviction/OWNERS  (generated, vendored; new file, 4 lines)
@@ -0,0 +1,4 @@
approvers:
- derekwaynecarr
- vishh
- dchen1107

vendor/k8s.io/kubernetes/pkg/kubelet/eviction/api/BUILD  (generated, vendored; new file, 26 lines)
@@ -0,0 +1,26 @@
package(default_visibility = ["//visibility:public"])

load(
    "@io_bazel_rules_go//go:def.bzl",
    "go_library",
)

go_library(
    name = "go_default_library",
    srcs = ["types.go"],
    importpath = "k8s.io/kubernetes/pkg/kubelet/eviction/api",
    deps = ["//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library"],
)

filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
)

vendor/k8s.io/kubernetes/pkg/kubelet/eviction/api/types.go  (generated, vendored; new file, 100 lines)
@@ -0,0 +1,100 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package api

import (
    "time"

    "k8s.io/apimachinery/pkg/api/resource"
)

// Signal defines a signal that can trigger eviction of pods on a node.
type Signal string

const (
    // SignalMemoryAvailable is memory available (i.e. capacity - workingSet), in bytes.
    SignalMemoryAvailable Signal = "memory.available"
    // SignalNodeFsAvailable is amount of storage available on filesystem that kubelet uses for volumes, daemon logs, etc.
    SignalNodeFsAvailable Signal = "nodefs.available"
    // SignalNodeFsInodesFree is amount of inodes available on filesystem that kubelet uses for volumes, daemon logs, etc.
    SignalNodeFsInodesFree Signal = "nodefs.inodesFree"
    // SignalImageFsAvailable is amount of storage available on filesystem that container runtime uses for storing images and container writable layers.
    SignalImageFsAvailable Signal = "imagefs.available"
    // SignalImageFsInodesFree is amount of inodes available on filesystem that container runtime uses for storing images and container writable layers.
    SignalImageFsInodesFree Signal = "imagefs.inodesFree"
    // SignalAllocatableMemoryAvailable is amount of memory available for pod allocation (i.e. allocatable - workingSet (of pods)), in bytes.
    SignalAllocatableMemoryAvailable Signal = "allocatableMemory.available"
    // SignalAllocatableNodeFsAvailable is amount of local storage available for pod allocation.
    SignalAllocatableNodeFsAvailable Signal = "allocatableNodeFs.available"
)

// ThresholdOperator is the operator used to express a Threshold.
type ThresholdOperator string

const (
    // OpLessThan is the operator that expresses a less than operator.
    OpLessThan ThresholdOperator = "LessThan"
)

// OpForSignal maps Signals to ThresholdOperators.
// Today, the only supported operator is "LessThan". This may change in the future,
// for example if "consumed" (as opposed to "available") type signals are added.
// In both cases the directionality of the threshold is implicit to the signal type
// (for a given signal, the decision to evict will be made when crossing the threshold
// from either above or below, never both). There is thus no reason to expose the
// operator in the Kubelet's public API. Instead, we internally map signal types to operators.
var OpForSignal = map[Signal]ThresholdOperator{
    SignalMemoryAvailable:            OpLessThan,
    SignalNodeFsAvailable:            OpLessThan,
    SignalNodeFsInodesFree:           OpLessThan,
    SignalImageFsAvailable:           OpLessThan,
    SignalImageFsInodesFree:          OpLessThan,
    SignalAllocatableMemoryAvailable: OpLessThan,
    SignalAllocatableNodeFsAvailable: OpLessThan,
}

// ThresholdValue is a value holder that abstracts literal versus percentage based quantity
type ThresholdValue struct {
    // The following fields are exclusive. Only the topmost non-zero field is used.

    // Quantity is a quantity associated with the signal that is evaluated against the specified operator.
    Quantity *resource.Quantity
    // Percentage represents the usage percentage over the total resource that is evaluated against the specified operator.
    Percentage float32
}

// Threshold defines a metric for when eviction should occur.
type Threshold struct {
    // Signal defines the entity that was measured.
    Signal Signal
    // Operator represents a relationship of a signal to a value.
    Operator ThresholdOperator
    // Value is the threshold the resource is evaluated against.
    Value ThresholdValue
    // GracePeriod represents the amount of time that a threshold must be met before eviction is triggered.
    GracePeriod time.Duration
    // MinReclaim represents the minimum amount of resource to reclaim if the threshold is met.
    MinReclaim *ThresholdValue
}

// GetThresholdQuantity returns the expected quantity value for a thresholdValue
func GetThresholdQuantity(value ThresholdValue, capacity *resource.Quantity) *resource.Quantity {
    if value.Quantity != nil {
        return value.Quantity.Copy()
    }
    return resource.NewQuantity(int64(float64(capacity.Value())*float64(value.Percentage)), resource.BinarySI)
}
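
As a quick illustration of how the vendored GetThresholdQuantity helper resolves the two mutually exclusive ThresholdValue fields, here is a minimal sketch. It is not part of the vendored commit; the 4Gi capacity and the 500Mi literal are arbitrary example values.

package main

import (
    "fmt"

    "k8s.io/apimachinery/pkg/api/resource"
    evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

func main() {
    // Hypothetical node capacity used only for this example.
    capacity := resource.MustParse("4Gi")

    // Literal threshold: the quantity is returned as a copy, unchanged.
    literal := evictionapi.ThresholdValue{Quantity: resource.NewQuantity(500*1024*1024, resource.BinarySI)}
    fmt.Println(evictionapi.GetThresholdQuantity(literal, &capacity)) // prints 500Mi

    // Percentage threshold: resolved against the capacity at evaluation time.
    percent := evictionapi.ThresholdValue{Percentage: 0.1}
    fmt.Println(evictionapi.GetThresholdQuantity(percent, &capacity)) // roughly 10% of the 4Gi capacity, in bytes
}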

vendor/k8s.io/kubernetes/pkg/kubelet/eviction/doc.go  (generated, vendored; new file, 19 lines)
@@ -0,0 +1,19 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package eviction is responsible for enforcing eviction thresholds to maintain
// node stability.
package eviction // import "k8s.io/kubernetes/pkg/kubelet/eviction"

vendor/k8s.io/kubernetes/pkg/kubelet/eviction/eviction_manager.go  (generated, vendored; new file, 584 lines)
@@ -0,0 +1,584 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package eviction

import (
    "fmt"
    "sort"
    "sync"
    "time"

    "github.com/golang/glog"

    "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/resource"
    "k8s.io/apimachinery/pkg/util/clock"
    "k8s.io/apimachinery/pkg/util/wait"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    "k8s.io/client-go/tools/record"
    apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource"
    v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
    "k8s.io/kubernetes/pkg/features"
    statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
    "k8s.io/kubernetes/pkg/kubelet/cm"
    evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
    "k8s.io/kubernetes/pkg/kubelet/lifecycle"
    "k8s.io/kubernetes/pkg/kubelet/metrics"
    kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
    "k8s.io/kubernetes/pkg/kubelet/server/stats"
    kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
    "k8s.io/kubernetes/pkg/kubelet/util/format"
)

const (
    podCleanupTimeout  = 30 * time.Second
    podCleanupPollFreq = time.Second
)

// managerImpl implements Manager
type managerImpl struct {
    // used to track time
    clock clock.Clock
    // config is how the manager is configured
    config Config
    // the function to invoke to kill a pod
    killPodFunc KillPodFunc
    // the interface that knows how to do image gc
    imageGC ImageGC
    // the interface that knows how to do container gc
    containerGC ContainerGC
    // protects access to internal state
    sync.RWMutex
    // node conditions are the set of conditions present
    nodeConditions []v1.NodeConditionType
    // captures when a node condition was last observed based on a threshold being met
    nodeConditionsLastObservedAt nodeConditionsObservedAt
    // nodeRef is a reference to the node
    nodeRef *v1.ObjectReference
    // used to record events about the node
    recorder record.EventRecorder
    // used to measure usage stats on system
    summaryProvider stats.SummaryProvider
    // records when a threshold was first observed
    thresholdsFirstObservedAt thresholdsObservedAt
    // records the set of thresholds that have been met (including graceperiod) but not yet resolved
    thresholdsMet []evictionapi.Threshold
    // resourceToRankFunc maps a resource to ranking function for that resource.
    resourceToRankFunc map[v1.ResourceName]rankFunc
    // resourceToNodeReclaimFuncs maps a resource to an ordered list of functions that know how to reclaim that resource.
    resourceToNodeReclaimFuncs map[v1.ResourceName]nodeReclaimFuncs
    // last observations from synchronize
    lastObservations signalObservations
    // notifiersInitialized indicates if the threshold notifiers have been initialized (i.e. synchronize() has been called once)
    notifiersInitialized bool
    // dedicatedImageFs indicates if imagefs is on a separate device from the rootfs
    dedicatedImageFs *bool
}

// ensure it implements the required interface
var _ Manager = &managerImpl{}

// NewManager returns a configured Manager and an associated admission handler to enforce eviction configuration.
func NewManager(
    summaryProvider stats.SummaryProvider,
    config Config,
    killPodFunc KillPodFunc,
    imageGC ImageGC,
    containerGC ContainerGC,
    recorder record.EventRecorder,
    nodeRef *v1.ObjectReference,
    clock clock.Clock) (Manager, lifecycle.PodAdmitHandler) {
    manager := &managerImpl{
        clock:                        clock,
        killPodFunc:                  killPodFunc,
        imageGC:                      imageGC,
        containerGC:                  containerGC,
        config:                       config,
        recorder:                     recorder,
        summaryProvider:              summaryProvider,
        nodeRef:                      nodeRef,
        nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
        thresholdsFirstObservedAt:    thresholdsObservedAt{},
        dedicatedImageFs:             nil,
    }
    return manager, manager
}

// Admit rejects a pod if it is not safe to admit for node stability.
func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
    m.RLock()
    defer m.RUnlock()
    if len(m.nodeConditions) == 0 {
        return lifecycle.PodAdmitResult{Admit: true}
    }
    // Admit Critical pods even under resource pressure since they are required for system stability.
    // https://github.com/kubernetes/kubernetes/issues/40573 has more details.
    if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) && kubelettypes.IsCriticalPod(attrs.Pod) {
        return lifecycle.PodAdmitResult{Admit: true}
    }
    // the node has memory pressure, admit if not best-effort
    if hasNodeCondition(m.nodeConditions, v1.NodeMemoryPressure) {
        notBestEffort := v1.PodQOSBestEffort != v1qos.GetPodQOS(attrs.Pod)
        if notBestEffort {
            return lifecycle.PodAdmitResult{Admit: true}
        }
    }

    // reject pods when under memory pressure (if pod is best effort), or if under disk pressure.
    glog.Warningf("Failed to admit pod %s - node has conditions: %v", format.Pod(attrs.Pod), m.nodeConditions)
    return lifecycle.PodAdmitResult{
        Admit:   false,
        Reason:  reason,
        Message: fmt.Sprintf(message, m.nodeConditions),
    }
}

// Start starts the control loop to observe and respond to low compute resources.
func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, capacityProvider CapacityProvider, monitoringInterval time.Duration) {
    // start the eviction manager monitoring
    go func() {
        for {
            if evictedPods := m.synchronize(diskInfoProvider, podFunc, capacityProvider); evictedPods != nil {
                glog.Infof("eviction manager: pods %s evicted, waiting for pod to be cleaned up", format.Pods(evictedPods))
                m.waitForPodsCleanup(podCleanedUpFunc, evictedPods)
            } else {
                time.Sleep(monitoringInterval)
            }
        }
    }()
}

// IsUnderMemoryPressure returns true if the node is under memory pressure.
func (m *managerImpl) IsUnderMemoryPressure() bool {
    m.RLock()
    defer m.RUnlock()
    return hasNodeCondition(m.nodeConditions, v1.NodeMemoryPressure)
}

// IsUnderDiskPressure returns true if the node is under disk pressure.
func (m *managerImpl) IsUnderDiskPressure() bool {
    m.RLock()
    defer m.RUnlock()
    return hasNodeCondition(m.nodeConditions, v1.NodeDiskPressure)
}

func startMemoryThresholdNotifier(thresholds []evictionapi.Threshold, observations signalObservations, hard bool, handler thresholdNotifierHandlerFunc) error {
    for _, threshold := range thresholds {
        if threshold.Signal != evictionapi.SignalMemoryAvailable || hard != isHardEvictionThreshold(threshold) {
            continue
        }
        observed, found := observations[evictionapi.SignalMemoryAvailable]
        if !found {
            continue
        }
        cgroups, err := cm.GetCgroupSubsystems()
        if err != nil {
            return err
        }
        // TODO add support for eviction from --cgroup-root
        cgpath, found := cgroups.MountPoints["memory"]
        if !found || len(cgpath) == 0 {
            return fmt.Errorf("memory cgroup mount point not found")
        }
        attribute := "memory.usage_in_bytes"
        quantity := evictionapi.GetThresholdQuantity(threshold.Value, observed.capacity)
        usageThreshold := resource.NewQuantity(observed.capacity.Value(), resource.DecimalSI)
        usageThreshold.Sub(*quantity)
        description := fmt.Sprintf("<%s available", formatThresholdValue(threshold.Value))
        memcgThresholdNotifier, err := NewMemCGThresholdNotifier(cgpath, attribute, usageThreshold.String(), description, handler)
        if err != nil {
            return err
        }
        go memcgThresholdNotifier.Start(wait.NeverStop)
        return nil
    }
    return nil
}

// synchronize is the main control loop that enforces eviction thresholds.
// Returns the pods that were killed, or nil if no pod was killed.
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, capacityProvider CapacityProvider) []*v1.Pod {
    // if we have nothing to do, just return
    thresholds := m.config.Thresholds
    if len(thresholds) == 0 {
        return nil
    }

    glog.V(3).Infof("eviction manager: synchronize housekeeping")
    // build the ranking functions (if not yet known)
    // TODO: have a function in cadvisor that lets us know if global housekeeping has completed
    if m.dedicatedImageFs == nil {
        hasImageFs, ok := diskInfoProvider.HasDedicatedImageFs()
        if ok != nil {
            return nil
        }
        m.dedicatedImageFs = &hasImageFs
        m.resourceToRankFunc = buildResourceToRankFunc(hasImageFs)
        m.resourceToNodeReclaimFuncs = buildResourceToNodeReclaimFuncs(m.imageGC, m.containerGC, hasImageFs)
    }

    activePods := podFunc()
    // make observations and get a function to derive pod usage stats relative to those observations.
    observations, statsFunc, err := makeSignalObservations(m.summaryProvider, capacityProvider, activePods)
    if err != nil {
        glog.Errorf("eviction manager: unexpected err: %v", err)
        return nil
    }
    debugLogObservations("observations", observations)

    // attempt to create a threshold notifier to improve eviction response time
    if m.config.KernelMemcgNotification && !m.notifiersInitialized {
        glog.Infof("eviction manager attempting to integrate with kernel memcg notification api")
        m.notifiersInitialized = true
        // start soft memory notification
        err = startMemoryThresholdNotifier(m.config.Thresholds, observations, false, func(desc string) {
            glog.Infof("soft memory eviction threshold crossed at %s", desc)
            // TODO wait grace period for soft memory limit
            m.synchronize(diskInfoProvider, podFunc, capacityProvider)
        })
        if err != nil {
            glog.Warningf("eviction manager: failed to create soft memory threshold notifier: %v", err)
        }
        // start hard memory notification
        err = startMemoryThresholdNotifier(m.config.Thresholds, observations, true, func(desc string) {
            glog.Infof("hard memory eviction threshold crossed at %s", desc)
            m.synchronize(diskInfoProvider, podFunc, capacityProvider)
        })
        if err != nil {
            glog.Warningf("eviction manager: failed to create hard memory threshold notifier: %v", err)
        }
    }

    // determine the set of thresholds met independent of grace period
    thresholds = thresholdsMet(thresholds, observations, false)
    debugLogThresholdsWithObservation("thresholds - ignoring grace period", thresholds, observations)

    // determine the set of thresholds previously met that have not yet satisfied the associated min-reclaim
    if len(m.thresholdsMet) > 0 {
        thresholdsNotYetResolved := thresholdsMet(m.thresholdsMet, observations, true)
        thresholds = mergeThresholds(thresholds, thresholdsNotYetResolved)
    }
    debugLogThresholdsWithObservation("thresholds - reclaim not satisfied", thresholds, observations)

    // track when a threshold was first observed
    now := m.clock.Now()
    thresholdsFirstObservedAt := thresholdsFirstObservedAt(thresholds, m.thresholdsFirstObservedAt, now)

    // the set of node conditions that are triggered by currently observed thresholds
    nodeConditions := nodeConditions(thresholds)
    if len(nodeConditions) > 0 {
        glog.V(3).Infof("eviction manager: node conditions - observed: %v", nodeConditions)
    }

    // track when a node condition was last observed
    nodeConditionsLastObservedAt := nodeConditionsLastObservedAt(nodeConditions, m.nodeConditionsLastObservedAt, now)

    // node conditions report true if it has been observed within the transition period window
    nodeConditions = nodeConditionsObservedSince(nodeConditionsLastObservedAt, m.config.PressureTransitionPeriod, now)
    if len(nodeConditions) > 0 {
        glog.V(3).Infof("eviction manager: node conditions - transition period not met: %v", nodeConditions)
    }

    // determine the set of thresholds we need to drive eviction behavior (i.e. all grace periods are met)
    thresholds = thresholdsMetGracePeriod(thresholdsFirstObservedAt, now)
    debugLogThresholdsWithObservation("thresholds - grace periods satisfied", thresholds, observations)

    // update internal state
    m.Lock()
    m.nodeConditions = nodeConditions
    m.thresholdsFirstObservedAt = thresholdsFirstObservedAt
    m.nodeConditionsLastObservedAt = nodeConditionsLastObservedAt
    m.thresholdsMet = thresholds

    // determine the set of thresholds whose stats have been updated since the last sync
    thresholds = thresholdsUpdatedStats(thresholds, observations, m.lastObservations)
    debugLogThresholdsWithObservation("thresholds - updated stats", thresholds, observations)

    m.lastObservations = observations
    m.Unlock()

    // evict pods if there is a resource usage violation from local volume temporary storage
    // If eviction happens in the localStorageEviction function, skip the rest of eviction action
    if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
        if evictedPods := m.localStorageEviction(activePods); len(evictedPods) > 0 {
            return evictedPods
        }
    }

    // determine the set of resources under starvation
    starvedResources := getStarvedResources(thresholds)
    if len(starvedResources) == 0 {
        glog.V(3).Infof("eviction manager: no resources are starved")
        return nil
    }

    // rank the resources to reclaim by eviction priority
    sort.Sort(byEvictionPriority(starvedResources))
    resourceToReclaim := starvedResources[0]
    glog.Warningf("eviction manager: attempting to reclaim %v", resourceToReclaim)

    // determine if this is a soft or hard eviction associated with the resource
    softEviction := isSoftEvictionThresholds(thresholds, resourceToReclaim)

    // record an event about the resources we are now attempting to reclaim via eviction
    m.recorder.Eventf(m.nodeRef, v1.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)

    // check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods.
    if m.reclaimNodeLevelResources(resourceToReclaim, observations) {
        glog.Infof("eviction manager: able to reduce %v pressure without evicting pods.", resourceToReclaim)
        return nil
    }

    glog.Infof("eviction manager: must evict pod(s) to reclaim %v", resourceToReclaim)

    // rank the pods for eviction
    rank, ok := m.resourceToRankFunc[resourceToReclaim]
    if !ok {
        glog.Errorf("eviction manager: no ranking function for resource %s", resourceToReclaim)
        return nil
    }

    // the only candidates viable for eviction are those pods that had anything running.
    if len(activePods) == 0 {
        glog.Errorf("eviction manager: eviction thresholds have been met, but no pods are active to evict")
        return nil
    }

    // rank the running pods for eviction for the specified resource
    rank(activePods, statsFunc)

    glog.Infof("eviction manager: pods ranked for eviction: %s", format.Pods(activePods))

    // record age of metrics for met thresholds that we are using for evictions.
    for _, t := range thresholds {
        timeObserved := observations[t.Signal].time
        if !timeObserved.IsZero() {
            metrics.EvictionStatsAge.WithLabelValues(string(t.Signal)).Observe(metrics.SinceInMicroseconds(timeObserved.Time))
        }
    }

    // we kill at most a single pod during each eviction interval
    for i := range activePods {
        pod := activePods[i]
        // If the pod is marked as critical and static, and support for critical pod annotations is enabled,
        // do not evict such pods. Static pods are not re-admitted after evictions.
        // https://github.com/kubernetes/kubernetes/issues/40573 has more details.
        if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
            kubelettypes.IsCriticalPod(pod) && kubepod.IsStaticPod(pod) {
            continue
        }
        status := v1.PodStatus{
            Phase:   v1.PodFailed,
            Message: fmt.Sprintf(message, resourceToReclaim),
            Reason:  reason,
        }
        // record that we are evicting the pod
        m.recorder.Eventf(pod, v1.EventTypeWarning, reason, fmt.Sprintf(message, resourceToReclaim))
        gracePeriodOverride := int64(0)
        if softEviction {
            gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
        }
        // this is a blocking call and should only return when the pod and its containers are killed.
        err := m.killPodFunc(pod, status, &gracePeriodOverride)
        if err != nil {
            glog.Warningf("eviction manager: error while evicting pod %s: %v", format.Pod(pod), err)
        }
        return []*v1.Pod{pod}
    }
    glog.Infof("eviction manager: unable to evict any pods from the node")
    return nil
}

func (m *managerImpl) waitForPodsCleanup(podCleanedUpFunc PodCleanedUpFunc, pods []*v1.Pod) {
    timeout := m.clock.NewTimer(podCleanupTimeout)
    tick := m.clock.Tick(podCleanupPollFreq)
    for {
        select {
        case <-timeout.C():
            glog.Warningf("eviction manager: timed out waiting for pods %s to be cleaned up", format.Pods(pods))
            return
        case <-tick:
            for i, pod := range pods {
                if !podCleanedUpFunc(pod) {
                    break
                }
                if i == len(pods)-1 {
                    glog.Infof("eviction manager: pods %s successfully cleaned up", format.Pods(pods))
                    return
                }
            }
        }
    }
}

// reclaimNodeLevelResources attempts to reclaim node level resources. returns true if thresholds were satisfied and no pod eviction is required.
func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName, observations signalObservations) bool {
    nodeReclaimFuncs := m.resourceToNodeReclaimFuncs[resourceToReclaim]
    for _, nodeReclaimFunc := range nodeReclaimFuncs {
        // attempt to reclaim the pressured resource.
        reclaimed, err := nodeReclaimFunc()
        if err != nil {
            glog.Warningf("eviction manager: unexpected error when attempting to reduce %v pressure: %v", resourceToReclaim, err)
        }
        // update our local observations based on the amount reported to have been reclaimed.
        // note: this is optimistic, other things could have been still consuming the pressured resource in the interim.
        for _, signal := range resourceClaimToSignal[resourceToReclaim] {
            value, ok := observations[signal]
            if !ok {
                glog.Errorf("eviction manager: unable to find value associated with signal %v", signal)
                continue
            }
            value.available.Add(*reclaimed)
        }
        // evaluate all current thresholds to see if with adjusted observations, we think we have met min reclaim goals
        if len(thresholdsMet(m.thresholdsMet, observations, true)) == 0 {
            return true
        }
    }
    return false
}

// localStorageEviction checks the EmptyDir volume usage for each pod and determines whether it exceeds the specified limit and needs
// to be evicted. It also checks every container in the pod; if the container overlay usage exceeds the limit, the pod will be evicted too.
func (m *managerImpl) localStorageEviction(pods []*v1.Pod) []*v1.Pod {
    summary, err := m.summaryProvider.Get()
    if err != nil {
        glog.Errorf("Could not get summary provider")
        return nil
    }

    statsFunc := cachedStatsFunc(summary.Pods)
    evicted := []*v1.Pod{}
    for _, pod := range pods {
        podStats, ok := statsFunc(pod)
        if !ok {
            continue
        }

        if m.emptyDirLimitEviction(podStats, pod) {
            evicted = append(evicted, pod)
            continue
        }

        if m.podEphemeralStorageLimitEviction(podStats, pod) {
            evicted = append(evicted, pod)
            continue
        }

        if m.containerEphemeralStorageLimitEviction(podStats, pod) {
            evicted = append(evicted, pod)
        }
    }

    return evicted
}

func (m *managerImpl) emptyDirLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
    podVolumeUsed := make(map[string]*resource.Quantity)
    for _, volume := range podStats.VolumeStats {
        podVolumeUsed[volume.Name] = resource.NewQuantity(int64(*volume.UsedBytes), resource.BinarySI)
    }
    for i := range pod.Spec.Volumes {
        source := &pod.Spec.Volumes[i].VolumeSource
        if source.EmptyDir != nil {
            size := source.EmptyDir.SizeLimit
            used := podVolumeUsed[pod.Spec.Volumes[i].Name]
            if used != nil && size != nil && size.Sign() == 1 && used.Cmp(*size) > 0 {
                // the emptyDir usage exceeds the size limit, evict the pod
                return m.evictPod(pod, v1.ResourceName("EmptyDir"), fmt.Sprintf("emptyDir usage exceeds the limit %q", size.String()))
            }
        }
    }

    return false
}

func (m *managerImpl) podEphemeralStorageLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
    _, podLimits := apiv1resource.PodRequestsAndLimits(pod)
    _, found := podLimits[v1.ResourceEphemeralStorage]
    if !found {
        return false
    }

    podEphemeralStorageTotalUsage := &resource.Quantity{}
    fsStatsSet := []fsStatsType{}
    if *m.dedicatedImageFs {
        fsStatsSet = []fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}
    } else {
        fsStatsSet = []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}
    }
    podEphemeralUsage, err := podLocalEphemeralStorageUsage(podStats, pod, fsStatsSet)
    if err != nil {
        glog.Errorf("eviction manager: error getting pod disk usage %v", err)
        return false
    }

    podEphemeralStorageTotalUsage.Add(podEphemeralUsage[resourceDisk])
    if podEphemeralStorageTotalUsage.Cmp(podLimits[v1.ResourceEphemeralStorage]) > 0 {
        // the total usage of pod exceeds the total size limit of containers, evict the pod
        return m.evictPod(pod, v1.ResourceEphemeralStorage, fmt.Sprintf("pod ephemeral local storage usage exceeds the total limit of containers %v", podLimits[v1.ResourceEphemeralStorage]))
    }
    return false
}

func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
    thresholdsMap := make(map[string]*resource.Quantity)
    for _, container := range pod.Spec.Containers {
        ephemeralLimit := container.Resources.Limits.StorageEphemeral()
        if ephemeralLimit != nil && ephemeralLimit.Value() != 0 {
            thresholdsMap[container.Name] = ephemeralLimit
        }
    }

    for _, containerStat := range podStats.Containers {
        containerUsed := diskUsage(containerStat.Logs)
        if !*m.dedicatedImageFs {
            containerUsed.Add(*diskUsage(containerStat.Rootfs))
        }

        if ephemeralStorageThreshold, ok := thresholdsMap[containerStat.Name]; ok {
            if ephemeralStorageThreshold.Cmp(*containerUsed) < 0 {
                return m.evictPod(pod, v1.ResourceEphemeralStorage, fmt.Sprintf("container's ephemeral local storage usage exceeds the limit %q", ephemeralStorageThreshold.String()))
            }
        }
    }
    return false
}

func (m *managerImpl) evictPod(pod *v1.Pod, resourceName v1.ResourceName, evictMsg string) bool {
    if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
        kubelettypes.IsCriticalPod(pod) && kubepod.IsStaticPod(pod) {
        glog.Errorf("eviction manager: cannot evict a critical pod %s", format.Pod(pod))
        return false
    }
    status := v1.PodStatus{
        Phase:   v1.PodFailed,
        Message: fmt.Sprintf(message, resourceName),
        Reason:  reason,
    }
    // record that we are evicting the pod
    m.recorder.Eventf(pod, v1.EventTypeWarning, reason, evictMsg)
    gracePeriod := int64(0)
    err := m.killPodFunc(pod, status, &gracePeriod)
    if err != nil {
        glog.Errorf("eviction manager: pod %s failed to evict %v", format.Pod(pod), err)
    } else {
        glog.Infof("eviction manager: pod %s is evicted successfully", format.Pod(pod))
    }
    return true
}

vendor/k8s.io/kubernetes/pkg/kubelet/eviction/eviction_manager_test.go  (generated, vendored; new file, 1445 lines)
File diff suppressed because it is too large.

vendor/k8s.io/kubernetes/pkg/kubelet/eviction/helpers.go  (generated, vendored; new file, 1088 lines)
File diff suppressed because it is too large.

vendor/k8s.io/kubernetes/pkg/kubelet/eviction/helpers_test.go  (generated, vendored; new file, 1914 lines)
File diff suppressed because it is too large.

vendor/k8s.io/kubernetes/pkg/kubelet/eviction/threshold_notifier_linux.go  (generated, vendored; new file, 117 lines)
@@ -0,0 +1,117 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package eviction

import (
    "fmt"

    "github.com/golang/glog"
    "golang.org/x/sys/unix"
)

type memcgThresholdNotifier struct {
    watchfd     int
    controlfd   int
    eventfd     int
    handler     thresholdNotifierHandlerFunc
    description string
}

var _ ThresholdNotifier = &memcgThresholdNotifier{}

// NewMemCGThresholdNotifier sends notifications when a cgroup threshold
// is crossed (in either direction) for a given cgroup attribute
func NewMemCGThresholdNotifier(path, attribute, threshold, description string, handler thresholdNotifierHandlerFunc) (ThresholdNotifier, error) {
    watchfd, err := unix.Open(fmt.Sprintf("%s/%s", path, attribute), unix.O_RDONLY, 0)
    if err != nil {
        return nil, err
    }
    defer func() {
        if err != nil {
            unix.Close(watchfd)
        }
    }()
    controlfd, err := unix.Open(fmt.Sprintf("%s/cgroup.event_control", path), unix.O_WRONLY, 0)
    if err != nil {
        return nil, err
    }
    defer func() {
        if err != nil {
            unix.Close(controlfd)
        }
    }()
    eventfd, err := unix.Eventfd(0, unix.EFD_CLOEXEC)
    if err != nil {
        return nil, err
    }
    if eventfd < 0 {
        err = fmt.Errorf("eventfd call failed")
        return nil, err
    }
    defer func() {
        if err != nil {
            unix.Close(eventfd)
        }
    }()
    glog.V(2).Infof("eviction: setting notification threshold to %s", threshold)
    config := fmt.Sprintf("%d %d %s", eventfd, watchfd, threshold)
    _, err = unix.Write(controlfd, []byte(config))
    if err != nil {
        return nil, err
    }
    return &memcgThresholdNotifier{
        watchfd:     watchfd,
        controlfd:   controlfd,
        eventfd:     eventfd,
        handler:     handler,
        description: description,
    }, nil
}

func getThresholdEvents(eventfd int, eventCh chan<- struct{}, stopCh <-chan struct{}) {
    for {
        buf := make([]byte, 8)
        _, err := unix.Read(eventfd, buf)
        if err != nil {
            return
        }

        select {
        case eventCh <- struct{}{}:
        case <-stopCh:
            return
        }
    }
}

func (n *memcgThresholdNotifier) Start(stopCh <-chan struct{}) {
    eventCh := make(chan struct{})
    go getThresholdEvents(n.eventfd, eventCh, stopCh)
    for {
        select {
        case <-stopCh:
            glog.V(2).Infof("eviction: stopping threshold notifier")
            unix.Close(n.watchfd)
            unix.Close(n.controlfd)
            unix.Close(n.eventfd)
            return
        case <-eventCh:
            glog.V(2).Infof("eviction: threshold crossed")
            n.handler(n.description)
        }
    }
}
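
For context, the line written to cgroup.event_control above follows the cgroup v1 eventfd notification format: "<event_fd> <fd of the watched file> <threshold in bytes>". A minimal usage sketch follows; it is illustrative only, written as if it lived inside this package (the handler type is unexported), and the cgroup mount path and 1Gi threshold are assumptions, not values from the vendored code.

// +build linux

package eviction

import "github.com/golang/glog"

// exampleStartNotifier is a hypothetical helper showing how the notifier might be wired up.
func exampleStartNotifier(stopCh <-chan struct{}) error {
    notifier, err := NewMemCGThresholdNotifier(
        "/sys/fs/cgroup/memory",  // assumed memory cgroup mount point
        "memory.usage_in_bytes",  // attribute to watch
        "1073741824",             // fire when usage crosses 1Gi (threshold is expressed in bytes)
        "<1Gi available",         // description handed back to the handler
        func(desc string) { glog.Infof("memcg threshold crossed: %s", desc) },
    )
    if err != nil {
        return err
    }
    // Start blocks until stopCh is closed, so run it in its own goroutine.
    go notifier.Start(stopCh)
    return nil
}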

vendor/k8s.io/kubernetes/pkg/kubelet/eviction/threshold_notifier_unsupported.go  (generated, vendored; new file, 27 lines)
@@ -0,0 +1,27 @@
// +build !linux

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package eviction

import "fmt"

// NewMemCGThresholdNotifier sends notifications when a cgroup threshold
// is crossed (in either direction) for a given cgroup attribute
func NewMemCGThresholdNotifier(path, attribute, threshold, description string, handler thresholdNotifierHandlerFunc) (ThresholdNotifier, error) {
    return nil, fmt.Errorf("threshold notification not supported")
}

vendor/k8s.io/kubernetes/pkg/kubelet/eviction/types.go  (generated, vendored; new file, 147 lines)
@@ -0,0 +1,147 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package eviction

import (
    "time"

    "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
    evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

// fsStatsType defines the types of filesystem stats to collect.
type fsStatsType string

const (
    // fsStatsLocalVolumeSource identifies stats for pod local volume sources.
    fsStatsLocalVolumeSource fsStatsType = "localVolumeSource"
    // fsStatsLogs identifies stats for pod logs.
    fsStatsLogs fsStatsType = "logs"
    // fsStatsRoot identifies stats for pod container writable layers.
    fsStatsRoot fsStatsType = "root"
)

// Config holds information about how eviction is configured.
type Config struct {
    // PressureTransitionPeriod is duration the kubelet has to wait before transitioning out of a pressure condition.
    PressureTransitionPeriod time.Duration
    // Maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met.
    MaxPodGracePeriodSeconds int64
    // Thresholds define the set of conditions monitored to trigger eviction.
    Thresholds []evictionapi.Threshold
    // KernelMemcgNotification if true will integrate with the kernel memcg notification to determine if memory thresholds are crossed.
    KernelMemcgNotification bool
}

// Manager evaluates when an eviction threshold for node stability has been met on the node.
type Manager interface {
    // Start starts the control loop to monitor eviction thresholds at specified interval.
    Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, capacityProvider CapacityProvider, monitoringInterval time.Duration)

    // IsUnderMemoryPressure returns true if the node is under memory pressure.
    IsUnderMemoryPressure() bool

    // IsUnderDiskPressure returns true if the node is under disk pressure.
    IsUnderDiskPressure() bool
}

// DiskInfoProvider is responsible for informing the manager how disk is configured.
type DiskInfoProvider interface {
    // HasDedicatedImageFs returns true if the imagefs is on a separate device from the rootfs.
    HasDedicatedImageFs() (bool, error)
}

// CapacityProvider is responsible for providing the resource capacity and reservation information.
type CapacityProvider interface {
    // GetCapacity returns the amount of compute resources tracked by container manager available on the node.
    GetCapacity() v1.ResourceList
    // GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling.
    GetNodeAllocatableReservation() v1.ResourceList
}

// ImageGC is responsible for performing garbage collection of unused images.
type ImageGC interface {
    // DeleteUnusedImages deletes unused images and returns the number of bytes freed, and an error.
    // This returns the bytes freed even if an error is returned.
    DeleteUnusedImages() (int64, error)
}

// ContainerGC is responsible for performing garbage collection of unused containers.
type ContainerGC interface {
    // DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted.
    // It returns an error if it is unsuccessful.
    DeleteAllUnusedContainers() error
}

// KillPodFunc kills a pod.
// The pod status is updated, and then it is killed with the specified grace period.
// This function must block until either the pod is killed or an error is encountered.
// Arguments:
// pod - the pod to kill
// status - the desired status to associate with the pod (i.e. why it is killed)
// gracePeriodOverride - the grace period override to use instead of what is on the pod spec
type KillPodFunc func(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error

// ActivePodsFunc returns pods bound to the kubelet that are active (i.e. non-terminal state)
type ActivePodsFunc func() []*v1.Pod

// PodCleanedUpFunc returns true if all resources associated with a pod have been reclaimed.
type PodCleanedUpFunc func(*v1.Pod) bool

// statsFunc returns the usage stats if known for an input pod.
type statsFunc func(pod *v1.Pod) (statsapi.PodStats, bool)

// rankFunc sorts the pods in eviction order
type rankFunc func(pods []*v1.Pod, stats statsFunc)

// signalObservation is the observed resource usage
type signalObservation struct {
    // The resource capacity
    capacity *resource.Quantity
    // The available resource
    available *resource.Quantity
    // Time at which the observation was taken
    time metav1.Time
}

// signalObservations maps a signal to an observed quantity
type signalObservations map[evictionapi.Signal]signalObservation

// thresholdsObservedAt maps a threshold to a time that it was observed
type thresholdsObservedAt map[evictionapi.Threshold]time.Time

// nodeConditionsObservedAt maps a node condition to a time that it was observed
type nodeConditionsObservedAt map[v1.NodeConditionType]time.Time

// nodeReclaimFunc is a function that knows how to reclaim a resource from the node without impacting pods.
// Returns the quantity of resources reclaimed and an error, if applicable.
// nodeReclaimFunc returns the resources reclaimed even if an error occurs.
type nodeReclaimFunc func() (*resource.Quantity, error)

// nodeReclaimFuncs is an ordered list of nodeReclaimFunc
type nodeReclaimFuncs []nodeReclaimFunc

// thresholdNotifierHandlerFunc is a function that takes action in response to a crossed threshold
type thresholdNotifierHandlerFunc func(thresholdDescription string)

// ThresholdNotifier notifies the user when an attribute crosses a threshold value
type ThresholdNotifier interface {
    Start(stopCh <-chan struct{})
}
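
To tie the types together, here is a hedged sketch of how a Config could be populated with one hard and one soft memory threshold. The values are arbitrary examples; in the real kubelet this structure is built from the --eviction-hard and --eviction-soft flags by helper code that is not part of this diff.

package eviction

import (
    "time"

    "k8s.io/apimachinery/pkg/api/resource"
    evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

// exampleConfig is illustrative only and not part of the vendored commit.
func exampleConfig() Config {
    hard := resource.MustParse("100Mi")
    return Config{
        PressureTransitionPeriod: 5 * time.Minute,
        MaxPodGracePeriodSeconds: 30,
        KernelMemcgNotification:  false,
        Thresholds: []evictionapi.Threshold{
            {
                // Hard threshold: no grace period, act as soon as memory.available < 100Mi.
                Signal:   evictionapi.SignalMemoryAvailable,
                Operator: evictionapi.OpLessThan,
                Value:    evictionapi.ThresholdValue{Quantity: &hard},
            },
            {
                // Soft threshold: 10% of capacity, which must persist for one minute before eviction.
                Signal:      evictionapi.SignalMemoryAvailable,
                Operator:    evictionapi.OpLessThan,
                Value:       evictionapi.ThresholdValue{Percentage: 0.1},
                GracePeriod: time.Minute,
            },
        },
    }
}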