vendor cleanup: remove unused, non-go and test files

Madhu Rajanna
2019-01-16 00:05:52 +05:30
parent 52cf4aa902
commit b10ba188e7
15421 changed files with 17 additions and 4208853 deletions

View File

@@ -1,126 +0,0 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_test(
name = "go_default_test",
srcs = [
"eviction_manager_test.go",
"helpers_test.go",
"memory_threshold_notifier_test.go",
"mock_threshold_notifier_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//vendor/github.com/stretchr/testify/mock:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/clock:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
],
)
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"eviction_manager.go",
"helpers.go",
"memory_threshold_notifier.go",
"types.go",
] + select({
"@io_bazel_rules_go//go/platform:android": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:darwin": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:dragonfly": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:freebsd": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:linux": [
"threshold_notifier_linux.go",
],
"@io_bazel_rules_go//go/platform:nacl": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:netbsd": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:openbsd": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:plan9": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:solaris": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:windows": [
"threshold_notifier_unsupported.go",
],
"//conditions:default": [],
}),
importpath = "k8s.io/kubernetes/pkg/kubelet/eviction",
deps = [
"//pkg/api/v1/resource:go_default_library",
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/apis/core/v1/helper/qos:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
"//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//pkg/kubelet/pod:go_default_library",
"//pkg/kubelet/server/stats:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/clock:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux": [
"//vendor/golang.org/x/sys/unix:go_default_library",
],
"//conditions:default": [],
}),
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//pkg/kubelet/eviction/api:all-srcs",
],
tags = ["automanaged"],
)
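
For orientation: the per-platform select() above pairs with ordinary Go build constraints in the sources it lists. threshold_notifier_linux.go is compiled only on Linux via its _linux filename suffix, while the stub carries an explicit constraint; a minimal sketch of the stub's header (abbreviated here, the full file appears later in this commit):

// +build !linux

// Non-Linux stub: NewCgroupNotifier returns a no-op notifier because
// memcg threshold notifications are a Linux-only mechanism.
package eviction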

View File

@@ -1,4 +0,0 @@
approvers:
- derekwaynecarr
- vishh
- dchen1107

View File

@@ -1,26 +0,0 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["types.go"],
importpath = "k8s.io/kubernetes/pkg/kubelet/eviction/api",
deps = ["//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@@ -1,98 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api
import (
"time"
"k8s.io/apimachinery/pkg/api/resource"
)
// Signal defines a signal that can trigger eviction of pods on a node.
type Signal string
const (
// SignalMemoryAvailable is memory available (i.e. capacity - workingSet), in bytes.
SignalMemoryAvailable Signal = "memory.available"
// SignalNodeFsAvailable is amount of storage available on filesystem that kubelet uses for volumes, daemon logs, etc.
SignalNodeFsAvailable Signal = "nodefs.available"
// SignalNodeFsInodesFree is amount of inodes available on filesystem that kubelet uses for volumes, daemon logs, etc.
SignalNodeFsInodesFree Signal = "nodefs.inodesFree"
// SignalImageFsAvailable is amount of storage available on filesystem that container runtime uses for storing images and container writable layers.
SignalImageFsAvailable Signal = "imagefs.available"
// SignalImageFsInodesFree is amount of inodes available on filesystem that container runtime uses for storing images and container writable layers.
SignalImageFsInodesFree Signal = "imagefs.inodesFree"
// SignalAllocatableMemoryAvailable is amount of memory available for pod allocation (i.e. allocatable - workingSet (of pods)), in bytes.
SignalAllocatableMemoryAvailable Signal = "allocatableMemory.available"
// SignalPIDAvailable is the number of PIDs available for pod allocation
SignalPIDAvailable Signal = "pid.available"
)
// ThresholdOperator is the operator used to express a Threshold.
type ThresholdOperator string
const (
// OpLessThan is the operator that expresses a less than operator.
OpLessThan ThresholdOperator = "LessThan"
)
// OpForSignal maps Signals to ThresholdOperators.
// Today, the only supported operator is "LessThan". This may change in the future,
// for example if "consumed" (as opposed to "available") type signals are added.
// In both cases the directionality of the threshold is implicit to the signal type
// (for a given signal, the decision to evict will be made when crossing the threshold
// from either above or below, never both). There is thus no reason to expose the
// operator in the Kubelet's public API. Instead, we internally map signal types to operators.
var OpForSignal = map[Signal]ThresholdOperator{
SignalMemoryAvailable: OpLessThan,
SignalNodeFsAvailable: OpLessThan,
SignalNodeFsInodesFree: OpLessThan,
SignalImageFsAvailable: OpLessThan,
SignalImageFsInodesFree: OpLessThan,
}
// ThresholdValue is a value holder that abstracts literal versus percentage based quantity
type ThresholdValue struct {
// The following fields are exclusive. Only the topmost non-zero field is used.
// Quantity is a quantity associated with the signal that is evaluated against the specified operator.
Quantity *resource.Quantity
// Percentage represents the usage percentage over the total resource that is evaluated against the specified operator.
Percentage float32
}
// Threshold defines a metric for when eviction should occur.
type Threshold struct {
// Signal defines the entity that was measured.
Signal Signal
// Operator represents a relationship of a signal to a value.
Operator ThresholdOperator
// Value is the threshold the resource is evaluated against.
Value ThresholdValue
// GracePeriod represents the amount of time that a threshold must be met before eviction is triggered.
GracePeriod time.Duration
// MinReclaim represents the minimum amount of resource to reclaim if the threshold is met.
MinReclaim *ThresholdValue
}
// GetThresholdQuantity returns the expected quantity value for a thresholdValue
func GetThresholdQuantity(value ThresholdValue, capacity *resource.Quantity) *resource.Quantity {
if value.Quantity != nil {
return value.Quantity.Copy()
}
return resource.NewQuantity(int64(float64(capacity.Value())*float64(value.Percentage)), resource.BinarySI)
}
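
GetThresholdQuantity is the only place a percentage-based ThresholdValue is resolved into an absolute quantity, so both forms become comparable against the same capacity. A minimal sketch with illustrative values (the 10Gi capacity and the thresholds are made up; the import paths assume the in-tree package locations shown above):

package main

import (
    "fmt"

    "k8s.io/apimachinery/pkg/api/resource"
    evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

func main() {
    capacity := resource.MustParse("10Gi")

    // Literal form: the quantity is returned as a copy, untouched.
    literal := evictionapi.ThresholdValue{Quantity: resource.NewQuantity(1<<30, resource.BinarySI)}
    fmt.Println(evictionapi.GetThresholdQuantity(literal, &capacity)) // 1Gi

    // Percentage form: resolved against the supplied capacity.
    percentage := evictionapi.ThresholdValue{Percentage: 0.1}
    fmt.Println(evictionapi.GetThresholdQuantity(percentage, &capacity)) // 1Gi (10% of 10Gi)
}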

View File

@@ -1,19 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package eviction is responsible for enforcing eviction thresholds to maintain
// node stability.
package eviction // import "k8s.io/kubernetes/pkg/kubelet/eviction"

View File

@@ -1,567 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package eviction
import (
"fmt"
"sort"
"sync"
"time"
"github.com/golang/glog"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/clock"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/record"
apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/server/stats"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/format"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
)
const (
podCleanupTimeout = 30 * time.Second
podCleanupPollFreq = time.Second
)
// managerImpl implements Manager
type managerImpl struct {
// used to track time
clock clock.Clock
// config is how the manager is configured
config Config
// the function to invoke to kill a pod
killPodFunc KillPodFunc
// the interface that knows how to do image gc
imageGC ImageGC
// the interface that knows how to do container gc
containerGC ContainerGC
// protects access to internal state
sync.RWMutex
// node conditions are the set of conditions present
nodeConditions []v1.NodeConditionType
// captures when a node condition was last observed based on a threshold being met
nodeConditionsLastObservedAt nodeConditionsObservedAt
// nodeRef is a reference to the node
nodeRef *v1.ObjectReference
// used to record events about the node
recorder record.EventRecorder
// used to measure usage stats on system
summaryProvider stats.SummaryProvider
// records when a threshold was first observed
thresholdsFirstObservedAt thresholdsObservedAt
// records the set of thresholds that have been met (including graceperiod) but not yet resolved
thresholdsMet []evictionapi.Threshold
// signalToRankFunc maps a resource to ranking function for that resource.
signalToRankFunc map[evictionapi.Signal]rankFunc
// signalToNodeReclaimFuncs maps a resource to an ordered list of functions that know how to reclaim that resource.
signalToNodeReclaimFuncs map[evictionapi.Signal]nodeReclaimFuncs
// last observations from synchronize
lastObservations signalObservations
// dedicatedImageFs indicates if imagefs is on a separate device from the rootfs
dedicatedImageFs *bool
// thresholdNotifiers is a list of memory threshold notifiers which each notify for a memory eviction threshold
thresholdNotifiers []ThresholdNotifier
// thresholdsLastUpdated is the last time the thresholdNotifiers were updated.
thresholdsLastUpdated time.Time
}
// ensure it implements the required interface
var _ Manager = &managerImpl{}
// NewManager returns a configured Manager and an associated admission handler to enforce eviction configuration.
func NewManager(
summaryProvider stats.SummaryProvider,
config Config,
killPodFunc KillPodFunc,
imageGC ImageGC,
containerGC ContainerGC,
recorder record.EventRecorder,
nodeRef *v1.ObjectReference,
clock clock.Clock,
) (Manager, lifecycle.PodAdmitHandler) {
manager := &managerImpl{
clock: clock,
killPodFunc: killPodFunc,
imageGC: imageGC,
containerGC: containerGC,
config: config,
recorder: recorder,
summaryProvider: summaryProvider,
nodeRef: nodeRef,
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
thresholdsFirstObservedAt: thresholdsObservedAt{},
dedicatedImageFs: nil,
thresholdNotifiers: []ThresholdNotifier{},
}
return manager, manager
}
// Admit rejects a pod if it is not safe to admit for node stability.
func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
m.RLock()
defer m.RUnlock()
if len(m.nodeConditions) == 0 {
return lifecycle.PodAdmitResult{Admit: true}
}
// Admit Critical pods even under resource pressure since they are required for system stability.
// https://github.com/kubernetes/kubernetes/issues/40573 has more details.
if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) && kubelettypes.IsCriticalPod(attrs.Pod) {
return lifecycle.PodAdmitResult{Admit: true}
}
// the node has memory pressure, admit if not best-effort
if hasNodeCondition(m.nodeConditions, v1.NodeMemoryPressure) {
notBestEffort := v1.PodQOSBestEffort != v1qos.GetPodQOS(attrs.Pod)
if notBestEffort {
return lifecycle.PodAdmitResult{Admit: true}
}
// When node has memory pressure and TaintNodesByCondition is enabled, check BestEffort Pod's toleration:
// admit it if tolerates memory pressure taint, fail for other tolerations, e.g. OutOfDisk.
if utilfeature.DefaultFeatureGate.Enabled(features.TaintNodesByCondition) &&
v1helper.TolerationsTolerateTaint(attrs.Pod.Spec.Tolerations, &v1.Taint{
Key: algorithm.TaintNodeMemoryPressure,
Effect: v1.TaintEffectNoSchedule,
}) {
return lifecycle.PodAdmitResult{Admit: true}
}
}
// reject pods when under memory pressure (if pod is best effort), or if under disk pressure.
glog.Warningf("Failed to admit pod %s - node has conditions: %v", format.Pod(attrs.Pod), m.nodeConditions)
return lifecycle.PodAdmitResult{
Admit: false,
Reason: Reason,
Message: fmt.Sprintf(nodeLowMessageFmt, m.nodeConditions),
}
}
// Start starts the control loop to observe and respond to low compute resources.
func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, monitoringInterval time.Duration) {
thresholdHandler := func(message string) {
glog.Infof(message)
m.synchronize(diskInfoProvider, podFunc)
}
if m.config.KernelMemcgNotification {
for _, threshold := range m.config.Thresholds {
if threshold.Signal == evictionapi.SignalMemoryAvailable || threshold.Signal == evictionapi.SignalAllocatableMemoryAvailable {
notifier, err := NewMemoryThresholdNotifier(threshold, m.config.PodCgroupRoot, &CgroupNotifierFactory{}, thresholdHandler)
if err != nil {
glog.Warningf("eviction manager: failed to create memory threshold notifier: %v", err)
} else {
go notifier.Start()
m.thresholdNotifiers = append(m.thresholdNotifiers, notifier)
}
}
}
}
// start the eviction manager monitoring
go func() {
for {
if evictedPods := m.synchronize(diskInfoProvider, podFunc); evictedPods != nil {
glog.Infof("eviction manager: pods %s evicted, waiting for pod to be cleaned up", format.Pods(evictedPods))
m.waitForPodsCleanup(podCleanedUpFunc, evictedPods)
} else {
time.Sleep(monitoringInterval)
}
}
}()
}
// IsUnderMemoryPressure returns true if the node is under memory pressure.
func (m *managerImpl) IsUnderMemoryPressure() bool {
m.RLock()
defer m.RUnlock()
return hasNodeCondition(m.nodeConditions, v1.NodeMemoryPressure)
}
// IsUnderDiskPressure returns true if the node is under disk pressure.
func (m *managerImpl) IsUnderDiskPressure() bool {
m.RLock()
defer m.RUnlock()
return hasNodeCondition(m.nodeConditions, v1.NodeDiskPressure)
}
// IsUnderPIDPressure returns true if the node is under PID pressure.
func (m *managerImpl) IsUnderPIDPressure() bool {
m.RLock()
defer m.RUnlock()
return hasNodeCondition(m.nodeConditions, v1.NodePIDPressure)
}
// synchronize is the main control loop that enforces eviction thresholds.
// Returns the pods that were killed, or nil if no pods were killed.
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) []*v1.Pod {
// if we have nothing to do, just return
thresholds := m.config.Thresholds
if len(thresholds) == 0 && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
return nil
}
glog.V(3).Infof("eviction manager: synchronize housekeeping")
// build the ranking functions (if not yet known)
// TODO: have a function in cadvisor that lets us know if global housekeeping has completed
if m.dedicatedImageFs == nil {
hasImageFs, ok := diskInfoProvider.HasDedicatedImageFs()
if ok != nil {
return nil
}
m.dedicatedImageFs = &hasImageFs
m.signalToRankFunc = buildSignalToRankFunc(hasImageFs)
m.signalToNodeReclaimFuncs = buildSignalToNodeReclaimFuncs(m.imageGC, m.containerGC, hasImageFs)
}
activePods := podFunc()
updateStats := true
summary, err := m.summaryProvider.Get(updateStats)
if err != nil {
glog.Errorf("eviction manager: failed to get get summary stats: %v", err)
return nil
}
if m.clock.Since(m.thresholdsLastUpdated) > notifierRefreshInterval {
m.thresholdsLastUpdated = m.clock.Now()
for _, notifier := range m.thresholdNotifiers {
if err := notifier.UpdateThreshold(summary); err != nil {
glog.Warningf("eviction manager: failed to update %s: %v", notifier.Description(), err)
}
}
}
// make observations and get a function to derive pod usage stats relative to those observations.
observations, statsFunc := makeSignalObservations(summary)
debugLogObservations("observations", observations)
// determine the set of thresholds met independent of grace period
thresholds = thresholdsMet(thresholds, observations, false)
debugLogThresholdsWithObservation("thresholds - ignoring grace period", thresholds, observations)
// determine the set of thresholds previously met that have not yet satisfied the associated min-reclaim
if len(m.thresholdsMet) > 0 {
thresholdsNotYetResolved := thresholdsMet(m.thresholdsMet, observations, true)
thresholds = mergeThresholds(thresholds, thresholdsNotYetResolved)
}
debugLogThresholdsWithObservation("thresholds - reclaim not satisfied", thresholds, observations)
// track when a threshold was first observed
now := m.clock.Now()
thresholdsFirstObservedAt := thresholdsFirstObservedAt(thresholds, m.thresholdsFirstObservedAt, now)
// the set of node conditions that are triggered by currently observed thresholds
nodeConditions := nodeConditions(thresholds)
if len(nodeConditions) > 0 {
glog.V(3).Infof("eviction manager: node conditions - observed: %v", nodeConditions)
}
// track when a node condition was last observed
nodeConditionsLastObservedAt := nodeConditionsLastObservedAt(nodeConditions, m.nodeConditionsLastObservedAt, now)
// node conditions report true if it has been observed within the transition period window
nodeConditions = nodeConditionsObservedSince(nodeConditionsLastObservedAt, m.config.PressureTransitionPeriod, now)
if len(nodeConditions) > 0 {
glog.V(3).Infof("eviction manager: node conditions - transition period not met: %v", nodeConditions)
}
// determine the set of thresholds we need to drive eviction behavior (i.e. all grace periods are met)
thresholds = thresholdsMetGracePeriod(thresholdsFirstObservedAt, now)
debugLogThresholdsWithObservation("thresholds - grace periods satisified", thresholds, observations)
// update internal state
m.Lock()
m.nodeConditions = nodeConditions
m.thresholdsFirstObservedAt = thresholdsFirstObservedAt
m.nodeConditionsLastObservedAt = nodeConditionsLastObservedAt
m.thresholdsMet = thresholds
// determine the set of thresholds whose stats have been updated since the last sync
thresholds = thresholdsUpdatedStats(thresholds, observations, m.lastObservations)
debugLogThresholdsWithObservation("thresholds - updated stats", thresholds, observations)
m.lastObservations = observations
m.Unlock()
// evict pods if there is a resource usage violation from local volume temporary storage
// If eviction happens in localStorageEviction function, skip the rest of eviction action
if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
if evictedPods := m.localStorageEviction(summary, activePods); len(evictedPods) > 0 {
return evictedPods
}
}
if len(thresholds) == 0 {
glog.V(3).Infof("eviction manager: no resources are starved")
return nil
}
// rank the thresholds by eviction priority
sort.Sort(byEvictionPriority(thresholds))
thresholdToReclaim := thresholds[0]
resourceToReclaim, found := signalToResource[thresholdToReclaim.Signal]
if !found {
glog.V(3).Infof("eviction manager: threshold %s was crossed, but reclaim is not implemented for this threshold.", thresholdToReclaim.Signal)
return nil
}
glog.Warningf("eviction manager: attempting to reclaim %v", resourceToReclaim)
// record an event about the resources we are now attempting to reclaim via eviction
m.recorder.Eventf(m.nodeRef, v1.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)
// check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods.
if m.reclaimNodeLevelResources(thresholdToReclaim.Signal, resourceToReclaim) {
glog.Infof("eviction manager: able to reduce %v pressure without evicting pods.", resourceToReclaim)
return nil
}
glog.Infof("eviction manager: must evict pod(s) to reclaim %v", resourceToReclaim)
// rank the pods for eviction
rank, ok := m.signalToRankFunc[thresholdToReclaim.Signal]
if !ok {
glog.Errorf("eviction manager: no ranking function for signal %s", thresholdToReclaim.Signal)
return nil
}
// the only candidates viable for eviction are those pods that had anything running.
if len(activePods) == 0 {
glog.Errorf("eviction manager: eviction thresholds have been met, but no pods are active to evict")
return nil
}
// rank the running pods for eviction for the specified resource
rank(activePods, statsFunc)
glog.Infof("eviction manager: pods ranked for eviction: %s", format.Pods(activePods))
// record age of metrics for met thresholds that we are using for evictions.
for _, t := range thresholds {
timeObserved := observations[t.Signal].time
if !timeObserved.IsZero() {
metrics.EvictionStatsAge.WithLabelValues(string(t.Signal)).Observe(metrics.SinceInMicroseconds(timeObserved.Time))
}
}
// we kill at most a single pod during each eviction interval
for i := range activePods {
pod := activePods[i]
gracePeriodOverride := int64(0)
if !isHardEvictionThreshold(thresholdToReclaim) {
gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
}
message, annotations := evictionMessage(resourceToReclaim, pod, statsFunc)
if m.evictPod(pod, gracePeriodOverride, message, annotations) {
return []*v1.Pod{pod}
}
}
glog.Infof("eviction manager: unable to evict any pods from the node")
return nil
}
func (m *managerImpl) waitForPodsCleanup(podCleanedUpFunc PodCleanedUpFunc, pods []*v1.Pod) {
timeout := m.clock.NewTimer(podCleanupTimeout)
defer timeout.Stop()
ticker := m.clock.NewTicker(podCleanupPollFreq)
defer ticker.Stop()
for {
select {
case <-timeout.C():
glog.Warningf("eviction manager: timed out waiting for pods %s to be cleaned up", format.Pods(pods))
return
case <-ticker.C():
for i, pod := range pods {
if !podCleanedUpFunc(pod) {
break
}
if i == len(pods)-1 {
glog.Infof("eviction manager: pods %s successfully cleaned up", format.Pods(pods))
return
}
}
}
}
}
// reclaimNodeLevelResources attempts to reclaim node-level resources. Returns true if thresholds were satisfied and no pod eviction is required.
func (m *managerImpl) reclaimNodeLevelResources(signalToReclaim evictionapi.Signal, resourceToReclaim v1.ResourceName) bool {
nodeReclaimFuncs := m.signalToNodeReclaimFuncs[signalToReclaim]
for _, nodeReclaimFunc := range nodeReclaimFuncs {
// attempt to reclaim the pressured resource.
if err := nodeReclaimFunc(); err != nil {
glog.Warningf("eviction manager: unexpected error when attempting to reduce %v pressure: %v", resourceToReclaim, err)
}
}
if len(nodeReclaimFuncs) > 0 {
summary, err := m.summaryProvider.Get(true)
if err != nil {
glog.Errorf("eviction manager: failed to get get summary stats after resource reclaim: %v", err)
return false
}
// make observations and get a function to derive pod usage stats relative to those observations.
observations, _ := makeSignalObservations(summary)
debugLogObservations("observations after resource reclaim", observations)
// determine the set of thresholds met independent of grace period
thresholds := thresholdsMet(m.config.Thresholds, observations, false)
debugLogThresholdsWithObservation("thresholds after resource reclaim - ignoring grace period", thresholds, observations)
if len(thresholds) == 0 {
return true
}
}
return false
}
// localStorageEviction checks the EmptyDir volume usage for each pod and determines whether it exceeds the specified limit and needs
// to be evicted. It also checks every container in the pod; if the container overlay usage exceeds the limit, the pod will be evicted too.
func (m *managerImpl) localStorageEviction(summary *statsapi.Summary, pods []*v1.Pod) []*v1.Pod {
statsFunc := cachedStatsFunc(summary.Pods)
evicted := []*v1.Pod{}
for _, pod := range pods {
podStats, ok := statsFunc(pod)
if !ok {
continue
}
if m.emptyDirLimitEviction(podStats, pod) {
evicted = append(evicted, pod)
continue
}
if m.podEphemeralStorageLimitEviction(podStats, pod) {
evicted = append(evicted, pod)
continue
}
if m.containerEphemeralStorageLimitEviction(podStats, pod) {
evicted = append(evicted, pod)
}
}
return evicted
}
func (m *managerImpl) emptyDirLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
podVolumeUsed := make(map[string]*resource.Quantity)
for _, volume := range podStats.VolumeStats {
podVolumeUsed[volume.Name] = resource.NewQuantity(int64(*volume.UsedBytes), resource.BinarySI)
}
for i := range pod.Spec.Volumes {
source := &pod.Spec.Volumes[i].VolumeSource
if source.EmptyDir != nil {
size := source.EmptyDir.SizeLimit
used := podVolumeUsed[pod.Spec.Volumes[i].Name]
if used != nil && size != nil && size.Sign() == 1 && used.Cmp(*size) > 0 {
// the emptyDir usage exceeds the size limit, evict the pod
return m.evictPod(pod, 0, fmt.Sprintf(emptyDirMessageFmt, pod.Spec.Volumes[i].Name, size.String()), nil)
}
}
}
return false
}
func (m *managerImpl) podEphemeralStorageLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
_, podLimits := apiv1resource.PodRequestsAndLimits(pod)
_, found := podLimits[v1.ResourceEphemeralStorage]
if !found {
return false
}
podEphemeralStorageTotalUsage := &resource.Quantity{}
fsStatsSet := []fsStatsType{}
if *m.dedicatedImageFs {
fsStatsSet = []fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}
} else {
fsStatsSet = []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}
}
podEphemeralUsage, err := podLocalEphemeralStorageUsage(podStats, pod, fsStatsSet)
if err != nil {
glog.Errorf("eviction manager: error getting pod disk usage %v", err)
return false
}
podEphemeralStorageTotalUsage.Add(podEphemeralUsage[v1.ResourceEphemeralStorage])
podEphemeralStorageLimit := podLimits[v1.ResourceEphemeralStorage]
if podEphemeralStorageTotalUsage.Cmp(podEphemeralStorageLimit) > 0 {
// the pod's total usage exceeds the total size limit of its containers, evict the pod
return m.evictPod(pod, 0, fmt.Sprintf(podEphemeralStorageMessageFmt, podEphemeralStorageLimit.String()), nil)
}
return false
}
func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.PodStats, pod *v1.Pod) bool {
thresholdsMap := make(map[string]*resource.Quantity)
for _, container := range pod.Spec.Containers {
ephemeralLimit := container.Resources.Limits.StorageEphemeral()
if ephemeralLimit != nil && ephemeralLimit.Value() != 0 {
thresholdsMap[container.Name] = ephemeralLimit
}
}
for _, containerStat := range podStats.Containers {
containerUsed := diskUsage(containerStat.Logs)
if !*m.dedicatedImageFs {
containerUsed.Add(*diskUsage(containerStat.Rootfs))
}
if ephemeralStorageThreshold, ok := thresholdsMap[containerStat.Name]; ok {
if ephemeralStorageThreshold.Cmp(*containerUsed) < 0 {
return m.evictPod(pod, 0, fmt.Sprintf(containerEphemeralStorageMessageFmt, containerStat.Name, ephemeralStorageThreshold.String()), nil)
}
}
}
return false
}
func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg string, annotations map[string]string) bool {
// If the pod is marked as critical and static, and support for critical pod annotations is enabled,
// do not evict such pods. Static pods are not re-admitted after evictions.
// https://github.com/kubernetes/kubernetes/issues/40573 has more details.
if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
kubelettypes.IsCriticalPod(pod) && kubepod.IsStaticPod(pod) {
glog.Errorf("eviction manager: cannot evict a critical static pod %s", format.Pod(pod))
return false
}
status := v1.PodStatus{
Phase: v1.PodFailed,
Message: evictMsg,
Reason: Reason,
}
// record that we are evicting the pod
m.recorder.AnnotatedEventf(pod, annotations, v1.EventTypeWarning, Reason, evictMsg)
// this is a blocking call and should only return when the pod and its containers are killed.
err := m.killPodFunc(pod, status, &gracePeriodOverride)
if err != nil {
glog.Errorf("eviction manager: pod %s failed to evict %v", format.Pod(pod), err)
} else {
glog.Infof("eviction manager: pod %s is evicted successfully", format.Pod(pod))
}
return true
}
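
The helpers synchronize leans on for threshold bookkeeping (thresholdsFirstObservedAt, thresholdsMetGracePeriod and friends) live in the package's helpers.go, not shown in this excerpt. A simplified, self-contained sketch of that bookkeeping, not the package's actual implementation: remember when each threshold was first observed, and only act on thresholds whose grace period has fully elapsed.

package main

import (
    "fmt"
    "time"
)

type threshold struct {
    signal      string
    gracePeriod time.Duration
}

// recordFirstObserved keeps the earliest observation time for each threshold
// currently met and drops entries for thresholds that are no longer observed.
func recordFirstObserved(met []threshold, prev map[string]time.Time, now time.Time) map[string]time.Time {
    result := map[string]time.Time{}
    for _, t := range met {
        if at, ok := prev[t.signal]; ok {
            result[t.signal] = at
        } else {
            result[t.signal] = now
        }
    }
    return result
}

// metGracePeriod returns only the thresholds whose grace period has elapsed.
func metGracePeriod(met []threshold, firstObserved map[string]time.Time, now time.Time) []threshold {
    var out []threshold
    for _, t := range met {
        if now.Sub(firstObserved[t.signal]) >= t.gracePeriod {
            out = append(out, t)
        }
    }
    return out
}

func main() {
    now := time.Now()
    met := []threshold{
        {signal: "memory.available", gracePeriod: 0},                // hard threshold: eligible immediately
        {signal: "nodefs.available", gracePeriod: 90 * time.Second}, // soft threshold: must persist
    }
    observed := recordFirstObserved(met, map[string]time.Time{}, now)
    fmt.Println(metGracePeriod(met, observed, now)) // only memory.available qualifies on the first pass
}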

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,135 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package eviction
import (
"fmt"
"time"
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/api/resource"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/cm"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
const (
memoryUsageAttribute = "memory.usage_in_bytes"
// this prevents constantly updating the memcg notifier if synchronize
// is run frequently.
notifierRefreshInterval = 10 * time.Second
)
type memoryThresholdNotifier struct {
threshold evictionapi.Threshold
cgroupPath string
events chan struct{}
factory NotifierFactory
handler func(string)
notifier CgroupNotifier
}
var _ ThresholdNotifier = &memoryThresholdNotifier{}
// NewMemoryThresholdNotifier creates a ThresholdNotifier which is designed to respond to the given threshold.
// UpdateThreshold must be called once before the threshold will be active.
func NewMemoryThresholdNotifier(threshold evictionapi.Threshold, cgroupRoot string, factory NotifierFactory, handler func(string)) (ThresholdNotifier, error) {
cgroups, err := cm.GetCgroupSubsystems()
if err != nil {
return nil, err
}
cgpath, found := cgroups.MountPoints["memory"]
if !found || len(cgpath) == 0 {
return nil, fmt.Errorf("memory cgroup mount point not found")
}
if isAllocatableEvictionThreshold(threshold) {
// for allocatable thresholds, point the cgroup notifier at the allocatable cgroup
cgpath += cgroupRoot
}
return &memoryThresholdNotifier{
threshold: threshold,
cgroupPath: cgpath,
events: make(chan struct{}),
handler: handler,
factory: factory,
}, nil
}
func (m *memoryThresholdNotifier) Start() {
glog.Infof("eviction manager: created %s", m.Description())
for range m.events {
m.handler(fmt.Sprintf("eviction manager: %s crossed", m.Description()))
}
}
func (m *memoryThresholdNotifier) UpdateThreshold(summary *statsapi.Summary) error {
memoryStats := summary.Node.Memory
if isAllocatableEvictionThreshold(m.threshold) {
allocatableContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerPods)
if err != nil {
return err
}
memoryStats = allocatableContainer.Memory
}
if memoryStats == nil || memoryStats.UsageBytes == nil || memoryStats.WorkingSetBytes == nil || memoryStats.AvailableBytes == nil {
return fmt.Errorf("summary was incomplete. Expected MemoryStats and all subfields to be non-nil, but got %+v", memoryStats)
}
// Set threshold on usage to capacity - eviction_hard + inactive_file,
// since we want to be notified when working_set = capacity - eviction_hard
inactiveFile := resource.NewQuantity(int64(*memoryStats.UsageBytes-*memoryStats.WorkingSetBytes), resource.BinarySI)
capacity := resource.NewQuantity(int64(*memoryStats.AvailableBytes+*memoryStats.WorkingSetBytes), resource.BinarySI)
evictionThresholdQuantity := evictionapi.GetThresholdQuantity(m.threshold.Value, capacity)
memcgThreshold := capacity.DeepCopy()
memcgThreshold.Sub(*evictionThresholdQuantity)
memcgThreshold.Add(*inactiveFile)
glog.V(3).Infof("eviction manager: setting %s to %s\n", m.Description(), memcgThreshold.String())
if m.notifier != nil {
m.notifier.Stop()
}
newNotifier, err := m.factory.NewCgroupNotifier(m.cgroupPath, memoryUsageAttribute, memcgThreshold.Value())
if err != nil {
return err
}
m.notifier = newNotifier
go m.notifier.Start(m.events)
return nil
}
func (m *memoryThresholdNotifier) Description() string {
var hard, allocatable string
if isHardEvictionThreshold(m.threshold) {
hard = "hard "
} else {
hard = "soft "
}
if isAllocatableEvictionThreshold(m.threshold) {
allocatable = "allocatable "
}
return fmt.Sprintf("%s%smemory eviction threshold", hard, allocatable)
}
var _ NotifierFactory = &CgroupNotifierFactory{}
// CgroupNotifierFactory knows how to make CgroupNotifiers which integrate with the kernel
type CgroupNotifierFactory struct{}
// NewCgroupNotifier implements the NotifierFactory interface
func (n *CgroupNotifierFactory) NewCgroupNotifier(path, attribute string, threshold int64) (CgroupNotifier, error) {
return NewCgroupNotifier(path, attribute, threshold)
}
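
UpdateThreshold's arithmetic is easier to follow with concrete numbers. A sketch with illustrative figures (the byte counts are made up; only the formula, capacity - eviction_hard + inactive_file, comes from the comments above):

package main

import "fmt"

func main() {
    // Illustrative node memory stats, in bytes (not taken from a real summary).
    var (
        usage      uint64 = 6 << 30 // memory.usage_in_bytes
        workingSet uint64 = 4 << 30 // usage minus inactive_file
        available  uint64 = 4 << 30 // capacity minus workingSet
    )
    evictionHard := uint64(1 << 30) // e.g. a memory.available<1Gi hard threshold

    inactiveFile := usage - workingSet                       // 2Gi of reclaimable page cache
    capacity := available + workingSet                       // 8Gi
    memcgThreshold := capacity - evictionHard + inactiveFile // 9Gi

    // The memcg notifier fires when usage_in_bytes crosses memcgThreshold,
    // which is exactly the point where workingSet reaches capacity - evictionHard.
    fmt.Printf("memcg threshold: %d bytes (%.0f Gi)\n", memcgThreshold, float64(memcgThreshold)/(1<<30))
}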

View File

@@ -1,270 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package eviction
import (
"fmt"
"strings"
"sync"
"testing"
"time"
"k8s.io/apimachinery/pkg/api/resource"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
const testCgroupPath = "/sys/fs/cgroups/memory"
func nodeSummary(available, workingSet, usage resource.Quantity, allocatable bool) *statsapi.Summary {
availableBytes := uint64(available.Value())
workingSetBytes := uint64(workingSet.Value())
usageBytes := uint64(usage.Value())
memoryStats := statsapi.MemoryStats{
AvailableBytes: &availableBytes,
WorkingSetBytes: &workingSetBytes,
UsageBytes: &usageBytes,
}
if allocatable {
return &statsapi.Summary{
Node: statsapi.NodeStats{
SystemContainers: []statsapi.ContainerStats{
{
Name: statsapi.SystemContainerPods,
Memory: &memoryStats,
},
},
},
}
}
return &statsapi.Summary{
Node: statsapi.NodeStats{
Memory: &memoryStats,
},
}
}
func newTestMemoryThresholdNotifier(threshold evictionapi.Threshold, factory NotifierFactory, handler func(string)) *memoryThresholdNotifier {
return &memoryThresholdNotifier{
threshold: threshold,
cgroupPath: testCgroupPath,
events: make(chan struct{}),
factory: factory,
handler: handler,
}
}
func TestUpdateThreshold(t *testing.T) {
testCases := []struct {
description string
available resource.Quantity
workingSet resource.Quantity
usage resource.Quantity
evictionThreshold evictionapi.Threshold
expectedThreshold resource.Quantity
updateThresholdErr error
expectErr bool
}{
{
description: "node level threshold",
available: resource.MustParse("3Gi"),
usage: resource.MustParse("2Gi"),
workingSet: resource.MustParse("1Gi"),
evictionThreshold: evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
expectedThreshold: resource.MustParse("4Gi"),
updateThresholdErr: nil,
expectErr: false,
},
{
description: "allocatable threshold",
available: resource.MustParse("4Gi"),
usage: resource.MustParse("3Gi"),
workingSet: resource.MustParse("1Gi"),
evictionThreshold: evictionapi.Threshold{
Signal: evictionapi.SignalAllocatableMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
expectedThreshold: resource.MustParse("6Gi"),
updateThresholdErr: nil,
expectErr: false,
},
{
description: "error updating node level threshold",
available: resource.MustParse("3Gi"),
usage: resource.MustParse("2Gi"),
workingSet: resource.MustParse("1Gi"),
evictionThreshold: evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
expectedThreshold: resource.MustParse("4Gi"),
updateThresholdErr: fmt.Errorf("unexpected error"),
expectErr: true,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
notifierFactory := &MockNotifierFactory{}
notifier := &MockCgroupNotifier{}
m := newTestMemoryThresholdNotifier(tc.evictionThreshold, notifierFactory, nil)
notifierFactory.On("NewCgroupNotifier", testCgroupPath, memoryUsageAttribute, tc.expectedThreshold.Value()).Return(notifier, tc.updateThresholdErr)
var events chan<- struct{}
events = m.events
notifier.On("Start", events).Return()
err := m.UpdateThreshold(nodeSummary(tc.available, tc.workingSet, tc.usage, isAllocatableEvictionThreshold(tc.evictionThreshold)))
if err != nil && !tc.expectErr {
t.Errorf("Unexpected error updating threshold: %v", err)
} else if err == nil && tc.expectErr {
t.Errorf("Expected error updating threshold, but got nil")
}
if !tc.expectErr {
notifierFactory.AssertNumberOfCalls(t, "NewCgroupNotifier", 1)
}
})
}
}
func TestStart(t *testing.T) {
noResources := resource.MustParse("0")
threshold := evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: &noResources,
},
}
notifier := &MockCgroupNotifier{}
notifierFactory := &MockNotifierFactory{}
var wg sync.WaitGroup
wg.Add(4)
m := newTestMemoryThresholdNotifier(threshold, notifierFactory, func(string) {
wg.Done()
})
notifierFactory.On("NewCgroupNotifier", testCgroupPath, memoryUsageAttribute, int64(0)).Return(notifier, nil)
var events chan<- struct{}
events = m.events
notifier.On("Start", events).Return()
notifier.On("Stop").Return()
err := m.UpdateThreshold(nodeSummary(noResources, noResources, noResources, isAllocatableEvictionThreshold(threshold)))
if err != nil {
t.Errorf("Unexpected error updating threshold: %v", err)
}
notifierFactory.AssertNumberOfCalls(t, "NewCgroupNotifier", 1)
go m.Start()
for i := 0; i < 4; i++ {
m.events <- struct{}{}
}
wg.Wait()
}
func TestThresholdDescription(t *testing.T) {
testCases := []struct {
description string
evictionThreshold evictionapi.Threshold
expectedSubstrings []string
omittedSubstrings []string
}{
{
description: "hard node level threshold",
evictionThreshold: evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
},
expectedSubstrings: []string{"hard"},
omittedSubstrings: []string{"allocatable", "soft"},
},
{
description: "soft node level threshold",
evictionThreshold: evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
GracePeriod: time.Minute * 2,
},
expectedSubstrings: []string{"soft"},
omittedSubstrings: []string{"allocatable", "hard"},
},
{
description: "hard allocatable threshold",
evictionThreshold: evictionapi.Threshold{
Signal: evictionapi.SignalAllocatableMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
GracePeriod: time.Minute * 2,
},
expectedSubstrings: []string{"soft", "allocatable"},
omittedSubstrings: []string{"hard"},
},
{
description: "soft allocatable threshold",
evictionThreshold: evictionapi.Threshold{
Signal: evictionapi.SignalAllocatableMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
},
expectedSubstrings: []string{"hard", "allocatable"},
omittedSubstrings: []string{"soft"},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
m := &memoryThresholdNotifier{
notifier: &MockCgroupNotifier{},
threshold: tc.evictionThreshold,
cgroupPath: testCgroupPath,
}
desc := m.Description()
for _, expected := range tc.expectedSubstrings {
if !strings.Contains(desc, expected) {
t.Errorf("expected description for notifier with threshold %+v to contain %s, but it did not", tc.evictionThreshold, expected)
}
}
for _, omitted := range tc.omittedSubstrings {
if strings.Contains(desc, omitted) {
t.Errorf("expected description for notifier with threshold %+v NOT to contain %s, but it did", tc.evictionThreshold, omitted)
}
}
})
}
}

View File

@@ -1,98 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package eviction
import (
mock "github.com/stretchr/testify/mock"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)
// MockCgroupNotifier is a mock implementation of the CgroupNotifier interface
type MockCgroupNotifier struct {
mock.Mock
}
// Start implements the CgroupNotifier interface
func (m *MockCgroupNotifier) Start(a0 chan<- struct{}) {
m.Called(a0)
}
// Stop implements the CgroupNotifier interface
func (m *MockCgroupNotifier) Stop() {
m.Called()
}
// MockNotifierFactory is a mock of the NotifierFactory interface
type MockNotifierFactory struct {
mock.Mock
}
// NewCgroupNotifier implements the NotifierFactory interface
func (m *MockNotifierFactory) NewCgroupNotifier(a0, a1 string, a2 int64) (CgroupNotifier, error) {
ret := m.Called(a0, a1, a2)
var r0 CgroupNotifier
if rf, ok := ret.Get(0).(func(string, string, int64) CgroupNotifier); ok {
r0 = rf(a0, a1, a2)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(CgroupNotifier)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string, string, int64) error); ok {
r1 = rf(a0, a1, a2)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// MockThresholdNotifier is a mock implementation of the ThresholdNotifier interface
type MockThresholdNotifier struct {
mock.Mock
}
// Start implements the ThresholdNotifier interface
func (m *MockThresholdNotifier) Start() {
m.Called()
}
// UpdateThreshold implements the ThresholdNotifier interface
func (m *MockThresholdNotifier) UpdateThreshold(a0 *statsapi.Summary) error {
ret := m.Called(a0)
var r0 error
if rf, ok := ret.Get(0).(func(*statsapi.Summary) error); ok {
r0 = rf(a0)
} else {
r0 = ret.Error(0)
}
return r0
}
// Description implements the ThresholdNotifier interface
func (m *MockThresholdNotifier) Description() string {
ret := m.Called()
var r0 string
if rf, ok := ret.Get(0).(func() string); ok {
r0 = rf()
} else {
r0 = ret.String(0)
}
return r0
}

View File

@@ -1,185 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package eviction
import (
"fmt"
"sync"
"time"
"github.com/golang/glog"
"golang.org/x/sys/unix"
)
const (
// eventSize is the number of bytes returned by a successful read from an eventfd
// see http://man7.org/linux/man-pages/man2/eventfd.2.html for more information
eventSize = 8
// numFdEvents is the number of events we can record at once.
// If EpollWait finds more than this, they will be missed.
numFdEvents = 6
)
type linuxCgroupNotifier struct {
eventfd int
epfd int
stop chan struct{}
stopLock sync.Mutex
}
var _ CgroupNotifier = &linuxCgroupNotifier{}
// NewCgroupNotifier returns a linuxCgroupNotifier, which performs cgroup control operations required
// to receive notifications from the cgroup when the threshold is crossed in either direction.
func NewCgroupNotifier(path, attribute string, threshold int64) (CgroupNotifier, error) {
var watchfd, eventfd, epfd, controlfd int
var err error
watchfd, err = unix.Open(fmt.Sprintf("%s/%s", path, attribute), unix.O_RDONLY, 0)
if err != nil {
return nil, err
}
defer unix.Close(watchfd)
controlfd, err = unix.Open(fmt.Sprintf("%s/cgroup.event_control", path), unix.O_WRONLY, 0)
if err != nil {
return nil, err
}
defer unix.Close(controlfd)
eventfd, err = unix.Eventfd(0, unix.EFD_CLOEXEC)
if err != nil {
return nil, err
}
if eventfd < 0 {
err = fmt.Errorf("eventfd call failed")
return nil, err
}
defer func() {
// Close eventfd if we get an error later in initialization
if err != nil {
unix.Close(eventfd)
}
}()
epfd, err = unix.EpollCreate1(0)
if err != nil {
return nil, err
}
if epfd < 0 {
err = fmt.Errorf("EpollCreate1 call failed")
return nil, err
}
defer func() {
// Close epfd if we get an error later in initialization
if err != nil {
unix.Close(epfd)
}
}()
config := fmt.Sprintf("%d %d %d", eventfd, watchfd, threshold)
_, err = unix.Write(controlfd, []byte(config))
if err != nil {
return nil, err
}
return &linuxCgroupNotifier{
eventfd: eventfd,
epfd: epfd,
stop: make(chan struct{}),
}, nil
}
func (n *linuxCgroupNotifier) Start(eventCh chan<- struct{}) {
err := unix.EpollCtl(n.epfd, unix.EPOLL_CTL_ADD, n.eventfd, &unix.EpollEvent{
Fd: int32(n.eventfd),
Events: unix.EPOLLIN,
})
if err != nil {
glog.Warningf("eviction manager: error adding epoll eventfd: %v", err)
return
}
for {
select {
case <-n.stop:
return
default:
}
event, err := wait(n.epfd, n.eventfd, notifierRefreshInterval)
if err != nil {
glog.Warningf("eviction manager: error while waiting for memcg events: %v", err)
return
} else if !event {
// Timeout on wait. This is expected if the threshold was not crossed
continue
}
// Consume the event from the eventfd
buf := make([]byte, eventSize)
_, err = unix.Read(n.eventfd, buf)
if err != nil {
glog.Warningf("eviction manager: error reading memcg events: %v", err)
return
}
eventCh <- struct{}{}
}
}
// wait waits up to notifierRefreshInterval for an event on the Epoll FD for the
// eventfd we are concerned about. It returns an error if one occurs, and true
// if the consumer should read from the eventfd.
func wait(epfd, eventfd int, timeout time.Duration) (bool, error) {
events := make([]unix.EpollEvent, numFdEvents+1)
timeoutMS := int(timeout / time.Millisecond)
n, err := unix.EpollWait(epfd, events, timeoutMS)
if n == -1 {
if err == unix.EINTR {
// Interrupt, ignore the error
return false, nil
}
return false, err
}
if n == 0 {
// Timeout
return false, nil
}
if n > numFdEvents {
return false, fmt.Errorf("epoll_wait returned more events than we know what to do with")
}
for _, event := range events[:n] {
if event.Fd == int32(eventfd) {
if event.Events&unix.EPOLLHUP != 0 || event.Events&unix.EPOLLERR != 0 || event.Events&unix.EPOLLIN != 0 {
// EPOLLHUP: should not happen, but if it does, treat it as a wakeup.
// EPOLLERR: If an error is waiting on the file descriptor, we should pretend
// something is ready to read, and let unix.Read pick up the error.
// EPOLLIN: There is data to read.
return true, nil
}
}
}
// An event occurred that we don't care about.
return false, nil
}
func (n *linuxCgroupNotifier) Stop() {
n.stopLock.Lock()
defer n.stopLock.Unlock()
select {
case <-n.stop:
// the linuxCgroupNotifier is already stopped
return
default:
}
unix.Close(n.eventfd)
unix.Close(n.epfd)
close(n.stop)
}
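
A hypothetical consumer (not part of this package) showing how the notifier above is meant to be driven from inside the eviction package: Start blocks on epoll internally, so it runs in its own goroutine and publishes one event per threshold crossing. The cgroup path and the 7Gi threshold are placeholder values.

// driveMemcgNotifier is an illustrative sketch, not kubelet code.
func driveMemcgNotifier(stop <-chan struct{}) error {
    notifier, err := NewCgroupNotifier("/sys/fs/cgroup/memory", "memory.usage_in_bytes", 7<<30)
    if err != nil {
        return err
    }
    defer notifier.Stop()

    events := make(chan struct{})
    go notifier.Start(events)

    for {
        select {
        case <-events:
            // threshold crossed; in the kubelet this triggers a synchronize pass
        case <-stop:
            return nil
        }
    }
}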

View File

@@ -1,33 +0,0 @@
// +build !linux
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package eviction
import "github.com/golang/glog"
// NewCgroupNotifier creates a cgroup notifier that does nothing because cgroups do not exist on non-Linux systems.
func NewCgroupNotifier(path, attribute string, threshold int64) (CgroupNotifier, error) {
glog.V(5).Infof("cgroup notifications not supported")
return &unsupportedThresholdNotifier{}, nil
}
type unsupportedThresholdNotifier struct{}
func (*unsupportedThresholdNotifier) Start(_ chan<- struct{}) {}
func (*unsupportedThresholdNotifier) Stop() {}

View File

@@ -1,160 +0,0 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package eviction
import (
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
// fsStatsType defines the types of filesystem stats to collect.
type fsStatsType string
const (
// fsStatsLocalVolumeSource identifies stats for pod local volume sources.
fsStatsLocalVolumeSource fsStatsType = "localVolumeSource"
// fsStatsLogs identifies stats for pod logs.
fsStatsLogs fsStatsType = "logs"
// fsStatsRoot identifies stats for pod container writable layers.
fsStatsRoot fsStatsType = "root"
)
// Config holds information about how eviction is configured.
type Config struct {
// PressureTransitionPeriod is the duration the kubelet has to wait before transitioning out of a pressure condition.
PressureTransitionPeriod time.Duration
// Maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met.
MaxPodGracePeriodSeconds int64
// Thresholds define the set of conditions monitored to trigger eviction.
Thresholds []evictionapi.Threshold
// KernelMemcgNotification if true will integrate with the kernel memcg notification to determine if memory thresholds are crossed.
KernelMemcgNotification bool
// PodCgroupRoot is the cgroup which contains all pods.
PodCgroupRoot string
}
// Manager evaluates when an eviction threshold for node stability has been met on the node.
type Manager interface {
// Start starts the control loop to monitor eviction thresholds at specified interval.
Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, monitoringInterval time.Duration)
// IsUnderMemoryPressure returns true if the node is under memory pressure.
IsUnderMemoryPressure() bool
// IsUnderDiskPressure returns true if the node is under disk pressure.
IsUnderDiskPressure() bool
// IsUnderPIDPressure returns true if the node is under PID pressure.
IsUnderPIDPressure() bool
}
// DiskInfoProvider is responsible for informing the manager how disk is configured.
type DiskInfoProvider interface {
// HasDedicatedImageFs returns true if the imagefs is on a separate device from the rootfs.
HasDedicatedImageFs() (bool, error)
}
// ImageGC is responsible for performing garbage collection of unused images.
type ImageGC interface {
// DeleteUnusedImages deletes unused images.
DeleteUnusedImages() error
}
// ContainerGC is responsible for performing garbage collection of unused containers.
type ContainerGC interface {
// DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted.
DeleteAllUnusedContainers() error
}
// KillPodFunc kills a pod.
// The pod status is updated, and then it is killed with the specified grace period.
// This function must block until either the pod is killed or an error is encountered.
// Arguments:
// pod - the pod to kill
// status - the desired status to associate with the pod (i.e. why it is killed)
// gracePeriodOverride - the grace period override to use instead of what is on the pod spec
type KillPodFunc func(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error
// ActivePodsFunc returns pods bound to the kubelet that are active (i.e. non-terminal state)
type ActivePodsFunc func() []*v1.Pod
// PodCleanedUpFunc returns true if all resources associated with a pod have been reclaimed.
type PodCleanedUpFunc func(*v1.Pod) bool
// statsFunc returns the usage stats if known for an input pod.
type statsFunc func(pod *v1.Pod) (statsapi.PodStats, bool)
// rankFunc sorts the pods in eviction order
type rankFunc func(pods []*v1.Pod, stats statsFunc)
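
As a small illustration of how rankFunc and statsFunc compose (the real ranking functions live in the package's helpers.go, not shown here), a hypothetical rank function that orders pods by memory working set, largest first. It assumes the package's existing imports plus sort, and that PodStats carries pod-level Memory stats in this API version.

// rankByMemoryWorkingSet is an illustrative rankFunc, not part of this package:
// pods with the largest working set sort first and are therefore evicted first.
func rankByMemoryWorkingSet(pods []*v1.Pod, stats statsFunc) {
    workingSet := func(pod *v1.Pod) uint64 {
        podStats, ok := stats(pod)
        if !ok || podStats.Memory == nil || podStats.Memory.WorkingSetBytes == nil {
            return 0
        }
        return *podStats.Memory.WorkingSetBytes
    }
    sort.Slice(pods, func(i, j int) bool {
        return workingSet(pods[i]) > workingSet(pods[j])
    })
}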
// signalObservation is the observed resource usage
type signalObservation struct {
// The resource capacity
capacity *resource.Quantity
// The available resource
available *resource.Quantity
// Time at which the observation was taken
time metav1.Time
}
// signalObservations maps a signal to an observed quantity
type signalObservations map[evictionapi.Signal]signalObservation
// thresholdsObservedAt maps a threshold to a time that it was observed
type thresholdsObservedAt map[evictionapi.Threshold]time.Time
// nodeConditionsObservedAt maps a node condition to a time that it was observed
type nodeConditionsObservedAt map[v1.NodeConditionType]time.Time
// nodeReclaimFunc is a function that knows how to reclaim a resource from the node without impacting pods.
type nodeReclaimFunc func() error
// nodeReclaimFuncs is an ordered list of nodeReclaimFunc
type nodeReclaimFuncs []nodeReclaimFunc
// CgroupNotifier generates events from cgroup events
type CgroupNotifier interface {
// Start causes the CgroupNotifier to begin notifying on the eventCh
Start(eventCh chan<- struct{})
// Stop stops all processes and cleans up file descriptors associated with the CgroupNotifier
Stop()
}
// NotifierFactory creates CgroupNotifiers
type NotifierFactory interface {
// NewCgroupNotifier creates a CgroupNotifier that creates events when the threshold
// on the attribute in the cgroup specified by the path is crossed.
NewCgroupNotifier(path, attribute string, threshold int64) (CgroupNotifier, error)
}
// ThresholdNotifier manages CgroupNotifiers based on memory eviction thresholds, and performs a function
// when memory eviction thresholds are crossed
type ThresholdNotifier interface {
// Start calls the notifier function when the CgroupNotifier notifies the ThresholdNotifier that an event occurred
Start()
// UpdateThreshold updates the memory cgroup threshold based on the metrics provided.
// Calling UpdateThreshold with recent metrics allows the ThresholdNotifier to trigger at the
// eviction threshold more accurately
UpdateThreshold(summary *statsapi.Summary) error
// Description produces a relevant string describing the Memory Threshold Notifier
Description() string
}