Mirror of https://github.com/ceph/ceph-csi.git (synced 2025-06-13 18:43:34 +00:00)

Commit: vendor update for CSI 0.3.0
vendor/k8s.io/kubernetes/pkg/kubelet/eviction/BUILD (generated, vendored): 8 lines changed
@@ -11,6 +11,8 @@ go_test(
srcs = [
"eviction_manager_test.go",
"helpers_test.go",
"memory_threshold_notifier_test.go",
"mock_threshold_notifier_test.go",
],
embed = [":go_default_library"],
deps = [
@@ -20,7 +22,7 @@ go_test(
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/quota:go_default_library",
"//vendor/github.com/stretchr/testify/mock:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
@@ -37,6 +39,7 @@ go_library(
"doc.go",
"eviction_manager.go",
"helpers.go",
"memory_threshold_notifier.go",
"types.go",
] + select({
"@io_bazel_rules_go//go/platform:android": [
@@ -77,6 +80,7 @@ go_library(
importpath = "k8s.io/kubernetes/pkg/kubelet/eviction",
deps = [
"//pkg/api/v1/resource:go_default_library",
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/apis/core/v1/helper/qos:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
@@ -88,13 +92,13 @@ go_library(
"//pkg/kubelet/server/stats:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/clock:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
] + select({
vendor/k8s.io/kubernetes/pkg/kubelet/eviction/api/types.go (generated, vendored): 14 lines changed
@@ -38,8 +38,6 @@ const (
SignalImageFsInodesFree Signal = "imagefs.inodesFree"
// SignalAllocatableMemoryAvailable is amount of memory available for pod allocation (i.e. allocatable - workingSet (of pods), in bytes.
SignalAllocatableMemoryAvailable Signal = "allocatableMemory.available"
// SignalAllocatableNodeFsAvailable is amount of local storage available for pod allocation
SignalAllocatableNodeFsAvailable Signal = "allocatableNodeFs.available"
// SignalPIDAvailable is amount of PID available for pod allocation
SignalPIDAvailable Signal = "pid.available"
)
@@ -60,13 +58,11 @@ const (
// from either above or below, never both). There is thus no reason to expose the
// operator in the Kubelet's public API. Instead, we internally map signal types to operators.
var OpForSignal = map[Signal]ThresholdOperator{
SignalMemoryAvailable: OpLessThan,
SignalNodeFsAvailable: OpLessThan,
SignalNodeFsInodesFree: OpLessThan,
SignalImageFsAvailable: OpLessThan,
SignalImageFsInodesFree: OpLessThan,
SignalAllocatableMemoryAvailable: OpLessThan,
SignalAllocatableNodeFsAvailable: OpLessThan,
SignalMemoryAvailable: OpLessThan,
SignalNodeFsAvailable: OpLessThan,
SignalNodeFsInodesFree: OpLessThan,
SignalImageFsAvailable: OpLessThan,
SignalImageFsInodesFree: OpLessThan,
}

// ThresholdValue is a value holder that abstracts literal versus percentage based quantity
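For orientation, OpForSignal binds each signal to a fixed comparison operator, so a threshold check only needs the observed value and the configured quantity. A minimal sketch of that lookup; the observed and threshold values below are hypothetical and not part of this diff:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

func main() {
	// Hypothetical observation: 200Mi of memory available against a 500Mi threshold.
	observed := resource.MustParse("200Mi")
	thresholdQuantity := resource.MustParse("500Mi")

	// OpForSignal hard-codes the operator for each signal; memory.available uses OpLessThan.
	op := evictionapi.OpForSignal[evictionapi.SignalMemoryAvailable]
	if op == evictionapi.OpLessThan && observed.Cmp(thresholdQuantity) < 0 {
		fmt.Println("memory.available threshold is met")
	}
}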
vendor/k8s.io/kubernetes/pkg/kubelet/eviction/eviction_manager.go (generated, vendored): 205 lines changed
@@ -27,14 +27,13 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/clock"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/record"
apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/cm"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
@@ -42,6 +41,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/server/stats"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/format"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
)

const (
@@ -77,16 +77,18 @@ type managerImpl struct {
thresholdsFirstObservedAt thresholdsObservedAt
// records the set of thresholds that have been met (including graceperiod) but not yet resolved
thresholdsMet []evictionapi.Threshold
// resourceToRankFunc maps a resource to ranking function for that resource.
resourceToRankFunc map[v1.ResourceName]rankFunc
// resourceToNodeReclaimFuncs maps a resource to an ordered list of functions that know how to reclaim that resource.
resourceToNodeReclaimFuncs map[v1.ResourceName]nodeReclaimFuncs
// signalToRankFunc maps a resource to ranking function for that resource.
signalToRankFunc map[evictionapi.Signal]rankFunc
// signalToNodeReclaimFuncs maps a resource to an ordered list of functions that know how to reclaim that resource.
signalToNodeReclaimFuncs map[evictionapi.Signal]nodeReclaimFuncs
// last observations from synchronize
lastObservations signalObservations
// notifiersInitialized indicates if the threshold notifiers have been initialized (i.e. synchronize() has been called once)
notifiersInitialized bool
// dedicatedImageFs indicates if imagefs is on a separate device from the rootfs
dedicatedImageFs *bool
// thresholdNotifiers is a list of memory threshold notifiers which each notify for a memory eviction threshold
thresholdNotifiers []ThresholdNotifier
// thresholdsLastUpdated is the last time the thresholdNotifiers were updated.
thresholdsLastUpdated time.Time
}

// ensure it implements the required interface
@@ -115,6 +117,7 @@ func NewManager(
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
thresholdsFirstObservedAt: thresholdsObservedAt{},
dedicatedImageFs: nil,
thresholdNotifiers: []ThresholdNotifier{},
}
return manager, manager
}
@@ -137,19 +140,46 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd
if notBestEffort {
return lifecycle.PodAdmitResult{Admit: true}
}

// When node has memory pressure and TaintNodesByCondition is enabled, check BestEffort Pod's toleration:
// admit it if tolerates memory pressure taint, fail for other tolerations, e.g. OutOfDisk.
if utilfeature.DefaultFeatureGate.Enabled(features.TaintNodesByCondition) &&
v1helper.TolerationsTolerateTaint(attrs.Pod.Spec.Tolerations, &v1.Taint{
Key: algorithm.TaintNodeMemoryPressure,
Effect: v1.TaintEffectNoSchedule,
}) {
return lifecycle.PodAdmitResult{Admit: true}
}
}

// reject pods when under memory pressure (if pod is best effort), or if under disk pressure.
glog.Warningf("Failed to admit pod %s - node has conditions: %v", format.Pod(attrs.Pod), m.nodeConditions)
return lifecycle.PodAdmitResult{
Admit: false,
Reason: reason,
Message: fmt.Sprintf(message, m.nodeConditions),
Reason: Reason,
Message: fmt.Sprintf(nodeLowMessageFmt, m.nodeConditions),
}
}

// Start starts the control loop to observe and response to low compute resources.
func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, monitoringInterval time.Duration) {
thresholdHandler := func(message string) {
glog.Infof(message)
m.synchronize(diskInfoProvider, podFunc)
}
if m.config.KernelMemcgNotification {
for _, threshold := range m.config.Thresholds {
if threshold.Signal == evictionapi.SignalMemoryAvailable || threshold.Signal == evictionapi.SignalAllocatableMemoryAvailable {
notifier, err := NewMemoryThresholdNotifier(threshold, m.config.PodCgroupRoot, &CgroupNotifierFactory{}, thresholdHandler)
if err != nil {
glog.Warningf("eviction manager: failed to create memory threshold notifier: %v", err)
} else {
go notifier.Start()
m.thresholdNotifiers = append(m.thresholdNotifiers, notifier)
}
}
}
}
// start the eviction manager monitoring
go func() {
for {
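For orientation, the notifier path added to Start() is driven entirely by the eviction Config. A sketch of a configuration that would exercise it; the concrete values are hypothetical, only the field names come from this diff:

package eviction

import (
	"time"

	"k8s.io/apimachinery/pkg/api/resource"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

// exampleNotifierConfig sketches an eviction Config that would take the notifier
// path in Start(): kernel memcg notification enabled, one hard memory.available
// threshold, and the pod cgroup root the notifier attaches to.
func exampleNotifierConfig() Config {
	memoryFree := resource.MustParse("100Mi") // hypothetical threshold quantity
	return Config{
		KernelMemcgNotification:  true,
		MaxPodGracePeriodSeconds: 30,
		PressureTransitionPeriod: 5 * time.Minute,
		PodCgroupRoot:            "kubepods",
		Thresholds: []evictionapi.Threshold{
			{
				Signal:   evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value:    evictionapi.ThresholdValue{Quantity: &memoryFree},
			},
		},
	}
}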
@@ -184,39 +214,6 @@ func (m *managerImpl) IsUnderPIDPressure() bool {
return hasNodeCondition(m.nodeConditions, v1.NodePIDPressure)
}

func startMemoryThresholdNotifier(thresholds []evictionapi.Threshold, observations signalObservations, hard bool, handler thresholdNotifierHandlerFunc) error {
for _, threshold := range thresholds {
if threshold.Signal != evictionapi.SignalMemoryAvailable || hard != isHardEvictionThreshold(threshold) {
continue
}
observed, found := observations[evictionapi.SignalMemoryAvailable]
if !found {
continue
}
cgroups, err := cm.GetCgroupSubsystems()
if err != nil {
return err
}
// TODO add support for eviction from --cgroup-root
cgpath, found := cgroups.MountPoints["memory"]
if !found || len(cgpath) == 0 {
return fmt.Errorf("memory cgroup mount point not found")
}
attribute := "memory.usage_in_bytes"
quantity := evictionapi.GetThresholdQuantity(threshold.Value, observed.capacity)
usageThreshold := resource.NewQuantity(observed.capacity.Value(), resource.DecimalSI)
usageThreshold.Sub(*quantity)
description := fmt.Sprintf("<%s available", formatThresholdValue(threshold.Value))
memcgThresholdNotifier, err := NewMemCGThresholdNotifier(cgpath, attribute, usageThreshold.String(), description, handler)
if err != nil {
return err
}
go memcgThresholdNotifier.Start(wait.NeverStop)
return nil
}
return nil
}

// synchronize is the main control loop that enforces eviction thresholds.
// Returns the pod that was killed, or nil if no pod was killed.
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) []*v1.Pod {
@@ -235,8 +232,8 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
return nil
}
m.dedicatedImageFs = &hasImageFs
m.resourceToRankFunc = buildResourceToRankFunc(hasImageFs)
m.resourceToNodeReclaimFuncs = buildResourceToNodeReclaimFuncs(m.imageGC, m.containerGC, hasImageFs)
m.signalToRankFunc = buildSignalToRankFunc(hasImageFs)
m.signalToNodeReclaimFuncs = buildSignalToNodeReclaimFuncs(m.imageGC, m.containerGC, hasImageFs)
}

activePods := podFunc()
@@ -247,33 +244,19 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
return nil
}

if m.clock.Since(m.thresholdsLastUpdated) > notifierRefreshInterval {
m.thresholdsLastUpdated = m.clock.Now()
for _, notifier := range m.thresholdNotifiers {
if err := notifier.UpdateThreshold(summary); err != nil {
glog.Warningf("eviction manager: failed to update %s: %v", notifier.Description(), err)
}
}
}

// make observations and get a function to derive pod usage stats relative to those observations.
observations, statsFunc := makeSignalObservations(summary)
debugLogObservations("observations", observations)

// attempt to create a threshold notifier to improve eviction response time
if m.config.KernelMemcgNotification && !m.notifiersInitialized {
glog.Infof("eviction manager attempting to integrate with kernel memcg notification api")
m.notifiersInitialized = true
// start soft memory notification
err = startMemoryThresholdNotifier(m.config.Thresholds, observations, false, func(desc string) {
glog.Infof("soft memory eviction threshold crossed at %s", desc)
// TODO wait grace period for soft memory limit
m.synchronize(diskInfoProvider, podFunc)
})
if err != nil {
glog.Warningf("eviction manager: failed to create soft memory threshold notifier: %v", err)
}
// start hard memory notification
err = startMemoryThresholdNotifier(m.config.Thresholds, observations, true, func(desc string) {
glog.Infof("hard memory eviction threshold crossed at %s", desc)
m.synchronize(diskInfoProvider, podFunc)
})
if err != nil {
glog.Warningf("eviction manager: failed to create hard memory threshold notifier: %v", err)
}
}

// determine the set of thresholds met independent of grace period
thresholds = thresholdsMet(thresholds, observations, false)
debugLogThresholdsWithObservation("thresholds - ignoring grace period", thresholds, observations)
@@ -330,26 +313,26 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
}
}

// determine the set of resources under starvation
starvedResources := getStarvedResources(thresholds)
if len(starvedResources) == 0 {
if len(thresholds) == 0 {
glog.V(3).Infof("eviction manager: no resources are starved")
return nil
}

// rank the resources to reclaim by eviction priority
sort.Sort(byEvictionPriority(starvedResources))
resourceToReclaim := starvedResources[0]
// rank the thresholds by eviction priority
sort.Sort(byEvictionPriority(thresholds))
thresholdToReclaim := thresholds[0]
resourceToReclaim, found := signalToResource[thresholdToReclaim.Signal]
if !found {
glog.V(3).Infof("eviction manager: threshold %s was crossed, but reclaim is not implemented for this threshold.", thresholdToReclaim.Signal)
return nil
}
glog.Warningf("eviction manager: attempting to reclaim %v", resourceToReclaim)

// determine if this is a soft or hard eviction associated with the resource
softEviction := isSoftEvictionThresholds(thresholds, resourceToReclaim)

// record an event about the resources we are now attempting to reclaim via eviction
m.recorder.Eventf(m.nodeRef, v1.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)

// check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods.
if m.reclaimNodeLevelResources(resourceToReclaim) {
if m.reclaimNodeLevelResources(thresholdToReclaim.Signal, resourceToReclaim) {
glog.Infof("eviction manager: able to reduce %v pressure without evicting pods.", resourceToReclaim)
return nil
}
@@ -357,9 +340,9 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
glog.Infof("eviction manager: must evict pod(s) to reclaim %v", resourceToReclaim)

// rank the pods for eviction
rank, ok := m.resourceToRankFunc[resourceToReclaim]
rank, ok := m.signalToRankFunc[thresholdToReclaim.Signal]
if !ok {
glog.Errorf("eviction manager: no ranking function for resource %s", resourceToReclaim)
glog.Errorf("eviction manager: no ranking function for signal %s", thresholdToReclaim.Signal)
return nil
}

@@ -385,30 +368,14 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
// we kill at most a single pod during each eviction interval
for i := range activePods {
pod := activePods[i]
// If the pod is marked as critical and static, and support for critical pod annotations is enabled,
// do not evict such pods. Static pods are not re-admitted after evictions.
// https://github.com/kubernetes/kubernetes/issues/40573 has more details.
if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
kubelettypes.IsCriticalPod(pod) && kubepod.IsStaticPod(pod) {
continue
}
status := v1.PodStatus{
Phase: v1.PodFailed,
Message: fmt.Sprintf(message, resourceToReclaim),
Reason: reason,
}
// record that we are evicting the pod
m.recorder.Eventf(pod, v1.EventTypeWarning, reason, fmt.Sprintf(message, resourceToReclaim))
gracePeriodOverride := int64(0)
if softEviction {
if !isHardEvictionThreshold(thresholdToReclaim) {
gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
}
// this is a blocking call and should only return when the pod and its containers are killed.
err := m.killPodFunc(pod, status, &gracePeriodOverride)
if err != nil {
glog.Warningf("eviction manager: error while evicting pod %s: %v", format.Pod(pod), err)
message, annotations := evictionMessage(resourceToReclaim, pod, statsFunc)
if m.evictPod(pod, gracePeriodOverride, message, annotations) {
return []*v1.Pod{pod}
}
return []*v1.Pod{pod}
}
glog.Infof("eviction manager: unable to evict any pods from the node")
return nil
@@ -416,13 +383,15 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act

func (m *managerImpl) waitForPodsCleanup(podCleanedUpFunc PodCleanedUpFunc, pods []*v1.Pod) {
timeout := m.clock.NewTimer(podCleanupTimeout)
tick := m.clock.Tick(podCleanupPollFreq)
defer timeout.Stop()
ticker := m.clock.NewTicker(podCleanupPollFreq)
defer ticker.Stop()
for {
select {
case <-timeout.C():
glog.Warningf("eviction manager: timed out waiting for pods %s to be cleaned up", format.Pods(pods))
return
case <-tick:
case <-ticker.C():
for i, pod := range pods {
if !podCleanedUpFunc(pod) {
break
@@ -437,8 +406,8 @@ func (m *managerImpl) waitForPodsCleanup(podCleanedUpFunc PodCleanedUpFunc, pods
}

// reclaimNodeLevelResources attempts to reclaim node level resources. returns true if thresholds were satisfied and no pod eviction is required.
func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName) bool {
nodeReclaimFuncs := m.resourceToNodeReclaimFuncs[resourceToReclaim]
func (m *managerImpl) reclaimNodeLevelResources(signalToReclaim evictionapi.Signal, resourceToReclaim v1.ResourceName) bool {
nodeReclaimFuncs := m.signalToNodeReclaimFuncs[signalToReclaim]
for _, nodeReclaimFunc := range nodeReclaimFuncs {
// attempt to reclaim the pressured resource.
if err := nodeReclaimFunc(); err != nil {
@@ -509,7 +478,7 @@ func (m *managerImpl) emptyDirLimitEviction(podStats statsapi.PodStats, pod *v1.
used := podVolumeUsed[pod.Spec.Volumes[i].Name]
if used != nil && size != nil && size.Sign() == 1 && used.Cmp(*size) > 0 {
// the emptyDir usage exceeds the size limit, evict the pod
return m.evictPod(pod, v1.ResourceName("EmptyDir"), fmt.Sprintf("emptyDir usage exceeds the limit %q", size.String()))
return m.evictPod(pod, 0, fmt.Sprintf(emptyDirMessageFmt, pod.Spec.Volumes[i].Name, size.String()), nil)
}
}
}
@@ -537,10 +506,11 @@ func (m *managerImpl) podEphemeralStorageLimitEviction(podStats statsapi.PodStat
return false
}

podEphemeralStorageTotalUsage.Add(podEphemeralUsage[resourceDisk])
if podEphemeralStorageTotalUsage.Cmp(podLimits[v1.ResourceEphemeralStorage]) > 0 {
podEphemeralStorageTotalUsage.Add(podEphemeralUsage[v1.ResourceEphemeralStorage])
podEphemeralStorageLimit := podLimits[v1.ResourceEphemeralStorage]
if podEphemeralStorageTotalUsage.Cmp(podEphemeralStorageLimit) > 0 {
// the total usage of pod exceeds the total size limit of containers, evict the pod
return m.evictPod(pod, v1.ResourceEphemeralStorage, fmt.Sprintf("pod ephemeral local storage usage exceeds the total limit of containers %v", podLimits[v1.ResourceEphemeralStorage]))
return m.evictPod(pod, 0, fmt.Sprintf(podEphemeralStorageMessageFmt, podEphemeralStorageLimit.String()), nil)
}
return false
}
@@ -562,7 +532,7 @@ func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.P

if ephemeralStorageThreshold, ok := thresholdsMap[containerStat.Name]; ok {
if ephemeralStorageThreshold.Cmp(*containerUsed) < 0 {
return m.evictPod(pod, v1.ResourceEphemeralStorage, fmt.Sprintf("container's ephemeral local storage usage exceeds the limit %q", ephemeralStorageThreshold.String()))
return m.evictPod(pod, 0, fmt.Sprintf(containerEphemeralStorageMessageFmt, containerStat.Name, ephemeralStorageThreshold.String()), nil)

}
}
@@ -570,21 +540,24 @@ func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.P
return false
}

func (m *managerImpl) evictPod(pod *v1.Pod, resourceName v1.ResourceName, evictMsg string) bool {
func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg string, annotations map[string]string) bool {
// If the pod is marked as critical and static, and support for critical pod annotations is enabled,
// do not evict such pods. Static pods are not re-admitted after evictions.
// https://github.com/kubernetes/kubernetes/issues/40573 has more details.
if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
kubelettypes.IsCriticalPod(pod) && kubepod.IsStaticPod(pod) {
glog.Errorf("eviction manager: cannot evict a critical pod %s", format.Pod(pod))
glog.Errorf("eviction manager: cannot evict a critical static pod %s", format.Pod(pod))
return false
}
status := v1.PodStatus{
Phase: v1.PodFailed,
Message: fmt.Sprintf(message, resourceName),
Reason: reason,
Message: evictMsg,
Reason: Reason,
}
// record that we are evicting the pod
m.recorder.Eventf(pod, v1.EventTypeWarning, reason, evictMsg)
gracePeriod := int64(0)
err := m.killPodFunc(pod, status, &gracePeriod)
m.recorder.AnnotatedEventf(pod, annotations, v1.EventTypeWarning, Reason, evictMsg)
// this is a blocking call and should only return when the pod and its containers are killed.
err := m.killPodFunc(pod, status, &gracePeriodOverride)
if err != nil {
glog.Errorf("eviction manager: pod %s failed to evict %v", format.Pod(pod), err)
} else {
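Taken together, these hunks replace the resource-keyed maps (resourceToRankFunc, resourceToNodeReclaimFuncs) with signal-keyed ones, and funnel all pod evictions through the reworked evictPod. A condensed sketch of the new selection flow in synchronize, written as a hypothetical helper that is not part of this diff:

package eviction

import (
	"sort"

	"github.com/golang/glog"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

// pickThresholdSketch condenses the new selection logic from synchronize():
// thresholds (not resources) are ranked, the top threshold's signal drives the
// rank and reclaim lookups, and signalToResource is consulted only to name the
// starved resource in events and messages.
func (m *managerImpl) pickThresholdSketch(thresholds []evictionapi.Threshold) (rankFunc, bool) {
	if len(thresholds) == 0 {
		return nil, false
	}
	sort.Sort(byEvictionPriority(thresholds))
	thresholdToReclaim := thresholds[0]
	if _, found := signalToResource[thresholdToReclaim.Signal]; !found {
		glog.V(3).Infof("no reclaim implemented for signal %s", thresholdToReclaim.Signal)
		return nil, false
	}
	rank, ok := m.signalToRankFunc[thresholdToReclaim.Signal]
	return rank, ok
}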
vendor/k8s.io/kubernetes/pkg/kubelet/eviction/eviction_manager_test.go (generated, vendored): 72 lines changed
@@ -17,6 +17,7 @@ limitations under the License.
package eviction

import (
"fmt"
"testing"
"time"

@@ -1434,3 +1435,74 @@ func TestAllocatableMemoryPressure(t *testing.T) {
}
}
}

func TestUpdateMemcgThreshold(t *testing.T) {
activePodsFunc := func() []*v1.Pod {
return []*v1.Pod{}
}

fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
},
PodCgroupRoot: "kubepods",
}
summaryProvider := &fakeSummaryProvider{result: makeMemoryStats("2Gi", map[*v1.Pod]statsapi.PodStats{})}

thresholdNotifier := &MockThresholdNotifier{}
thresholdNotifier.On("UpdateThreshold", summaryProvider.result).Return(nil).Twice()

manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
nodeRef: nodeRef,
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
thresholdsFirstObservedAt: thresholdsObservedAt{},
thresholdNotifiers: []ThresholdNotifier{thresholdNotifier},
}

manager.synchronize(diskInfoProvider, activePodsFunc)
// The UpdateThreshold method should have been called once, since this is the first run.
thresholdNotifier.AssertNumberOfCalls(t, "UpdateThreshold", 1)

manager.synchronize(diskInfoProvider, activePodsFunc)
// The UpdateThreshold method should not have been called again, since not enough time has passed
thresholdNotifier.AssertNumberOfCalls(t, "UpdateThreshold", 1)

fakeClock.Step(2 * notifierRefreshInterval)
manager.synchronize(diskInfoProvider, activePodsFunc)
// The UpdateThreshold method should be called again since enough time has passed
thresholdNotifier.AssertNumberOfCalls(t, "UpdateThreshold", 2)

// new memory threshold notifier that returns an error
thresholdNotifier = &MockThresholdNotifier{}
thresholdNotifier.On("UpdateThreshold", summaryProvider.result).Return(fmt.Errorf("error updating threshold"))
thresholdNotifier.On("Description").Return("mock thresholdNotifier").Once()
manager.thresholdNotifiers = []ThresholdNotifier{thresholdNotifier}

fakeClock.Step(2 * notifierRefreshInterval)
manager.synchronize(diskInfoProvider, activePodsFunc)
// The UpdateThreshold method should be called because at least notifierRefreshInterval time has passed.
// The Description method should be called because UpdateThreshold returned an error
thresholdNotifier.AssertNumberOfCalls(t, "UpdateThreshold", 1)
thresholdNotifier.AssertNumberOfCalls(t, "Description", 1)
}
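TestUpdateMemcgThreshold depends on a testify-based MockThresholdNotifier defined in mock_threshold_notifier_test.go, which this diff does not display. A rough sketch of what such a mock plausibly looks like, assuming only the methods exercised above (Start, UpdateThreshold, Description):

package eviction

import (
	"github.com/stretchr/testify/mock"

	statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)

// MockThresholdNotifierSketch is an illustrative stand-in for the real
// MockThresholdNotifier; the actual type may carry additional methods.
type MockThresholdNotifierSketch struct {
	mock.Mock
}

func (m *MockThresholdNotifierSketch) Start() {
	m.Called()
}

func (m *MockThresholdNotifierSketch) UpdateThreshold(summary *statsapi.Summary) error {
	args := m.Called(summary)
	return args.Error(0)
}

func (m *MockThresholdNotifierSketch) Description() string {
	args := m.Called()
	return args.String(0)
}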
vendor/k8s.io/kubernetes/pkg/kubelet/eviction/helpers.go (generated, vendored): 230 lines changed
@@ -36,22 +36,26 @@ import (

const (
unsupportedEvictionSignal = "unsupported eviction signal %v"
// the reason reported back in status.
reason = "Evicted"
// the message associated with the reason.
message = "The node was low on resource: %v."
// disk, in bytes. internal to this module, used to account for local disk usage.
resourceDisk v1.ResourceName = "disk"
// Reason is the reason reported back in status.
Reason = "Evicted"
// nodeLowMessageFmt is the message for evictions due to resource pressure.
nodeLowMessageFmt = "The node was low on resource: %v. "
// containerMessageFmt provides additional information for containers exceeding requests
containerMessageFmt = "Container %s was using %s, which exceeds its request of %s. "
// containerEphemeralStorageMessageFmt provides additional information for containers which have exceeded their ES limit
containerEphemeralStorageMessageFmt = "Container %s exceeded its local ephemeral storage limit %q. "
// podEphemeralStorageMessageFmt provides additional information for pods which have exceeded their ES limit
podEphemeralStorageMessageFmt = "Pod ephemeral local storage usage exceeds the total limit of containers %s. "
// emptyDirMessageFmt provides additional information for empty-dir volumes which have exceeded their size limit
emptyDirMessageFmt = "Usage of EmptyDir volume %q exceeds the limit %q. "
// inodes, number. internal to this module, used to account for local disk inode consumption.
resourceInodes v1.ResourceName = "inodes"
// imagefs, in bytes. internal to this module, used to account for local image filesystem usage.
resourceImageFs v1.ResourceName = "imagefs"
// imagefs inodes, number. internal to this module, used to account for local image filesystem inodes.
resourceImageFsInodes v1.ResourceName = "imagefsInodes"
// nodefs, in bytes. internal to this module, used to account for local node root filesystem usage.
resourceNodeFs v1.ResourceName = "nodefs"
// nodefs inodes, number. internal to this module, used to account for local node root filesystem inodes.
resourceNodeFsInodes v1.ResourceName = "nodefsInodes"
// OffendingContainersKey is the key in eviction event annotations for the list of container names which exceeded their requests
OffendingContainersKey = "offending_containers"
// OffendingContainersUsageKey is the key in eviction event annotations for the list of usage of containers which exceeded their requests
OffendingContainersUsageKey = "offending_containers_usage"
// StarvedResourceKey is the key for the starved resource in eviction event annotations
StarvedResourceKey = "starved_resource"
)

var (
@@ -59,8 +63,6 @@ var (
signalToNodeCondition map[evictionapi.Signal]v1.NodeConditionType
// signalToResource maps a Signal to its associated Resource.
signalToResource map[evictionapi.Signal]v1.ResourceName
// resourceClaimToSignal maps a Resource that can be reclaimed to its associated Signal
resourceClaimToSignal map[v1.ResourceName][]evictionapi.Signal
)

func init() {
@@ -78,17 +80,10 @@ func init() {
signalToResource = map[evictionapi.Signal]v1.ResourceName{}
signalToResource[evictionapi.SignalMemoryAvailable] = v1.ResourceMemory
signalToResource[evictionapi.SignalAllocatableMemoryAvailable] = v1.ResourceMemory
signalToResource[evictionapi.SignalImageFsAvailable] = resourceImageFs
signalToResource[evictionapi.SignalImageFsInodesFree] = resourceImageFsInodes
signalToResource[evictionapi.SignalNodeFsAvailable] = resourceNodeFs
signalToResource[evictionapi.SignalNodeFsInodesFree] = resourceNodeFsInodes

// maps resource to signals (the following resource could be reclaimed)
resourceClaimToSignal = map[v1.ResourceName][]evictionapi.Signal{}
resourceClaimToSignal[resourceNodeFs] = []evictionapi.Signal{evictionapi.SignalNodeFsAvailable}
resourceClaimToSignal[resourceImageFs] = []evictionapi.Signal{evictionapi.SignalImageFsAvailable}
resourceClaimToSignal[resourceNodeFsInodes] = []evictionapi.Signal{evictionapi.SignalNodeFsInodesFree}
resourceClaimToSignal[resourceImageFsInodes] = []evictionapi.Signal{evictionapi.SignalImageFsInodesFree}
signalToResource[evictionapi.SignalImageFsAvailable] = v1.ResourceEphemeralStorage
signalToResource[evictionapi.SignalImageFsInodesFree] = resourceInodes
signalToResource[evictionapi.SignalNodeFsAvailable] = v1.ResourceEphemeralStorage
signalToResource[evictionapi.SignalNodeFsInodesFree] = resourceInodes
}

// validSignal returns true if the signal is supported.
@@ -305,10 +300,10 @@ func diskUsage(fsStats *statsapi.FsStats) *resource.Quantity {
// inodeUsage converts inodes consumed into a resource quantity.
func inodeUsage(fsStats *statsapi.FsStats) *resource.Quantity {
if fsStats == nil || fsStats.InodesUsed == nil {
return &resource.Quantity{Format: resource.BinarySI}
return &resource.Quantity{Format: resource.DecimalSI}
}
usage := int64(*fsStats.InodesUsed)
return resource.NewQuantity(usage, resource.BinarySI)
return resource.NewQuantity(usage, resource.DecimalSI)
}

// memoryUsage converts working set into a resource quantity.
@@ -338,7 +333,7 @@ func localVolumeNames(pod *v1.Pod) []string {
// containerUsage aggregates container disk usage and inode consumption for the specified stats to measure.
func containerUsage(podStats statsapi.PodStats, statsToMeasure []fsStatsType) v1.ResourceList {
disk := resource.Quantity{Format: resource.BinarySI}
inodes := resource.Quantity{Format: resource.BinarySI}
inodes := resource.Quantity{Format: resource.DecimalSI}
for _, container := range podStats.Containers {
if hasFsStatsType(statsToMeasure, fsStatsRoot) {
disk.Add(*diskUsage(container.Rootfs))
@@ -350,15 +345,15 @@ func containerUsage(podStats statsapi.PodStats, statsToMeasure []fsStatsType) v1
}
}
return v1.ResourceList{
resourceDisk: disk,
resourceInodes: inodes,
v1.ResourceEphemeralStorage: disk,
resourceInodes: inodes,
}
}

// podLocalVolumeUsage aggregates pod local volumes disk usage and inode consumption for the specified stats to measure.
func podLocalVolumeUsage(volumeNames []string, podStats statsapi.PodStats) v1.ResourceList {
disk := resource.Quantity{Format: resource.BinarySI}
inodes := resource.Quantity{Format: resource.BinarySI}
inodes := resource.Quantity{Format: resource.DecimalSI}
for _, volumeName := range volumeNames {
for _, volumeStats := range podStats.VolumeStats {
if volumeStats.Name == volumeName {
@@ -369,29 +364,29 @@ func podLocalVolumeUsage(volumeNames []string, podStats statsapi.PodStats) v1.Re
}
}
return v1.ResourceList{
resourceDisk: disk,
resourceInodes: inodes,
v1.ResourceEphemeralStorage: disk,
resourceInodes: inodes,
}
}

// podDiskUsage aggregates pod disk usage and inode consumption for the specified stats to measure.
func podDiskUsage(podStats statsapi.PodStats, pod *v1.Pod, statsToMeasure []fsStatsType) (v1.ResourceList, error) {
disk := resource.Quantity{Format: resource.BinarySI}
inodes := resource.Quantity{Format: resource.BinarySI}
inodes := resource.Quantity{Format: resource.DecimalSI}

containerUsageList := containerUsage(podStats, statsToMeasure)
disk.Add(containerUsageList[resourceDisk])
disk.Add(containerUsageList[v1.ResourceEphemeralStorage])
inodes.Add(containerUsageList[resourceInodes])

if hasFsStatsType(statsToMeasure, fsStatsLocalVolumeSource) {
volumeNames := localVolumeNames(pod)
podLocalVolumeUsageList := podLocalVolumeUsage(volumeNames, podStats)
disk.Add(podLocalVolumeUsageList[resourceDisk])
disk.Add(podLocalVolumeUsageList[v1.ResourceEphemeralStorage])
inodes.Add(podLocalVolumeUsageList[resourceInodes])
}
return v1.ResourceList{
resourceDisk: disk,
resourceInodes: inodes,
v1.ResourceEphemeralStorage: disk,
resourceInodes: inodes,
}, nil
}

@@ -421,21 +416,21 @@ func localEphemeralVolumeNames(pod *v1.Pod) []string {
// podLocalEphemeralStorageUsage aggregates pod local ephemeral storage usage and inode consumption for the specified stats to measure.
func podLocalEphemeralStorageUsage(podStats statsapi.PodStats, pod *v1.Pod, statsToMeasure []fsStatsType) (v1.ResourceList, error) {
disk := resource.Quantity{Format: resource.BinarySI}
inodes := resource.Quantity{Format: resource.BinarySI}
inodes := resource.Quantity{Format: resource.DecimalSI}

containerUsageList := containerUsage(podStats, statsToMeasure)
disk.Add(containerUsageList[resourceDisk])
disk.Add(containerUsageList[v1.ResourceEphemeralStorage])
inodes.Add(containerUsageList[resourceInodes])

if hasFsStatsType(statsToMeasure, fsStatsLocalVolumeSource) {
volumeNames := localEphemeralVolumeNames(pod)
podLocalVolumeUsageList := podLocalVolumeUsage(volumeNames, podStats)
disk.Add(podLocalVolumeUsageList[resourceDisk])
disk.Add(podLocalVolumeUsageList[v1.ResourceEphemeralStorage])
inodes.Add(podLocalVolumeUsageList[resourceInodes])
}
return v1.ResourceList{
resourceDisk: disk,
resourceInodes: inodes,
v1.ResourceEphemeralStorage: disk,
resourceInodes: inodes,
}, nil
}

@@ -600,7 +595,7 @@ func memory(stats statsFunc) cmpFunc {
// max(max of init container requests, sum of container requests)
func podRequest(pod *v1.Pod, resourceName v1.ResourceName) resource.Quantity {
containerValue := resource.Quantity{Format: resource.BinarySI}
if resourceName == resourceDisk && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
if resourceName == v1.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
// if the local storage capacity isolation feature gate is disabled, pods request 0 disk
return containerValue
}
@@ -608,7 +603,7 @@ func podRequest(pod *v1.Pod, resourceName v1.ResourceName) resource.Quantity {
switch resourceName {
case v1.ResourceMemory:
containerValue.Add(*pod.Spec.Containers[i].Resources.Requests.Memory())
case resourceDisk:
case v1.ResourceEphemeralStorage:
containerValue.Add(*pod.Spec.Containers[i].Resources.Requests.StorageEphemeral())
}
}
@@ -619,7 +614,7 @@ func podRequest(pod *v1.Pod, resourceName v1.ResourceName) resource.Quantity {
if initValue.Cmp(*pod.Spec.InitContainers[i].Resources.Requests.Memory()) < 0 {
initValue = *pod.Spec.InitContainers[i].Resources.Requests.Memory()
}
case resourceDisk:
case v1.ResourceEphemeralStorage:
if initValue.Cmp(*pod.Spec.InitContainers[i].Resources.Requests.StorageEphemeral()) < 0 {
initValue = *pod.Spec.InitContainers[i].Resources.Requests.StorageEphemeral()
}
@@ -676,9 +671,9 @@ func disk(stats statsFunc, fsStatsToMeasure []fsStatsType, diskResource v1.Resou
// adjust p1, p2 usage relative to the request (if any)
p1Disk := p1Usage[diskResource]
p2Disk := p2Usage[diskResource]
p1Request := podRequest(p1, resourceDisk)
p1Request := podRequest(p1, v1.ResourceEphemeralStorage)
p1Disk.Sub(p1Request)
p2Request := podRequest(p2, resourceDisk)
p2Request := podRequest(p2, v1.ResourceEphemeralStorage)
p2Disk.Sub(p2Request)
// prioritize evicting the pod which has the larger consumption of disk
return p2Disk.Cmp(p1Disk)
@@ -711,14 +706,15 @@ func rankDiskPressureFunc(fsStatsToMeasure []fsStatsType, diskResource v1.Resour
}

// byEvictionPriority implements sort.Interface for []v1.ResourceName.
type byEvictionPriority []v1.ResourceName
type byEvictionPriority []evictionapi.Threshold

func (a byEvictionPriority) Len() int { return len(a) }
func (a byEvictionPriority) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

// Less ranks memory before all other resources.
// Less ranks memory before all other resources, and ranks thresholds with no resource to reclaim last
func (a byEvictionPriority) Less(i, j int) bool {
return a[i] == v1.ResourceMemory
_, jSignalHasResource := signalToResource[a[j].Signal]
return a[i].Signal == evictionapi.SignalMemoryAvailable || a[i].Signal == evictionapi.SignalAllocatableMemoryAvailable || !jSignalHasResource
}

// makeSignalObservations derives observations using the specified summary provider.
@@ -756,8 +752,8 @@ func makeSignalObservations(summary *statsapi.Summary) (signalObservations, stat
}
if nodeFs.InodesFree != nil && nodeFs.Inodes != nil {
result[evictionapi.SignalNodeFsInodesFree] = signalObservation{
available: resource.NewQuantity(int64(*nodeFs.InodesFree), resource.BinarySI),
capacity: resource.NewQuantity(int64(*nodeFs.Inodes), resource.BinarySI),
available: resource.NewQuantity(int64(*nodeFs.InodesFree), resource.DecimalSI),
capacity: resource.NewQuantity(int64(*nodeFs.Inodes), resource.DecimalSI),
time: nodeFs.Time,
}
}
@@ -772,8 +768,8 @@ func makeSignalObservations(summary *statsapi.Summary) (signalObservations, stat
}
if imageFs.InodesFree != nil && imageFs.Inodes != nil {
result[evictionapi.SignalImageFsInodesFree] = signalObservation{
available: resource.NewQuantity(int64(*imageFs.InodesFree), resource.BinarySI),
capacity: resource.NewQuantity(int64(*imageFs.Inodes), resource.BinarySI),
available: resource.NewQuantity(int64(*imageFs.InodesFree), resource.DecimalSI),
capacity: resource.NewQuantity(int64(*imageFs.Inodes), resource.DecimalSI),
time: imageFs.Time,
}
}
@@ -1001,82 +997,102 @@ func compareThresholdValue(a evictionapi.ThresholdValue, b evictionapi.Threshold
return a.Percentage == b.Percentage
}

// getStarvedResources returns the set of resources that are starved based on thresholds met.
func getStarvedResources(thresholds []evictionapi.Threshold) []v1.ResourceName {
results := []v1.ResourceName{}
for _, threshold := range thresholds {
if starvedResource, found := signalToResource[threshold.Signal]; found {
results = append(results, starvedResource)
}
}
return results
}

// isSoftEviction returns true if the thresholds met for the starved resource are only soft thresholds
func isSoftEvictionThresholds(thresholds []evictionapi.Threshold, starvedResource v1.ResourceName) bool {
for _, threshold := range thresholds {
if resourceToCheck := signalToResource[threshold.Signal]; resourceToCheck != starvedResource {
continue
}
if isHardEvictionThreshold(threshold) {
return false
}
}
return true
}

// isHardEvictionThreshold returns true if eviction should immediately occur
func isHardEvictionThreshold(threshold evictionapi.Threshold) bool {
return threshold.GracePeriod == time.Duration(0)
}

// buildResourceToRankFunc returns ranking functions associated with resources
func buildResourceToRankFunc(withImageFs bool) map[v1.ResourceName]rankFunc {
resourceToRankFunc := map[v1.ResourceName]rankFunc{
v1.ResourceMemory: rankMemoryPressure,
func isAllocatableEvictionThreshold(threshold evictionapi.Threshold) bool {
return threshold.Signal == evictionapi.SignalAllocatableMemoryAvailable
}

// buildSignalToRankFunc returns ranking functions associated with resources
func buildSignalToRankFunc(withImageFs bool) map[evictionapi.Signal]rankFunc {
signalToRankFunc := map[evictionapi.Signal]rankFunc{
evictionapi.SignalMemoryAvailable: rankMemoryPressure,
evictionapi.SignalAllocatableMemoryAvailable: rankMemoryPressure,
}
// usage of an imagefs is optional
if withImageFs {
// with an imagefs, nodefs pod rank func for eviction only includes logs and local volumes
resourceToRankFunc[resourceNodeFs] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)
resourceToRankFunc[resourceNodeFsInodes] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
signalToRankFunc[evictionapi.SignalNodeFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalNodeFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
// with an imagefs, imagefs pod rank func for eviction only includes rootfs
resourceToRankFunc[resourceImageFs] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot}, resourceDisk)
resourceToRankFunc[resourceImageFsInodes] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot}, resourceInodes)
signalToRankFunc[evictionapi.SignalImageFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalImageFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot}, resourceInodes)
} else {
// without an imagefs, nodefs pod rank func for eviction looks at all fs stats.
// since imagefs and nodefs share a common device, they share common ranking functions.
resourceToRankFunc[resourceNodeFs] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)
resourceToRankFunc[resourceNodeFsInodes] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
resourceToRankFunc[resourceImageFs] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)
resourceToRankFunc[resourceImageFsInodes] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
signalToRankFunc[evictionapi.SignalNodeFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalNodeFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
signalToRankFunc[evictionapi.SignalImageFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalImageFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
}
return resourceToRankFunc
return signalToRankFunc
}

// PodIsEvicted returns true if the reported pod status is due to an eviction.
func PodIsEvicted(podStatus v1.PodStatus) bool {
return podStatus.Phase == v1.PodFailed && podStatus.Reason == reason
return podStatus.Phase == v1.PodFailed && podStatus.Reason == Reason
}

// buildResourceToNodeReclaimFuncs returns reclaim functions associated with resources.
func buildResourceToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, withImageFs bool) map[v1.ResourceName]nodeReclaimFuncs {
resourceToReclaimFunc := map[v1.ResourceName]nodeReclaimFuncs{}
// buildSignalToNodeReclaimFuncs returns reclaim functions associated with resources.
func buildSignalToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, withImageFs bool) map[evictionapi.Signal]nodeReclaimFuncs {
signalToReclaimFunc := map[evictionapi.Signal]nodeReclaimFuncs{}
// usage of an imagefs is optional
if withImageFs {
// with an imagefs, nodefs pressure should just delete logs
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{}
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{}
signalToReclaimFunc[evictionapi.SignalNodeFsAvailable] = nodeReclaimFuncs{}
signalToReclaimFunc[evictionapi.SignalNodeFsInodesFree] = nodeReclaimFuncs{}
// with an imagefs, imagefs pressure should delete unused images
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
signalToReclaimFunc[evictionapi.SignalImageFsAvailable] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
signalToReclaimFunc[evictionapi.SignalImageFsInodesFree] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
} else {
// without an imagefs, nodefs pressure should delete logs, and unused images
// since imagefs and nodefs share a common device, they share common reclaim functions
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
signalToReclaimFunc[evictionapi.SignalNodeFsAvailable] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
signalToReclaimFunc[evictionapi.SignalNodeFsInodesFree] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
signalToReclaimFunc[evictionapi.SignalImageFsAvailable] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
signalToReclaimFunc[evictionapi.SignalImageFsInodesFree] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
}
return resourceToReclaimFunc
return signalToReclaimFunc
}

// evictionMessage constructs a useful message about why an eviction occurred, and annotations to provide metadata about the eviction
func evictionMessage(resourceToReclaim v1.ResourceName, pod *v1.Pod, stats statsFunc) (message string, annotations map[string]string) {
annotations = make(map[string]string)
message = fmt.Sprintf(nodeLowMessageFmt, resourceToReclaim)
containers := []string{}
containerUsage := []string{}
podStats, ok := stats(pod)
if !ok {
return
}
for _, containerStats := range podStats.Containers {
for _, container := range pod.Spec.Containers {
if container.Name == containerStats.Name {
requests := container.Resources.Requests[resourceToReclaim]
var usage *resource.Quantity
switch resourceToReclaim {
case v1.ResourceEphemeralStorage:
if containerStats.Rootfs != nil && containerStats.Rootfs.UsedBytes != nil && containerStats.Logs != nil && containerStats.Logs.UsedBytes != nil {
usage = resource.NewQuantity(int64(*containerStats.Rootfs.UsedBytes+*containerStats.Logs.UsedBytes), resource.BinarySI)
}
case v1.ResourceMemory:
if containerStats.Memory != nil && containerStats.Memory.WorkingSetBytes != nil {
usage = resource.NewQuantity(int64(*containerStats.Memory.WorkingSetBytes), resource.BinarySI)
}
}
if usage != nil && usage.Cmp(requests) > 0 {
message += fmt.Sprintf(containerMessageFmt, container.Name, usage.String(), requests.String())
containers = append(containers, container.Name)
containerUsage = append(containerUsage, usage.String())
}
}
}
}
annotations[OffendingContainersKey] = strings.Join(containers, ",")
annotations[OffendingContainersUsageKey] = strings.Join(containerUsage, ",")
annotations[StarvedResourceKey] = string(resourceToReclaim)
return
}
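For orientation, evictionMessage now returns structured annotations alongside the human-readable message. An illustration of the resulting key/value shape; the keys are the constants added in this diff, the values are made up:

package eviction

// exampleEvictionAnnotations shows the shape of the annotations produced by
// evictionMessage for a hypothetical memory eviction where one container,
// "app", exceeded its request.
var exampleEvictionAnnotations = map[string]string{
	OffendingContainersKey:      "app",
	OffendingContainersUsageKey: "1200Mi",
	StarvedResourceKey:          "memory",
}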
vendor/k8s.io/kubernetes/pkg/kubelet/eviction/helpers_test.go (generated, vendored): 126 lines changed
@@ -19,6 +19,7 @@ package eviction
import (
"fmt"
"reflect"
"sort"
"testing"
"time"

@@ -27,12 +28,10 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/features"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/quota"
)

func quantityMustParse(value string) *resource.Quantity {
@@ -468,7 +467,7 @@ func TestOrderedByExceedsRequestDisk(t *testing.T) {
return result, found
}
pods := []*v1.Pod{below, exceeds}
orderedBy(exceedDiskRequests(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)).Sort(pods)
orderedBy(exceedDiskRequests(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)).Sort(pods)

expected := []*v1.Pod{exceeds, below}
for i := range expected {
@@ -582,7 +581,7 @@ func TestOrderedbyDisk(t *testing.T) {
return result, found
}
pods := []*v1.Pod{pod1, pod2, pod3, pod4, pod5, pod6}
orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)).Sort(pods)
orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)).Sort(pods)
expected := []*v1.Pod{pod1, pod3, pod2, pod4, pod5, pod6}
for i := range expected {
if pods[i] != expected[i] {
@@ -649,7 +648,7 @@ func TestOrderedbyDiskDisableLocalStorage(t *testing.T) {
return result, found
}
pods := []*v1.Pod{pod1, pod3, pod2, pod4, pod5, pod6}
orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)).Sort(pods)
orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)).Sort(pods)
expected := []*v1.Pod{pod5, pod3, pod1, pod6, pod4, pod2}
for i := range expected {
if pods[i] != expected[i] {
@@ -778,7 +777,7 @@ func TestOrderedByPriorityDisk(t *testing.T) {
pods := []*v1.Pod{pod8, pod7, pod6, pod5, pod4, pod3, pod2, pod1}
expected := []*v1.Pod{pod1, pod2, pod3, pod4, pod5, pod6, pod7, pod8}
fsStatsToMeasure := []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}
orderedBy(exceedDiskRequests(statsFn, fsStatsToMeasure, resourceDisk), priority, disk(statsFn, fsStatsToMeasure, resourceDisk)).Sort(pods)
orderedBy(exceedDiskRequests(statsFn, fsStatsToMeasure, v1.ResourceEphemeralStorage), priority, disk(statsFn, fsStatsToMeasure, v1.ResourceEphemeralStorage)).Sort(pods)
for i := range expected {
if pods[i] != expected[i] {
t.Errorf("Expected pod[%d]: %s, but got: %s", i, expected[i].Name, pods[i].Name)
@@ -930,6 +929,80 @@ func TestOrderedByPriorityMemory(t *testing.T) {
}
}

func TestSortByEvictionPriority(t *testing.T) {
for _, tc := range []struct {
name string
thresholds []evictionapi.Threshold
expected []evictionapi.Threshold
}{
{
name: "empty threshold list",
thresholds: []evictionapi.Threshold{},
expected: []evictionapi.Threshold{},
},
{
name: "memory first, PID last",
thresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalPIDAvailable,
},
{
Signal: evictionapi.SignalNodeFsAvailable,
},
{
Signal: evictionapi.SignalMemoryAvailable,
},
},
expected: []evictionapi.Threshold{
{
Signal: evictionapi.SignalMemoryAvailable,
},
{
Signal: evictionapi.SignalNodeFsAvailable,
},
{
Signal: evictionapi.SignalPIDAvailable,
},
},
},
{
name: "allocatable memory first, PID last",
thresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalPIDAvailable,
},
{
Signal: evictionapi.SignalNodeFsAvailable,
},
{
Signal: evictionapi.SignalAllocatableMemoryAvailable,
},
},
expected: []evictionapi.Threshold{
{
Signal: evictionapi.SignalAllocatableMemoryAvailable,
},
{
Signal: evictionapi.SignalNodeFsAvailable,
},
{
Signal: evictionapi.SignalPIDAvailable,
},
},
},
} {
t.Run(tc.name, func(t *testing.T) {
sort.Sort(byEvictionPriority(tc.thresholds))
for i := range tc.expected {
if tc.thresholds[i].Signal != tc.expected[i].Signal {
t.Errorf("At index %d, expected threshold with signal %s, but got %s", i, tc.expected[i].Signal, tc.thresholds[i].Signal)
}
}

})
}
}

type fakeSummaryProvider struct {
result *statsapi.Summary
}
@@ -1620,47 +1693,6 @@ func TestHasNodeConditions(t *testing.T) {
}
}

func TestGetStarvedResources(t *testing.T) {
testCases := map[string]struct {
inputs []evictionapi.Threshold
result []v1.ResourceName
}{
"memory.available": {
inputs: []evictionapi.Threshold{
{Signal: evictionapi.SignalMemoryAvailable},
},
result: []v1.ResourceName{v1.ResourceMemory},
},
"imagefs.available": {
inputs: []evictionapi.Threshold{
{Signal: evictionapi.SignalImageFsAvailable},
},
result: []v1.ResourceName{resourceImageFs},
},
"nodefs.available": {
inputs: []evictionapi.Threshold{
{Signal: evictionapi.SignalNodeFsAvailable},
},
result: []v1.ResourceName{resourceNodeFs},
},
}
var internalResourceNames = func(in []v1.ResourceName) []api.ResourceName {
var out []api.ResourceName
for _, name := range in {
out = append(out, api.ResourceName(name))
}
return out
}
for testName, testCase := range testCases {
actual := getStarvedResources(testCase.inputs)
actualSet := quota.ToSet(internalResourceNames(actual))
expectedSet := quota.ToSet(internalResourceNames(testCase.result))
if !actualSet.Equal(expectedSet) {
t.Errorf("Test case: %s, expected: %v, actual: %v", testName, expectedSet, actualSet)
}
}
}

func TestParsePercentage(t *testing.T) {
testCases := map[string]struct {
hasError bool
135
vendor/k8s.io/kubernetes/pkg/kubelet/eviction/memory_threshold_notifier.go
generated
vendored
Normal file
@ -0,0 +1,135 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package eviction

import (
"fmt"
"time"

"github.com/golang/glog"

"k8s.io/apimachinery/pkg/api/resource"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/cm"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

const (
memoryUsageAttribute = "memory.usage_in_bytes"
// this prevents constantly updating the memcg notifier if synchronize
// is run frequently.
notifierRefreshInterval = 10 * time.Second
)

type memoryThresholdNotifier struct {
threshold evictionapi.Threshold
cgroupPath string
events chan struct{}
factory NotifierFactory
handler func(string)
notifier CgroupNotifier
}

var _ ThresholdNotifier = &memoryThresholdNotifier{}

// NewMemoryThresholdNotifier creates a ThresholdNotifier which is designed to respond to the given threshold.
// UpdateThreshold must be called once before the threshold will be active.
func NewMemoryThresholdNotifier(threshold evictionapi.Threshold, cgroupRoot string, factory NotifierFactory, handler func(string)) (ThresholdNotifier, error) {
cgroups, err := cm.GetCgroupSubsystems()
if err != nil {
return nil, err
}
cgpath, found := cgroups.MountPoints["memory"]
if !found || len(cgpath) == 0 {
return nil, fmt.Errorf("memory cgroup mount point not found")
}
if isAllocatableEvictionThreshold(threshold) {
// for allocatable thresholds, point the cgroup notifier at the allocatable cgroup
cgpath += cgroupRoot
}
return &memoryThresholdNotifier{
threshold: threshold,
cgroupPath: cgpath,
events: make(chan struct{}),
handler: handler,
factory: factory,
}, nil
}

func (m *memoryThresholdNotifier) Start() {
glog.Infof("eviction manager: created %s", m.Description())
for range m.events {
m.handler(fmt.Sprintf("eviction manager: %s crossed", m.Description()))
}
}

func (m *memoryThresholdNotifier) UpdateThreshold(summary *statsapi.Summary) error {
memoryStats := summary.Node.Memory
if isAllocatableEvictionThreshold(m.threshold) {
allocatableContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerPods)
if err != nil {
return err
}
memoryStats = allocatableContainer.Memory
}
if memoryStats == nil || memoryStats.UsageBytes == nil || memoryStats.WorkingSetBytes == nil || memoryStats.AvailableBytes == nil {
return fmt.Errorf("summary was incomplete. Expected MemoryStats and all subfields to be non-nil, but got %+v", memoryStats)
}
// Set threshold on usage to capacity - eviction_hard + inactive_file,
// since we want to be notified when working_set = capacity - eviction_hard
inactiveFile := resource.NewQuantity(int64(*memoryStats.UsageBytes-*memoryStats.WorkingSetBytes), resource.BinarySI)
capacity := resource.NewQuantity(int64(*memoryStats.AvailableBytes+*memoryStats.WorkingSetBytes), resource.BinarySI)
evictionThresholdQuantity := evictionapi.GetThresholdQuantity(m.threshold.Value, capacity)
memcgThreshold := capacity.DeepCopy()
memcgThreshold.Sub(*evictionThresholdQuantity)
memcgThreshold.Add(*inactiveFile)

glog.V(3).Infof("eviction manager: setting %s to %s\n", m.Description(), memcgThreshold.String())
if m.notifier != nil {
m.notifier.Stop()
}
newNotifier, err := m.factory.NewCgroupNotifier(m.cgroupPath, memoryUsageAttribute, memcgThreshold.Value())
if err != nil {
return err
}
m.notifier = newNotifier
go m.notifier.Start(m.events)
return nil
}

func (m *memoryThresholdNotifier) Description() string {
var hard, allocatable string
if isHardEvictionThreshold(m.threshold) {
hard = "hard "
} else {
hard = "soft "
}
if isAllocatableEvictionThreshold(m.threshold) {
allocatable = "allocatable "
}
return fmt.Sprintf("%s%smemory eviction threshold", hard, allocatable)
}

var _ NotifierFactory = &CgroupNotifierFactory{}

// CgroupNotifierFactory knows how to make CgroupNotifiers which integrate with the kernel
type CgroupNotifierFactory struct{}

// NewCgroupNotifier implements the NotifierFactory interface
func (n *CgroupNotifierFactory) NewCgroupNotifier(path, attribute string, threshold int64) (CgroupNotifier, error) {
return NewCgroupNotifier(path, attribute, threshold)
}
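The UpdateThreshold logic above arms the kernel memcg notifier at capacity - eviction_threshold + inactive_file, so the event fires exactly when working_set reaches capacity - eviction_threshold. A minimal standalone sketch of that arithmetic, with numbers mirroring the test cases in the next file (the helper name memcgThresholdBytes is hypothetical, not part of the vendored code):

package main

import "fmt"

// memcgThresholdBytes mirrors the arithmetic in UpdateThreshold above:
//   capacity     = available + working_set
//   inactiveFile = usage - working_set
//   threshold    = capacity - evictionThreshold + inactiveFile
// so the kernel notifies when working_set reaches capacity - evictionThreshold.
func memcgThresholdBytes(available, workingSet, usage, evictionThreshold int64) int64 {
	capacity := available + workingSet
	inactiveFile := usage - workingSet
	return capacity - evictionThreshold + inactiveFile
}

func main() {
	const gi = int64(1) << 30
	// Node-level case: available=3Gi, workingSet=1Gi, usage=2Gi, threshold=1Gi => 4Gi.
	fmt.Println(memcgThresholdBytes(3*gi, 1*gi, 2*gi, 1*gi) / gi)
	// Allocatable case: available=4Gi, workingSet=1Gi, usage=3Gi, threshold=1Gi => 6Gi.
	fmt.Println(memcgThresholdBytes(4*gi, 1*gi, 3*gi, 1*gi) / gi)
}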
270
vendor/k8s.io/kubernetes/pkg/kubelet/eviction/memory_threshold_notifier_test.go
generated
vendored
Normal file
@ -0,0 +1,270 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package eviction

import (
"fmt"
"strings"
"sync"
"testing"
"time"

"k8s.io/apimachinery/pkg/api/resource"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

const testCgroupPath = "/sys/fs/cgroups/memory"

func nodeSummary(available, workingSet, usage resource.Quantity, allocatable bool) *statsapi.Summary {
availableBytes := uint64(available.Value())
workingSetBytes := uint64(workingSet.Value())
usageBytes := uint64(usage.Value())
memoryStats := statsapi.MemoryStats{
AvailableBytes: &availableBytes,
WorkingSetBytes: &workingSetBytes,
UsageBytes: &usageBytes,
}
if allocatable {
return &statsapi.Summary{
Node: statsapi.NodeStats{
SystemContainers: []statsapi.ContainerStats{
{
Name: statsapi.SystemContainerPods,
Memory: &memoryStats,
},
},
},
}
}
return &statsapi.Summary{
Node: statsapi.NodeStats{
Memory: &memoryStats,
},
}
}

func newTestMemoryThresholdNotifier(threshold evictionapi.Threshold, factory NotifierFactory, handler func(string)) *memoryThresholdNotifier {
return &memoryThresholdNotifier{
threshold: threshold,
cgroupPath: testCgroupPath,
events: make(chan struct{}),
factory: factory,
handler: handler,
}
}

func TestUpdateThreshold(t *testing.T) {
testCases := []struct {
description string
available resource.Quantity
workingSet resource.Quantity
usage resource.Quantity
evictionThreshold evictionapi.Threshold
expectedThreshold resource.Quantity
updateThresholdErr error
expectErr bool
}{
{
description: "node level threshold",
available: resource.MustParse("3Gi"),
usage: resource.MustParse("2Gi"),
workingSet: resource.MustParse("1Gi"),
evictionThreshold: evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
expectedThreshold: resource.MustParse("4Gi"),
updateThresholdErr: nil,
expectErr: false,
},
{
description: "allocatable threshold",
available: resource.MustParse("4Gi"),
usage: resource.MustParse("3Gi"),
workingSet: resource.MustParse("1Gi"),
evictionThreshold: evictionapi.Threshold{
Signal: evictionapi.SignalAllocatableMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
expectedThreshold: resource.MustParse("6Gi"),
updateThresholdErr: nil,
expectErr: false,
},
{
description: "error updating node level threshold",
available: resource.MustParse("3Gi"),
usage: resource.MustParse("2Gi"),
workingSet: resource.MustParse("1Gi"),
evictionThreshold: evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
expectedThreshold: resource.MustParse("4Gi"),
updateThresholdErr: fmt.Errorf("unexpected error"),
expectErr: true,
},
}

for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
notifierFactory := &MockNotifierFactory{}
notifier := &MockCgroupNotifier{}
m := newTestMemoryThresholdNotifier(tc.evictionThreshold, notifierFactory, nil)
notifierFactory.On("NewCgroupNotifier", testCgroupPath, memoryUsageAttribute, tc.expectedThreshold.Value()).Return(notifier, tc.updateThresholdErr)
var events chan<- struct{}
events = m.events
notifier.On("Start", events).Return()
err := m.UpdateThreshold(nodeSummary(tc.available, tc.workingSet, tc.usage, isAllocatableEvictionThreshold(tc.evictionThreshold)))
if err != nil && !tc.expectErr {
t.Errorf("Unexpected error updating threshold: %v", err)
} else if err == nil && tc.expectErr {
t.Errorf("Expected error updating threshold, but got nil")
}
if !tc.expectErr {
notifierFactory.AssertNumberOfCalls(t, "NewCgroupNotifier", 1)
}
})
}
}

func TestStart(t *testing.T) {
noResources := resource.MustParse("0")
threshold := evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: &noResources,
},
}
notifier := &MockCgroupNotifier{}
notifierFactory := &MockNotifierFactory{}

var wg sync.WaitGroup
wg.Add(4)
m := newTestMemoryThresholdNotifier(threshold, notifierFactory, func(string) {
wg.Done()
})
notifierFactory.On("NewCgroupNotifier", testCgroupPath, memoryUsageAttribute, int64(0)).Return(notifier, nil)
var events chan<- struct{}
events = m.events
notifier.On("Start", events).Return()
notifier.On("Stop").Return()

err := m.UpdateThreshold(nodeSummary(noResources, noResources, noResources, isAllocatableEvictionThreshold(threshold)))
if err != nil {
t.Errorf("Unexpected error updating threshold: %v", err)
}
notifierFactory.AssertNumberOfCalls(t, "NewCgroupNotifier", 1)

go m.Start()

for i := 0; i < 4; i++ {
m.events <- struct{}{}
}
wg.Wait()
}

func TestThresholdDescription(t *testing.T) {
testCases := []struct {
description string
evictionThreshold evictionapi.Threshold
expectedSubstrings []string
omittedSubstrings []string
}{
{
description: "hard node level threshold",
evictionThreshold: evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
},
expectedSubstrings: []string{"hard"},
omittedSubstrings: []string{"allocatable", "soft"},
},
{
description: "soft node level threshold",
evictionThreshold: evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
GracePeriod: time.Minute * 2,
},
expectedSubstrings: []string{"soft"},
omittedSubstrings: []string{"allocatable", "hard"},
},
{
description: "hard allocatable threshold",
evictionThreshold: evictionapi.Threshold{
Signal: evictionapi.SignalAllocatableMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
GracePeriod: time.Minute * 2,
},
expectedSubstrings: []string{"soft", "allocatable"},
omittedSubstrings: []string{"hard"},
},
{
description: "soft allocatable threshold",
evictionThreshold: evictionapi.Threshold{
Signal: evictionapi.SignalAllocatableMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
},
expectedSubstrings: []string{"hard", "allocatable"},
omittedSubstrings: []string{"soft"},
},
}

for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
m := &memoryThresholdNotifier{
notifier: &MockCgroupNotifier{},
threshold: tc.evictionThreshold,
cgroupPath: testCgroupPath,
}
desc := m.Description()
for _, expected := range tc.expectedSubstrings {
if !strings.Contains(desc, expected) {
t.Errorf("expected description for notifier with threshold %+v to contain %s, but it did not", tc.evictionThreshold, expected)
}
}
for _, omitted := range tc.omittedSubstrings {
if strings.Contains(desc, omitted) {
t.Errorf("expected description for notifier with threshold %+v NOT to contain %s, but it did", tc.evictionThreshold, omitted)
}
}
})
}
}
98
vendor/k8s.io/kubernetes/pkg/kubelet/eviction/mock_threshold_notifier_test.go
generated
vendored
Normal file
@ -0,0 +1,98 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package eviction

import (
mock "github.com/stretchr/testify/mock"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)

// MockCgroupNotifier is a mock implementation of the CgroupNotifier interface
type MockCgroupNotifier struct {
mock.Mock
}

// Start implements the CgroupNotifier interface
func (m *MockCgroupNotifier) Start(a0 chan<- struct{}) {
m.Called(a0)
}

// Stop implements the CgroupNotifier interface
func (m *MockCgroupNotifier) Stop() {
m.Called()
}

// MockNotifierFactory is a mock of the NotifierFactory interface
type MockNotifierFactory struct {
mock.Mock
}

// NewCgroupNotifier implements the NotifierFactory interface
func (m *MockNotifierFactory) NewCgroupNotifier(a0, a1 string, a2 int64) (CgroupNotifier, error) {
ret := m.Called(a0, a1, a2)

var r0 CgroupNotifier
if rf, ok := ret.Get(0).(func(string, string, int64) CgroupNotifier); ok {
r0 = rf(a0, a1, a2)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(CgroupNotifier)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string, string, int64) error); ok {
r1 = rf(a0, a1, a2)
} else {
r1 = ret.Error(1)
}
return r0, r1
}

// MockThresholdNotifier is a mock implementation of the ThresholdNotifier interface
type MockThresholdNotifier struct {
mock.Mock
}

// Start implements the ThresholdNotifier interface
func (m *MockThresholdNotifier) Start() {
m.Called()
}

// UpdateThreshold implements the ThresholdNotifier interface
func (m *MockThresholdNotifier) UpdateThreshold(a0 *statsapi.Summary) error {
ret := m.Called(a0)

var r0 error
if rf, ok := ret.Get(0).(func(*statsapi.Summary) error); ok {
r0 = rf(a0)
} else {
r0 = ret.Error(0)
}
return r0
}

// Description implements the ThresholdNotifier interface
func (m *MockThresholdNotifier) Description() string {
ret := m.Called()
var r0 string
if rf, ok := ret.Get(0).(func() string); ok {
r0 = rf()
} else {
r0 = ret.String(0)
}
return r0
}
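MockThresholdNotifier is defined above but not exercised in the preceding test file; a minimal sketch of how it could be stubbed with the usual testify expectation pattern (the test name and expected values are hypothetical, not taken from the vendored tests):

package eviction

import (
	"testing"

	"github.com/stretchr/testify/mock"
	statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)

// TestMockThresholdNotifierSketch shows the On/Return/AssertExpectations flow
// against the MockThresholdNotifier defined above.
func TestMockThresholdNotifierSketch(t *testing.T) {
	m := &MockThresholdNotifier{}
	// Program the expectations: Description returns a fixed string and
	// UpdateThreshold succeeds for any summary.
	m.On("Description").Return("hard memory eviction threshold")
	m.On("UpdateThreshold", mock.Anything).Return(nil)

	if err := m.UpdateThreshold(&statsapi.Summary{}); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := m.Description(); got != "hard memory eviction threshold" {
		t.Fatalf("unexpected description: %q", got)
	}
	m.AssertExpectations(t)
}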
176
vendor/k8s.io/kubernetes/pkg/kubelet/eviction/threshold_notifier_linux.go
generated
vendored
@ -18,43 +18,47 @@ package eviction
import (
"fmt"
"sync"
"time"

"github.com/golang/glog"
"golang.org/x/sys/unix"
)

type memcgThresholdNotifier struct {
watchfd int
controlfd int
eventfd int
handler thresholdNotifierHandlerFunc
description string
const (
// eventSize is the number of bytes returned by a successful read from an eventfd
// see http://man7.org/linux/man-pages/man2/eventfd.2.html for more information
eventSize = 8
// numFdEvents is the number of events we can record at once.
// If EpollWait finds more than this, they will be missed.
numFdEvents = 6
)

type linuxCgroupNotifier struct {
eventfd int
epfd int
stop chan struct{}
stopLock sync.Mutex
}

var _ ThresholdNotifier = &memcgThresholdNotifier{}
var _ CgroupNotifier = &linuxCgroupNotifier{}

// NewMemCGThresholdNotifier sends notifications when a cgroup threshold
// is crossed (in either direction) for a given cgroup attribute
func NewMemCGThresholdNotifier(path, attribute, threshold, description string, handler thresholdNotifierHandlerFunc) (ThresholdNotifier, error) {
watchfd, err := unix.Open(fmt.Sprintf("%s/%s", path, attribute), unix.O_RDONLY, 0)
// NewCgroupNotifier returns a linuxCgroupNotifier, which performs cgroup control operations required
// to receive notifications from the cgroup when the threshold is crossed in either direction.
func NewCgroupNotifier(path, attribute string, threshold int64) (CgroupNotifier, error) {
var watchfd, eventfd, epfd, controlfd int
var err error
watchfd, err = unix.Open(fmt.Sprintf("%s/%s", path, attribute), unix.O_RDONLY, 0)
if err != nil {
return nil, err
}
defer func() {
if err != nil {
unix.Close(watchfd)
}
}()
controlfd, err := unix.Open(fmt.Sprintf("%s/cgroup.event_control", path), unix.O_WRONLY, 0)
defer unix.Close(watchfd)
controlfd, err = unix.Open(fmt.Sprintf("%s/cgroup.event_control", path), unix.O_WRONLY, 0)
if err != nil {
return nil, err
}
defer func() {
if err != nil {
unix.Close(controlfd)
}
}()
eventfd, err := unix.Eventfd(0, unix.EFD_CLOEXEC)
defer unix.Close(controlfd)
eventfd, err = unix.Eventfd(0, unix.EFD_CLOEXEC)
if err != nil {
return nil, err
}
@ -63,55 +67,119 @@ func NewMemCGThresholdNotifier(path, attribute, threshold, description string, h
return nil, err
}
defer func() {
// Close eventfd if we get an error later in initialization
if err != nil {
unix.Close(eventfd)
}
}()
glog.V(2).Infof("eviction: setting notification threshold to %s", threshold)
config := fmt.Sprintf("%d %d %s", eventfd, watchfd, threshold)
epfd, err = unix.EpollCreate1(0)
if err != nil {
return nil, err
}
if epfd < 0 {
err = fmt.Errorf("EpollCreate1 call failed")
return nil, err
}
defer func() {
// Close epfd if we get an error later in initialization
if err != nil {
unix.Close(epfd)
}
}()
config := fmt.Sprintf("%d %d %d", eventfd, watchfd, threshold)
_, err = unix.Write(controlfd, []byte(config))
if err != nil {
return nil, err
}
return &memcgThresholdNotifier{
watchfd: watchfd,
controlfd: controlfd,
eventfd: eventfd,
handler: handler,
description: description,
return &linuxCgroupNotifier{
eventfd: eventfd,
epfd: epfd,
stop: make(chan struct{}),
}, nil
}

func getThresholdEvents(eventfd int, eventCh chan<- struct{}, stopCh <-chan struct{}) {
func (n *linuxCgroupNotifier) Start(eventCh chan<- struct{}) {
err := unix.EpollCtl(n.epfd, unix.EPOLL_CTL_ADD, n.eventfd, &unix.EpollEvent{
Fd: int32(n.eventfd),
Events: unix.EPOLLIN,
})
if err != nil {
glog.Warningf("eviction manager: error adding epoll eventfd: %v", err)
return
}
for {
buf := make([]byte, 8)
_, err := unix.Read(eventfd, buf)
select {
case <-n.stop:
return
default:
}
event, err := wait(n.epfd, n.eventfd, notifierRefreshInterval)
if err != nil {
glog.Warningf("eviction manager: error while waiting for memcg events: %v", err)
return
} else if !event {
// Timeout on wait. This is expected if the threshold was not crossed
continue
}
// Consume the event from the eventfd
buf := make([]byte, eventSize)
_, err = unix.Read(n.eventfd, buf)
if err != nil {
glog.Warningf("eviction manager: error reading memcg events: %v", err)
return
}

select {
case eventCh <- struct{}{}:
case <-stopCh:
return
}
eventCh <- struct{}{}
}
}

func (n *memcgThresholdNotifier) Start(stopCh <-chan struct{}) {
eventCh := make(chan struct{})
go getThresholdEvents(n.eventfd, eventCh, stopCh)
for {
select {
case <-stopCh:
glog.V(2).Infof("eviction: stopping threshold notifier")
unix.Close(n.watchfd)
unix.Close(n.controlfd)
unix.Close(n.eventfd)
return
case <-eventCh:
glog.V(2).Infof("eviction: threshold crossed")
n.handler(n.description)
// wait waits up to notifierRefreshInterval for an event on the Epoll FD for the
// eventfd we are concerned about. It returns an error if one occurs, and true
// if the consumer should read from the eventfd.
func wait(epfd, eventfd int, timeout time.Duration) (bool, error) {
events := make([]unix.EpollEvent, numFdEvents+1)
timeoutMS := int(timeout / time.Millisecond)
n, err := unix.EpollWait(epfd, events, timeoutMS)
if n == -1 {
if err == unix.EINTR {
// Interrupt, ignore the error
return false, nil
}
return false, err
}
if n == 0 {
// Timeout
return false, nil
}
if n > numFdEvents {
return false, fmt.Errorf("epoll_wait returned more events than we know what to do with")
}
for _, event := range events[:n] {
if event.Fd == int32(eventfd) {
if event.Events&unix.EPOLLHUP != 0 || event.Events&unix.EPOLLERR != 0 || event.Events&unix.EPOLLIN != 0 {
// EPOLLHUP: should not happen, but if it does, treat it as a wakeup.

// EPOLLERR: If an error is waiting on the file descriptor, we should pretend
// something is ready to read, and let unix.Read pick up the error.

// EPOLLIN: There is data to read.
return true, nil
}
}
}
// An event occurred that we don't care about.
return false, nil
}

func (n *linuxCgroupNotifier) Stop() {
n.stopLock.Lock()
defer n.stopLock.Unlock()
select {
case <-n.stop:
// the linuxCgroupNotifier is already stopped
return
default:
}
unix.Close(n.eventfd)
unix.Close(n.epfd)
close(n.stop)
}
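The registration protocol used by NewCgroupNotifier above is the cgroup v1 memory controller's event_control interface: write the string "<eventfd> <watchfd> <threshold>" into cgroup.event_control, then wait for the eventfd to become readable. A stripped-down sketch of just that registration step, assuming a cgroup v1 memory hierarchy (the helper name registerMemcgThreshold is hypothetical and error handling is reduced to early returns):

package notifiersketch

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// registerMemcgThreshold arms a memory.usage_in_bytes threshold on the given
// cgroup and returns an eventfd that becomes readable when it is crossed,
// mirroring the setup performed by NewCgroupNotifier above.
func registerMemcgThreshold(cgroupPath string, thresholdBytes int64) (int, error) {
	watchfd, err := unix.Open(fmt.Sprintf("%s/memory.usage_in_bytes", cgroupPath), unix.O_RDONLY, 0)
	if err != nil {
		return -1, err
	}
	defer unix.Close(watchfd)
	controlfd, err := unix.Open(fmt.Sprintf("%s/cgroup.event_control", cgroupPath), unix.O_WRONLY, 0)
	if err != nil {
		return -1, err
	}
	defer unix.Close(controlfd)
	eventfd, err := unix.Eventfd(0, unix.EFD_CLOEXEC)
	if err != nil {
		return -1, err
	}
	// "<eventfd> <watchfd> <threshold>" is the format the kernel expects.
	if _, err := unix.Write(controlfd, []byte(fmt.Sprintf("%d %d %d", eventfd, watchfd, thresholdBytes))); err != nil {
		unix.Close(eventfd)
		return -1, err
	}
	return eventfd, nil
}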
16
vendor/k8s.io/kubernetes/pkg/kubelet/eviction/threshold_notifier_unsupported.go
generated
vendored
@ -18,10 +18,16 @@ limitations under the License.
package eviction

import "fmt"
import "github.com/golang/glog"

// NewMemCGThresholdNotifier sends notifications when a cgroup threshold
// is crossed (in either direction) for a given cgroup attribute
func NewMemCGThresholdNotifier(path, attribute, threshold, description string, handler thresholdNotifierHandlerFunc) (ThresholdNotifier, error) {
return nil, fmt.Errorf("threshold notification not supported")
// NewCgroupNotifier creates a cgroup notifier that does nothing because cgroups do not exist on non-linux systems.
func NewCgroupNotifier(path, attribute string, threshold int64) (CgroupNotifier, error) {
glog.V(5).Infof("cgroup notifications not supported")
return &unsupportedThresholdNotifier{}, nil
}

type unsupportedThresholdNotifier struct{}

func (*unsupportedThresholdNotifier) Start(_ chan<- struct{}) {}

func (*unsupportedThresholdNotifier) Stop() {}
34
vendor/k8s.io/kubernetes/pkg/kubelet/eviction/types.go
generated
vendored
@ -48,6 +48,8 @@ type Config struct {
Thresholds []evictionapi.Threshold
// KernelMemcgNotification if true will integrate with the kernel memcg notification to determine if memory thresholds are crossed.
KernelMemcgNotification bool
// PodCgroupRoot is the cgroup which contains all pods.
PodCgroupRoot string
}

// Manager evaluates when an eviction threshold for node stability has been met on the node.
@ -129,10 +131,30 @@ type nodeReclaimFunc func() error
// nodeReclaimFuncs is an ordered list of nodeReclaimFunc
type nodeReclaimFuncs []nodeReclaimFunc

// thresholdNotifierHandlerFunc is a function that takes action in response to a crossed threshold
type thresholdNotifierHandlerFunc func(thresholdDescription string)

// ThresholdNotifier notifies the user when an attribute crosses a threshold value
type ThresholdNotifier interface {
Start(stopCh <-chan struct{})
// CgroupNotifier generates events from cgroup events
type CgroupNotifier interface {
// Start causes the CgroupNotifier to begin notifying on the eventCh
Start(eventCh chan<- struct{})
// Stop stops all processes and cleans up file descriptors associated with the CgroupNotifier
Stop()
}

// NotifierFactory creates CgroupNotifiers
type NotifierFactory interface {
// NewCgroupNotifier creates a CgroupNotifier that creates events when the threshold
// on the attribute in the cgroup specified by the path is crossed.
NewCgroupNotifier(path, attribute string, threshold int64) (CgroupNotifier, error)
}

// ThresholdNotifier manages CgroupNotifiers based on memory eviction thresholds, and performs a function
// when memory eviction thresholds are crossed
type ThresholdNotifier interface {
// Start calls the notifier function when the CgroupNotifier notifies the ThresholdNotifier that an event occurred
Start()
// UpdateThreshold updates the memory cgroup threshold based on the metrics provided.
// Calling UpdateThreshold with recent metrics allows the ThresholdNotifier to trigger at the
// eviction threshold more accurately
UpdateThreshold(summary *statsapi.Summary) error
// Description produces a relevant string describing the Memory Threshold Notifier
Description() string
}
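The three interfaces above compose as follows: a NotifierFactory builds the kernel-facing CgroupNotifier, and a ThresholdNotifier owns it, re-arming the kernel threshold from fresh stats and invoking a handler when it fires. A hedged sketch of how a caller might wire this together using the exported names from this diff (the function runMemoryNotifier and the summaries channel are illustrative, not part of the vendored code):

package evictionsketch

import (
	statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
	"k8s.io/kubernetes/pkg/kubelet/eviction"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

// runMemoryNotifier creates a memory ThresholdNotifier, starts it, and keeps
// the kernel threshold in sync with incoming stats summaries.
func runMemoryNotifier(threshold evictionapi.Threshold, podCgroupRoot string, summaries <-chan *statsapi.Summary) error {
	notifier, err := eviction.NewMemoryThresholdNotifier(threshold, podCgroupRoot, &eviction.CgroupNotifierFactory{}, func(desc string) {
		// The handler receives a human-readable description of the crossed
		// threshold; in the kubelet this is presumably where an eviction
		// pass would be kicked off.
		_ = desc
	})
	if err != nil {
		return err
	}
	// Start blocks while consuming crossing events, so run it in a goroutine.
	go notifier.Start()
	// Re-arm the kernel threshold whenever a fresh stats summary arrives.
	for summary := range summaries {
		if err := notifier.UpdateThreshold(summary); err != nil {
			return err
		}
	}
	return nil
}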