vendor updates

Serguei Bezverkhi
2018-03-06 17:33:18 -05:00
parent 4b3ebc171b
commit e9033989a0
5854 changed files with 248382 additions and 119809 deletions

View File

@ -12,13 +12,11 @@ go_test(
"eviction_manager_test.go",
"helpers_test.go",
],
importpath = "k8s.io/kubernetes/pkg/kubelet/eviction",
library = ":go_default_library",
embed = [":go_default_library"],
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
"//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/types:go_default_library",
@ -39,12 +37,41 @@ go_library(
"doc.go",
"eviction_manager.go",
"helpers.go",
"threshold_notifier_unsupported.go",
"types.go",
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"@io_bazel_rules_go//go/platform:android": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:darwin": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:dragonfly": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:freebsd": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:linux": [
"threshold_notifier_linux.go",
],
"@io_bazel_rules_go//go/platform:nacl": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:netbsd": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:openbsd": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:plan9": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:solaris": [
"threshold_notifier_unsupported.go",
],
"@io_bazel_rules_go//go/platform:windows": [
"threshold_notifier_unsupported.go",
],
"//conditions:default": [],
}),
importpath = "k8s.io/kubernetes/pkg/kubelet/eviction",
@ -61,7 +88,7 @@ go_library(
"//pkg/kubelet/server/stats:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
"//plugin/pkg/scheduler/util:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
@ -71,7 +98,7 @@ go_library(
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"@io_bazel_rules_go//go/platform:linux": [
"//vendor/golang.org/x/sys/unix:go_default_library",
],
"//conditions:default": [],

View File

@ -40,6 +40,8 @@ const (
SignalAllocatableMemoryAvailable Signal = "allocatableMemory.available"
// SignalAllocatableNodeFsAvailable is amount of local storage available for pod allocation
SignalAllocatableNodeFsAvailable Signal = "allocatableNodeFs.available"
// SignalPIDAvailable is amount of PID available for pod allocation
SignalPIDAvailable Signal = "pid.available"
)
// ThresholdOperator is the operator used to express a Threshold.

View File

@ -59,7 +59,7 @@ type managerImpl struct {
killPodFunc KillPodFunc
// the interface that knows how to do image gc
imageGC ImageGC
// the interface that knows how to do image gc
// the interface that knows how to do container gc
containerGC ContainerGC
// protects access to internal state
sync.RWMutex
@ -101,7 +101,8 @@ func NewManager(
containerGC ContainerGC,
recorder record.EventRecorder,
nodeRef *v1.ObjectReference,
clock clock.Clock) (Manager, lifecycle.PodAdmitHandler) {
clock clock.Clock,
) (Manager, lifecycle.PodAdmitHandler) {
manager := &managerImpl{
clock: clock,
killPodFunc: killPodFunc,
@ -148,11 +149,11 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd
}
// Start starts the control loop to observe and respond to low compute resources.
func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, capacityProvider CapacityProvider, monitoringInterval time.Duration) {
func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, monitoringInterval time.Duration) {
// start the eviction manager monitoring
go func() {
for {
if evictedPods := m.synchronize(diskInfoProvider, podFunc, capacityProvider); evictedPods != nil {
if evictedPods := m.synchronize(diskInfoProvider, podFunc); evictedPods != nil {
glog.Infof("eviction manager: pods %s evicted, waiting for pod to be cleaned up", format.Pods(evictedPods))
m.waitForPodsCleanup(podCleanedUpFunc, evictedPods)
} else {
@ -176,6 +177,13 @@ func (m *managerImpl) IsUnderDiskPressure() bool {
return hasNodeCondition(m.nodeConditions, v1.NodeDiskPressure)
}
// IsUnderPIDPressure returns true if the node is under PID pressure.
func (m *managerImpl) IsUnderPIDPressure() bool {
m.RLock()
defer m.RUnlock()
return hasNodeCondition(m.nodeConditions, v1.NodePIDPressure)
}
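Note: IsUnderPIDPressure mirrors the existing memory and disk accessors and simply reads back the v1.NodePIDPressure condition recorded during synchronize. A hypothetical caller that surfaces all three conditions (illustrative only, not the kubelet's actual node-status code):

    // pressureConditions is a hypothetical helper, shown only to illustrate how
    // the three accessors line up with node condition types.
    func pressureConditions(m Manager) map[v1.NodeConditionType]bool {
        return map[v1.NodeConditionType]bool{
            v1.NodeMemoryPressure: m.IsUnderMemoryPressure(),
            v1.NodeDiskPressure:   m.IsUnderDiskPressure(),
            v1.NodePIDPressure:    m.IsUnderPIDPressure(),
        }
    }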
func startMemoryThresholdNotifier(thresholds []evictionapi.Threshold, observations signalObservations, hard bool, handler thresholdNotifierHandlerFunc) error {
for _, threshold := range thresholds {
if threshold.Signal != evictionapi.SignalMemoryAvailable || hard != isHardEvictionThreshold(threshold) {
@ -211,10 +219,10 @@ func startMemoryThresholdNotifier(thresholds []evictionapi.Threshold, observatio
// synchronize is the main control loop that enforces eviction thresholds.
// Returns the pods that were killed, or nil if no pod was killed.
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, capacityProvider CapacityProvider) []*v1.Pod {
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) []*v1.Pod {
// if we have nothing to do, just return
thresholds := m.config.Thresholds
if len(thresholds) == 0 {
if len(thresholds) == 0 && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
return nil
}
@ -232,12 +240,15 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
}
activePods := podFunc()
// make observations and get a function to derive pod usage stats relative to those observations.
observations, statsFunc, err := makeSignalObservations(m.summaryProvider, capacityProvider, activePods)
updateStats := true
summary, err := m.summaryProvider.Get(updateStats)
if err != nil {
glog.Errorf("eviction manager: unexpected err: %v", err)
glog.Errorf("eviction manager: failed to get get summary stats: %v", err)
return nil
}
// make observations and get a function to derive pod usage stats relative to those observations.
observations, statsFunc := makeSignalObservations(summary)
debugLogObservations("observations", observations)
// attempt to create a threshold notifier to improve eviction response time
@ -248,18 +259,18 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
err = startMemoryThresholdNotifier(m.config.Thresholds, observations, false, func(desc string) {
glog.Infof("soft memory eviction threshold crossed at %s", desc)
// TODO wait grace period for soft memory limit
m.synchronize(diskInfoProvider, podFunc, capacityProvider)
m.synchronize(diskInfoProvider, podFunc)
})
if err != nil {
glog.Warningf("eviction manager: failed to create hard memory threshold notifier: %v", err)
glog.Warningf("eviction manager: failed to create soft memory threshold notifier: %v", err)
}
// start hard memory notification
err = startMemoryThresholdNotifier(m.config.Thresholds, observations, true, func(desc string) {
glog.Infof("hard memory eviction threshold crossed at %s", desc)
m.synchronize(diskInfoProvider, podFunc, capacityProvider)
m.synchronize(diskInfoProvider, podFunc)
})
if err != nil {
glog.Warningf("eviction manager: failed to create soft memory threshold notifier: %v", err)
glog.Warningf("eviction manager: failed to create hard memory threshold notifier: %v", err)
}
}
@ -312,9 +323,9 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
m.Unlock()
// evict pods if there is a resource usage violation from local volume temporary storage
// If eviction happens in localVolumeEviction function, skip the rest of eviction action
// If eviction happens in localStorageEviction function, skip the rest of eviction action
if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
if evictedPods := m.localStorageEviction(activePods); len(evictedPods) > 0 {
if evictedPods := m.localStorageEviction(summary, activePods); len(evictedPods) > 0 {
return evictedPods
}
}
@ -338,7 +349,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
m.recorder.Eventf(m.nodeRef, v1.EventTypeWarning, "EvictionThresholdMet", "Attempting to reclaim %s", resourceToReclaim)
// check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods.
if m.reclaimNodeLevelResources(resourceToReclaim, observations) {
if m.reclaimNodeLevelResources(resourceToReclaim) {
glog.Infof("eviction manager: able to reduce %v pressure without evicting pods.", resourceToReclaim)
return nil
}
@ -426,26 +437,31 @@ func (m *managerImpl) waitForPodsCleanup(podCleanedUpFunc PodCleanedUpFunc, pods
}
// reclaimNodeLevelResources attempts to reclaim node level resources. returns true if thresholds were satisfied and no pod eviction is required.
func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName, observations signalObservations) bool {
func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName) bool {
nodeReclaimFuncs := m.resourceToNodeReclaimFuncs[resourceToReclaim]
for _, nodeReclaimFunc := range nodeReclaimFuncs {
// attempt to reclaim the pressured resource.
reclaimed, err := nodeReclaimFunc()
if err != nil {
if err := nodeReclaimFunc(); err != nil {
glog.Warningf("eviction manager: unexpected error when attempting to reduce %v pressure: %v", resourceToReclaim, err)
}
// update our local observations based on the amount reported to have been reclaimed.
// note: this is optimistic, other things could have been still consuming the pressured resource in the interim.
for _, signal := range resourceClaimToSignal[resourceToReclaim] {
value, ok := observations[signal]
if !ok {
glog.Errorf("eviction manager: unable to find value associated with signal %v", signal)
continue
}
value.available.Add(*reclaimed)
}
if len(nodeReclaimFuncs) > 0 {
summary, err := m.summaryProvider.Get(true)
if err != nil {
glog.Errorf("eviction manager: failed to get get summary stats after resource reclaim: %v", err)
return false
}
// evaluate all current thresholds to see if with adjusted observations, we think we have met min reclaim goals
if len(thresholdsMet(m.thresholdsMet, observations, true)) == 0 {
// make observations and get a function to derive pod usage stats relative to those observations.
observations, _ := makeSignalObservations(summary)
debugLogObservations("observations after resource reclaim", observations)
// determine the set of thresholds met independent of grace period
thresholds := thresholdsMet(m.config.Thresholds, observations, false)
debugLogThresholdsWithObservation("thresholds after resource reclaim - ignoring grace period", thresholds, observations)
if len(thresholds) == 0 {
return true
}
}
@ -454,13 +470,7 @@ func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceNam
// localStorageEviction checks the EmptyDir volume usage for each pod and determines whether it exceeds the specified limit and needs
// to be evicted. It also checks every container in the pod; if the container overlay usage exceeds the limit, the pod will be evicted too.
func (m *managerImpl) localStorageEviction(pods []*v1.Pod) []*v1.Pod {
summary, err := m.summaryProvider.Get()
if err != nil {
glog.Errorf("Could not get summary provider")
return nil
}
func (m *managerImpl) localStorageEviction(summary *statsapi.Summary, pods []*v1.Pod) []*v1.Pod {
statsFunc := cachedStatsFunc(summary.Pods)
evicted := []*v1.Pod{}
for _, pod := range pods {

View File

@ -65,44 +65,30 @@ func (m *mockDiskInfoProvider) HasDedicatedImageFs() (bool, error) {
return m.dedicatedImageFs, nil
}
func newMockCapacityProvider(capacity, reservation v1.ResourceList) *mockCapacityProvider {
return &mockCapacityProvider{
capacity: capacity,
reservation: reservation,
}
}
type mockCapacityProvider struct {
capacity v1.ResourceList
reservation v1.ResourceList
}
func (m *mockCapacityProvider) GetCapacity() v1.ResourceList {
return m.capacity
}
func (m *mockCapacityProvider) GetNodeAllocatableReservation() v1.ResourceList {
return m.reservation
}
// mockDiskGC is used to simulate invoking image and container garbage collection.
type mockDiskGC struct {
err error
imageBytesFreed int64
imageGCInvoked bool
containerGCInvoked bool
err error
imageGCInvoked bool
containerGCInvoked bool
fakeSummaryProvider *fakeSummaryProvider
summaryAfterGC *statsapi.Summary
}
// DeleteUnusedImages returns the mocked values.
func (m *mockDiskGC) DeleteUnusedImages() (int64, error) {
func (m *mockDiskGC) DeleteUnusedImages() error {
m.imageGCInvoked = true
return m.imageBytesFreed, m.err
if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
m.fakeSummaryProvider.result = m.summaryAfterGC
}
return m.err
}
// DeleteAllUnusedContainers returns the mocked value
func (m *mockDiskGC) DeleteAllUnusedContainers() error {
m.containerGCInvoked = true
if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
m.fakeSummaryProvider.result = m.summaryAfterGC
}
return m.err
}
@ -132,6 +118,15 @@ func makeMemoryStats(nodeAvailableBytes string, podStats map[*v1.Pod]statsapi.Po
AvailableBytes: &availableBytes,
WorkingSetBytes: &WorkingSetBytes,
},
SystemContainers: []statsapi.ContainerStats{
{
Name: statsapi.SystemContainerPods,
Memory: &statsapi.MemoryStats{
AvailableBytes: &availableBytes,
WorkingSetBytes: &WorkingSetBytes,
},
},
},
},
Pods: []statsapi.PodStats{},
}
@ -210,8 +205,7 @@ func TestMemoryPressure(t *testing.T) {
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
imageGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@ -239,7 +233,8 @@ func TestMemoryPressure(t *testing.T) {
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: imageGC,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
@ -253,7 +248,7 @@ func TestMemoryPressure(t *testing.T) {
burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", ""), "0Gi")
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure
if manager.IsUnderMemoryPressure() {
@ -271,7 +266,7 @@ func TestMemoryPressure(t *testing.T) {
// induce soft threshold
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -286,7 +281,7 @@ func TestMemoryPressure(t *testing.T) {
// step forward in time pass the grace period
fakeClock.Step(3 * time.Minute)
summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -311,7 +306,7 @@ func TestMemoryPressure(t *testing.T) {
// remove memory pressure
fakeClock.Step(20 * time.Minute)
summaryProvider.result = summaryStatsMaker("3Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure
if manager.IsUnderMemoryPressure() {
@ -321,7 +316,7 @@ func TestMemoryPressure(t *testing.T) {
// induce memory pressure!
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -349,7 +344,7 @@ func TestMemoryPressure(t *testing.T) {
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("2Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure (because transition period not yet met)
if !manager.IsUnderMemoryPressure() {
@ -373,7 +368,7 @@ func TestMemoryPressure(t *testing.T) {
fakeClock.Step(5 * time.Minute)
summaryProvider.result = summaryStatsMaker("2Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure (because transition period met)
if manager.IsUnderMemoryPressure() {
@ -431,8 +426,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@ -474,7 +468,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi", "0Gi", "0Gi")
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure
if manager.IsUnderDiskPressure() {
@ -489,7 +483,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
// induce soft threshold
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("1.5Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -504,7 +498,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
// step forward in time pass the grace period
fakeClock.Step(3 * time.Minute)
summaryProvider.result = summaryStatsMaker("1.5Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -529,7 +523,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
// remove disk pressure
fakeClock.Step(20 * time.Minute)
summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure
if manager.IsUnderDiskPressure() {
@ -539,7 +533,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
// induce disk pressure!
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("500Mi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -564,7 +558,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure (because transition period not yet met)
if !manager.IsUnderDiskPressure() {
@ -585,7 +579,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
fakeClock.Step(5 * time.Minute)
summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure (because transition period met)
if manager.IsUnderDiskPressure() {
@ -630,8 +624,7 @@ func TestMinReclaim(t *testing.T) {
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@ -665,7 +658,7 @@ func TestMinReclaim(t *testing.T) {
}
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure
if manager.IsUnderMemoryPressure() {
@ -675,7 +668,7 @@ func TestMinReclaim(t *testing.T) {
// induce memory pressure!
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -695,7 +688,7 @@ func TestMinReclaim(t *testing.T) {
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("1.2Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure (because transition period not yet met)
if !manager.IsUnderMemoryPressure() {
@ -715,7 +708,7 @@ func TestMinReclaim(t *testing.T) {
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("2Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure (because transition period not yet met)
if !manager.IsUnderMemoryPressure() {
@ -731,7 +724,7 @@ func TestMinReclaim(t *testing.T) {
fakeClock.Step(5 * time.Minute)
summaryProvider.result = summaryStatsMaker("2Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure (because transition period met)
if manager.IsUnderMemoryPressure() {
@ -773,9 +766,6 @@ func TestNodeReclaimFuncs(t *testing.T) {
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
imageGcFree := resource.MustParse("700Mi")
diskGC := &mockDiskGC{imageBytesFreed: imageGcFree.Value(), err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@ -795,6 +785,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
},
}
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("16Gi", "200Gi", podStats)}
diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil}
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
@ -809,7 +800,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
}
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure
if manager.IsUnderDiskPressure() {
@ -819,7 +810,9 @@ func TestNodeReclaimFuncs(t *testing.T) {
// induce hard threshold
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker(".9Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// make GC successfully return disk usage to previous levels
diskGC.summaryAfterGC = summaryStatsMaker("16Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -843,7 +836,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
// remove disk pressure
fakeClock.Step(20 * time.Minute)
summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure
if manager.IsUnderDiskPressure() {
@ -853,7 +846,9 @@ func TestNodeReclaimFuncs(t *testing.T) {
// induce disk pressure!
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("400Mi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
// Don't reclaim any disk
diskGC.summaryAfterGC = summaryStatsMaker("400Mi", "200Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -880,7 +875,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
diskGC.imageGCInvoked = false // reset state
diskGC.containerGCInvoked = false // reset state
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure (because transition period not yet met)
if !manager.IsUnderDiskPressure() {
@ -903,7 +898,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
diskGC.imageGCInvoked = false // reset state
diskGC.containerGCInvoked = false // reset state
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure (because transition period met)
if manager.IsUnderDiskPressure() {
@ -971,8 +966,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@ -1014,7 +1008,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0", "0", "0")
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure
if manager.IsUnderDiskPressure() {
@ -1029,7 +1023,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
// induce soft threshold
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("1.5Mi", "4Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -1044,7 +1038,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
// step forward in time pass the grace period
fakeClock.Step(3 * time.Minute)
summaryProvider.result = summaryStatsMaker("1.5Mi", "4Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -1069,7 +1063,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
// remove inode pressure
fakeClock.Step(20 * time.Minute)
summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure
if manager.IsUnderDiskPressure() {
@ -1079,7 +1073,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
// induce inode pressure!
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("0.5Mi", "4Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure
if !manager.IsUnderDiskPressure() {
@ -1104,7 +1098,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have disk pressure (because transition period not yet met)
if !manager.IsUnderDiskPressure() {
@ -1125,7 +1119,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
fakeClock.Step(5 * time.Minute)
summaryProvider.result = summaryStatsMaker("3Mi", "4Mi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have disk pressure (because transition period met)
if manager.IsUnderDiskPressure() {
@ -1174,8 +1168,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{
Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
}
@ -1220,7 +1213,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
// induce soft threshold
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -1235,7 +1228,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
// step forward in time pass the grace period
fakeClock.Step(3 * time.Minute)
summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -1253,7 +1246,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
// remove memory pressure
fakeClock.Step(20 * time.Minute)
summaryProvider.result = summaryStatsMaker("3Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure
if manager.IsUnderMemoryPressure() {
@ -1266,7 +1259,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
// induce memory pressure!
fakeClock.Step(1 * time.Minute)
summaryProvider.result = summaryStatsMaker("500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -1284,7 +1277,6 @@ func TestAllocatableMemoryPressure(t *testing.T) {
utilfeature.DefaultFeatureGate.SetFromMap(map[string]bool{string(features.PodPriority): true})
podMaker := makePodWithMemoryStats
summaryStatsMaker := makeMemoryStats
constantCapacity := "4Gi"
podsToMake := []podToMake{
{name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"},
{name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"},
@ -1307,8 +1299,7 @@ func TestAllocatableMemoryPressure(t *testing.T) {
fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("3Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("1Gi")})
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
config := Config{
@ -1319,12 +1310,12 @@ func TestAllocatableMemoryPressure(t *testing.T) {
Signal: evictionapi.SignalAllocatableMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Ki"),
Quantity: quantityMustParse("1Gi"),
},
},
},
}
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(constantCapacity, podStats)}
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("4Gi", podStats)}
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
@ -1343,7 +1334,7 @@ func TestAllocatableMemoryPressure(t *testing.T) {
burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", ""), "0Gi")
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure
if manager.IsUnderMemoryPressure() {
@ -1362,8 +1353,8 @@ func TestAllocatableMemoryPressure(t *testing.T) {
fakeClock.Step(1 * time.Minute)
pod, podStat := podMaker("guaranteed-high-2", defaultPriority, newResourceList("100m", "1Gi", ""), newResourceList("100m", "1Gi", ""), "1Gi")
podStats[pod] = podStat
summaryProvider.result = summaryStatsMaker(constantCapacity, podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
summaryProvider.result = summaryStatsMaker("500Mi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure
if !manager.IsUnderMemoryPressure() {
@ -1397,9 +1388,9 @@ func TestAllocatableMemoryPressure(t *testing.T) {
delete(podStats, pod)
}
}
summaryProvider.result = summaryStatsMaker(constantCapacity, podStats)
summaryProvider.result = summaryStatsMaker("2Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should have memory pressure (because transition period not yet met)
if !manager.IsUnderMemoryPressure() {
@ -1421,9 +1412,9 @@ func TestAllocatableMemoryPressure(t *testing.T) {
// move the clock past transition period to ensure that we stop reporting pressure
fakeClock.Step(5 * time.Minute)
summaryProvider.result = summaryStatsMaker(constantCapacity, podStats)
summaryProvider.result = summaryStatsMaker("2Gi", podStats)
podKiller.pod = nil // reset state
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)
manager.synchronize(diskInfoProvider, activePodsFunc)
// we should not have memory pressure (because transition period met)
if manager.IsUnderMemoryPressure() {

View File

@ -29,10 +29,9 @@ import (
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/cm"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/server/stats"
schedulerutils "k8s.io/kubernetes/plugin/pkg/scheduler/util"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
schedulerutils "k8s.io/kubernetes/pkg/scheduler/util"
)
const (
@ -73,6 +72,7 @@ func init() {
signalToNodeCondition[evictionapi.SignalNodeFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalImageFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalNodeFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalPIDAvailable] = v1.NodePIDPressure
// map signals to resources (and vice-versa)
signalToResource = map[evictionapi.Signal]v1.ResourceName{}
@ -100,15 +100,11 @@ func validSignal(signal evictionapi.Signal) bool {
// ParseThresholdConfig parses the flags for thresholds.
func ParseThresholdConfig(allocatableConfig []string, evictionHard, evictionSoft, evictionSoftGracePeriod, evictionMinimumReclaim map[string]string) ([]evictionapi.Threshold, error) {
results := []evictionapi.Threshold{}
allocatableThresholds := getAllocatableThreshold(allocatableConfig)
results = append(results, allocatableThresholds...)
hardThresholds, err := parseThresholdStatements(evictionHard)
if err != nil {
return nil, err
}
results = append(results, hardThresholds...)
softThresholds, err := parseThresholdStatements(evictionSoft)
if err != nil {
return nil, err
@ -138,9 +134,31 @@ func ParseThresholdConfig(allocatableConfig []string, evictionHard, evictionSoft
}
}
}
for _, key := range allocatableConfig {
if key == kubetypes.NodeAllocatableEnforcementKey {
results = addAllocatableThresholds(results)
break
}
}
return results, nil
}
func addAllocatableThresholds(thresholds []evictionapi.Threshold) []evictionapi.Threshold {
additionalThresholds := []evictionapi.Threshold{}
for _, threshold := range thresholds {
if threshold.Signal == evictionapi.SignalMemoryAvailable && isHardEvictionThreshold(threshold) {
// Copy the SignalMemoryAvailable to SignalAllocatableMemoryAvailable
additionalThresholds = append(additionalThresholds, evictionapi.Threshold{
Signal: evictionapi.SignalAllocatableMemoryAvailable,
Operator: threshold.Operator,
Value: threshold.Value,
MinReclaim: threshold.MinReclaim,
})
}
}
return append(thresholds, additionalThresholds...)
}
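Note: the fixed zero-quantity allocatable threshold is gone; when allocatableConfig contains kubetypes.NodeAllocatableEnforcementKey, addAllocatableThresholds instead copies every hard memory.available threshold into an allocatableMemory.available threshold with the same operator, value, and min reclaim (the test expectation further down changes from 0 to 150Mi accordingly). A rough sketch of the resulting behaviour, using the ParseThresholdConfig signature shown above with illustrative values:

    // exampleAllocatableCopy is a sketch that would live alongside the eviction
    // package; the 150Mi value is made up for illustration.
    func exampleAllocatableCopy() ([]evictionapi.Threshold, error) {
        // The result contains the hard memory.available<150Mi threshold plus a
        // copied allocatableMemory.available<150Mi threshold.
        return ParseThresholdConfig(
            []string{kubetypes.NodeAllocatableEnforcementKey}, // enables the copy
            map[string]string{"memory.available": "150Mi"},    // evictionHard
            nil, // evictionSoft
            nil, // evictionSoftGracePeriod
            nil, // evictionMinimumReclaim
        )
    }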
// parseThresholdStatements parses the input statements into a list of Threshold objects.
func parseThresholdStatements(statements map[string]string) ([]evictionapi.Threshold, error) {
if len(statements) == 0 {
@ -152,26 +170,36 @@ func parseThresholdStatements(statements map[string]string) ([]evictionapi.Thres
if err != nil {
return nil, err
}
results = append(results, result)
if result != nil {
results = append(results, *result)
}
}
return results, nil
}
// parseThresholdStatement parses a threshold statement.
func parseThresholdStatement(signal evictionapi.Signal, val string) (evictionapi.Threshold, error) {
// parseThresholdStatement parses a threshold statement and returns a threshold,
// or nil if the threshold should be ignored.
func parseThresholdStatement(signal evictionapi.Signal, val string) (*evictionapi.Threshold, error) {
if !validSignal(signal) {
return evictionapi.Threshold{}, fmt.Errorf(unsupportedEvictionSignal, signal)
return nil, fmt.Errorf(unsupportedEvictionSignal, signal)
}
operator := evictionapi.OpForSignal[signal]
if strings.HasSuffix(val, "%") {
// ignore 0% and 100%
if val == "0%" || val == "100%" {
return nil, nil
}
percentage, err := parsePercentage(val)
if err != nil {
return evictionapi.Threshold{}, err
return nil, err
}
if percentage <= 0 {
return evictionapi.Threshold{}, fmt.Errorf("eviction percentage threshold %v must be positive: %s", signal, val)
if percentage < 0 {
return nil, fmt.Errorf("eviction percentage threshold %v must be >= 0%%: %s", signal, val)
}
return evictionapi.Threshold{
if percentage > 100 {
return nil, fmt.Errorf("eviction percentage threshold %v must be <= 100%%: %s", signal, val)
}
return &evictionapi.Threshold{
Signal: signal,
Operator: operator,
Value: evictionapi.ThresholdValue{
@ -181,12 +209,12 @@ func parseThresholdStatement(signal evictionapi.Signal, val string) (evictionapi
}
quantity, err := resource.ParseQuantity(val)
if err != nil {
return evictionapi.Threshold{}, err
return nil, err
}
if quantity.Sign() < 0 || quantity.IsZero() {
return evictionapi.Threshold{}, fmt.Errorf("eviction threshold %v must be positive: %s", signal, &quantity)
return nil, fmt.Errorf("eviction threshold %v must be positive: %s", signal, &quantity)
}
return evictionapi.Threshold{
return &evictionapi.Threshold{
Signal: signal,
Operator: operator,
Value: evictionapi.ThresholdValue{
@ -195,27 +223,6 @@ func parseThresholdStatement(signal evictionapi.Signal, val string) (evictionapi
}, nil
}
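Note: parseThresholdStatement now returns a *evictionapi.Threshold, with nil meaning "ignore this signal"; that is what lets "0%" and "100%" act as explicit disable values, while percentages outside the 0-100% range are rejected. A sketch of the resulting behaviour (it assumes the same package, since the function is unexported; the values match the test cases added further down):

    func exampleDisabledThresholds() {
        // "0%" and "100%" disable the signal: nil threshold, nil error.
        t, err := parseThresholdStatement(evictionapi.SignalMemoryAvailable, "0%") // t == nil, err == nil
        // Percentages above 100% (or below 0%) are rejected.
        t, err = parseThresholdStatement(evictionapi.SignalMemoryAvailable, "150%") // err != nil
        // Ordinary quantities still produce a threshold.
        t, err = parseThresholdStatement(evictionapi.SignalMemoryAvailable, "150Mi") // t.Value.Quantity is 150Mi
        _, _ = t, err
    }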
// getAllocatableThreshold returns the thresholds applicable for the allocatable configuration
func getAllocatableThreshold(allocatableConfig []string) []evictionapi.Threshold {
for _, key := range allocatableConfig {
if key == cm.NodeAllocatableEnforcementKey {
return []evictionapi.Threshold{
{
Signal: evictionapi.SignalAllocatableMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(int64(0), resource.BinarySI),
},
MinReclaim: &evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(int64(0), resource.BinarySI),
},
},
}
}
}
return []evictionapi.Threshold{}
}
// parsePercentage parses a string representing a percentage value
func parsePercentage(input string) (float32, error) {
value, err := strconv.ParseFloat(strings.TrimRight(input, "%"), 32)
@ -590,7 +597,7 @@ func memory(stats statsFunc) cmpFunc {
}
// podRequest returns the total resource request of a pod which is the
// max(sum of init container requests, sum of container requests)
// max(max of init container requests, sum of container requests)
func podRequest(pod *v1.Pod, resourceName v1.ResourceName) resource.Quantity {
containerValue := resource.Quantity{Format: resource.BinarySI}
if resourceName == resourceDisk && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
@ -609,9 +616,13 @@ func podRequest(pod *v1.Pod, resourceName v1.ResourceName) resource.Quantity {
for i := range pod.Spec.InitContainers {
switch resourceName {
case v1.ResourceMemory:
containerValue.Add(*pod.Spec.Containers[i].Resources.Requests.Memory())
if initValue.Cmp(*pod.Spec.InitContainers[i].Resources.Requests.Memory()) < 0 {
initValue = *pod.Spec.InitContainers[i].Resources.Requests.Memory()
}
case resourceDisk:
containerValue.Add(*pod.Spec.Containers[i].Resources.Requests.StorageEphemeral())
if initValue.Cmp(*pod.Spec.InitContainers[i].Resources.Requests.StorageEphemeral()) < 0 {
initValue = *pod.Spec.InitContainers[i].Resources.Requests.StorageEphemeral()
}
}
}
if containerValue.Cmp(initValue) > 0 {
@ -711,11 +722,7 @@ func (a byEvictionPriority) Less(i, j int) bool {
}
// makeSignalObservations derives observations using the specified summary provider.
func makeSignalObservations(summaryProvider stats.SummaryProvider, capacityProvider CapacityProvider, pods []*v1.Pod) (signalObservations, statsFunc, error) {
summary, err := summaryProvider.Get()
if err != nil {
return nil, nil, err
}
func makeSignalObservations(summary *statsapi.Summary) (signalObservations, statsFunc) {
// build the function to work against for pod stats
statsFunc := cachedStatsFunc(summary.Pods)
// build an evaluation context for current eviction signals
@ -728,6 +735,17 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider, capacityProvi
time: memory.Time,
}
}
if allocatableContainer, err := getSysContainer(summary.Node.SystemContainers, statsapi.SystemContainerPods); err != nil {
glog.Errorf("eviction manager: failed to construct signal: %q error: %v", evictionapi.SignalAllocatableMemoryAvailable, err)
} else {
if memory := allocatableContainer.Memory; memory != nil && memory.AvailableBytes != nil && memory.WorkingSetBytes != nil {
result[evictionapi.SignalAllocatableMemoryAvailable] = signalObservation{
available: resource.NewQuantity(int64(*memory.AvailableBytes), resource.BinarySI),
capacity: resource.NewQuantity(int64(*memory.AvailableBytes+*memory.WorkingSetBytes), resource.BinarySI),
time: memory.Time,
}
}
}
if nodeFs := summary.Node.Fs; nodeFs != nil {
if nodeFs.AvailableBytes != nil && nodeFs.CapacityBytes != nil {
result[evictionapi.SignalNodeFsAvailable] = signalObservation{
@ -762,27 +780,26 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider, capacityProvi
}
}
}
if memoryAllocatableCapacity, ok := capacityProvider.GetCapacity()[v1.ResourceMemory]; ok {
memoryAllocatableAvailable := memoryAllocatableCapacity.Copy()
if reserved, exists := capacityProvider.GetNodeAllocatableReservation()[v1.ResourceMemory]; exists {
memoryAllocatableAvailable.Sub(reserved)
}
for _, pod := range summary.Pods {
mu, err := podMemoryUsage(pod)
if err == nil {
memoryAllocatableAvailable.Sub(mu[v1.ResourceMemory])
if rlimit := summary.Node.Rlimit; rlimit != nil {
if rlimit.NumOfRunningProcesses != nil && rlimit.MaxPID != nil {
available := int64(*rlimit.MaxPID) - int64(*rlimit.NumOfRunningProcesses)
result[evictionapi.SignalPIDAvailable] = signalObservation{
available: resource.NewQuantity(available, resource.BinarySI),
capacity: resource.NewQuantity(int64(*rlimit.MaxPID), resource.BinarySI),
time: rlimit.Time,
}
}
result[evictionapi.SignalAllocatableMemoryAvailable] = signalObservation{
available: memoryAllocatableAvailable,
capacity: &memoryAllocatableCapacity,
}
} else {
glog.Errorf("Could not find capacity information for resource %v", v1.ResourceMemory)
}
return result, statsFunc
}
return result, statsFunc, nil
func getSysContainer(sysContainers []statsapi.ContainerStats, name string) (*statsapi.ContainerStats, error) {
for _, cont := range sysContainers {
if cont.Name == name {
return &cont, nil
}
}
return nil, fmt.Errorf("system container %q not found in metrics", name)
}
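Note: makeSignalObservations now works from a pre-fetched *statsapi.Summary and derives two observations that previously required the CapacityProvider: allocatableMemory.available is read from the SystemContainerPods entry in summary.Node.SystemContainers (capacity = available + working set), and the new pid.available comes from summary.Node.Rlimit as MaxPID minus NumOfRunningProcesses. A worked example of the arithmetic, with illustrative numbers:

    // exampleObservationMath mirrors the computations above with made-up values.
    func exampleObservationMath() {
        // Allocatable memory, from the SystemContainerPods stats:
        availableBytes := uint64(1 << 30)    // 1Gi still available to pods
        workingSetBytes := uint64(512 << 20) // 512Mi currently in use
        allocatableCapacity := availableBytes + workingSetBytes // 1.5Gi capacity

        // PIDs, from the node rlimit stats:
        maxPID := uint64(32768)
        running := uint64(100)
        pidAvailable := int64(maxPID) - int64(running) // 32668 available of 32768

        _, _ = allocatableCapacity, pidAvailable
    }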
// thresholdsMet returns the set of thresholds that were met independent of grace period
@ -1051,38 +1068,15 @@ func buildResourceToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, w
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{}
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{}
// with an imagefs, imagefs pressure should delete unused images
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
} else {
// without an imagefs, nodefs pressure should delete logs, and unused images
// since imagefs and nodefs share a common device, they share common reclaim functions
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, true)}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{deleteTerminatedContainers(containerGC), deleteImages(imageGC, false)}
resourceToReclaimFunc[resourceNodeFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceNodeFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceImageFs] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
resourceToReclaimFunc[resourceImageFsInodes] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
}
return resourceToReclaimFunc
}
// deleteTerminatedContainers will delete terminated containers to free up disk pressure.
func deleteTerminatedContainers(containerGC ContainerGC) nodeReclaimFunc {
return func() (*resource.Quantity, error) {
glog.Infof("eviction manager: attempting to delete unused containers")
err := containerGC.DeleteAllUnusedContainers()
// Calculating bytes freed is not yet supported.
return resource.NewQuantity(int64(0), resource.BinarySI), err
}
}
// deleteImages will delete unused images to free up disk pressure.
func deleteImages(imageGC ImageGC, reportBytesFreed bool) nodeReclaimFunc {
return func() (*resource.Quantity, error) {
glog.Infof("eviction manager: attempting to delete unused images")
bytesFreed, err := imageGC.DeleteUnusedImages()
reclaimed := int64(0)
if reportBytesFreed {
reclaimed = bytesFreed
}
return resource.NewQuantity(reclaimed, resource.BinarySI), err
}
}
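Note: with nodeReclaimFunc reduced to func() error, the deleteTerminatedContainers and deleteImages wrappers above are deleted and buildResourceToNodeReclaimFuncs stores the GC methods directly as Go method values. A minimal sketch of that pattern, assuming the slimmed-down ImageGC/ContainerGC interfaces defined at the end of this commit:

    // exampleReclaim shows method values being used as nodeReclaimFuncs; the
    // caller re-checks thresholds against a fresh stats summary afterwards.
    func exampleReclaim(containerGC ContainerGC, imageGC ImageGC) {
        funcs := nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
        for _, reclaim := range funcs {
            if err := reclaim(); err != nil {
                glog.Warningf("reclaim step failed: %v", err)
            }
        }
    }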

View File

@ -30,8 +30,8 @@ import (
api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/features"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/cm"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/quota"
)
@ -61,7 +61,7 @@ func TestParseThresholdConfig(t *testing.T) {
expectThresholds: []evictionapi.Threshold{},
},
"all memory eviction values": {
allocatableConfig: []string{cm.NodeAllocatableEnforcementKey},
allocatableConfig: []string{kubetypes.NodeAllocatableEnforcementKey},
evictionHard: map[string]string{"memory.available": "150Mi"},
evictionSoft: map[string]string{"memory.available": "300Mi"},
evictionSoftGracePeriod: map[string]string{"memory.available": "30s"},
@ -72,7 +72,7 @@ func TestParseThresholdConfig(t *testing.T) {
Signal: evictionapi.SignalAllocatableMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("0"),
Quantity: quantityMustParse("150Mi"),
},
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("0"),
@ -288,6 +288,20 @@ func TestParseThresholdConfig(t *testing.T) {
},
},
},
"disable via 0%": {
allocatableConfig: []string{},
evictionHard: map[string]string{"memory.available": "0%"},
evictionSoft: map[string]string{"memory.available": "0%"},
expectErr: false,
expectThresholds: []evictionapi.Threshold{},
},
"disable via 100%": {
allocatableConfig: []string{},
evictionHard: map[string]string{"memory.available": "100%"},
evictionSoft: map[string]string{"memory.available": "100%"},
expectErr: false,
expectThresholds: []evictionapi.Threshold{},
},
"invalid-signal": {
allocatableConfig: []string{},
evictionHard: map[string]string{"mem.available": "150Mi"},
@ -920,7 +934,7 @@ type fakeSummaryProvider struct {
result *statsapi.Summary
}
func (f *fakeSummaryProvider) Get() (*statsapi.Summary, error) {
func (f *fakeSummaryProvider) Get(updateStats bool) (*statsapi.Summary, error) {
return f.result, nil
}
@ -990,12 +1004,18 @@ func TestMakeSignalObservations(t *testing.T) {
InodesFree: &nodeFsInodesFree,
Inodes: &nodeFsInodes,
},
SystemContainers: []statsapi.ContainerStats{
{
Name: statsapi.SystemContainerPods,
Memory: &statsapi.MemoryStats{
AvailableBytes: &nodeAvailableBytes,
WorkingSetBytes: &nodeWorkingSetBytes,
},
},
},
},
Pods: []statsapi.PodStats{},
}
provider := &fakeSummaryProvider{
result: fakeStats,
}
pods := []*v1.Pod{
podMaker("pod1", "ns1", "uuid1", 1),
podMaker("pod1", "ns2", "uuid2", 1),
@ -1006,28 +1026,24 @@ func TestMakeSignalObservations(t *testing.T) {
fakeStats.Pods = append(fakeStats.Pods, newPodStats(pod, containerWorkingSetBytes))
}
res := quantityMustParse("5Gi")
capacityProvider := newMockCapacityProvider(v1.ResourceList{v1.ResourceMemory: *quantityMustParse("5Gi")}, v1.ResourceList{v1.ResourceMemory: *quantityMustParse("0Gi")})
// Allocatable thresholds are always 100%. Verify that Threshold == Capacity.
if res.CmpInt64(int64(allocatableMemoryCapacity)) != 0 {
t.Errorf("Expected Threshold %v to be equal to value %v", res.Value(), allocatableMemoryCapacity)
}
actualObservations, statsFunc, err := makeSignalObservations(provider, capacityProvider, pods)
if err != nil {
t.Errorf("Unexpected err: %v", err)
}
actualObservations, statsFunc := makeSignalObservations(fakeStats)
allocatableMemQuantity, found := actualObservations[evictionapi.SignalAllocatableMemoryAvailable]
if !found {
t.Errorf("Expected allocatable memory observation, but didnt find one")
}
if allocatableMemQuantity.available.Value() != 2*containerWorkingSetBytes {
t.Errorf("Expected %v, actual: %v", containerWorkingSetBytes, allocatableMemQuantity.available.Value())
if expectedBytes := int64(nodeAvailableBytes); allocatableMemQuantity.available.Value() != expectedBytes {
t.Errorf("Expected %v, actual: %v", expectedBytes, allocatableMemQuantity.available.Value())
}
if expectedBytes := int64(allocatableMemoryCapacity); allocatableMemQuantity.capacity.Value() != expectedBytes {
if expectedBytes := int64(nodeWorkingSetBytes + nodeAvailableBytes); allocatableMemQuantity.capacity.Value() != expectedBytes {
t.Errorf("Expected %v, actual: %v", expectedBytes, allocatableMemQuantity.capacity.Value())
}
memQuantity, found := actualObservations[evictionapi.SignalMemoryAvailable]
if !found {
t.Errorf("Expected available memory observation: %v", err)
t.Error("Expected available memory observation")
}
if expectedBytes := int64(nodeAvailableBytes); memQuantity.available.Value() != expectedBytes {
t.Errorf("Expected %v, actual: %v", expectedBytes, memQuantity.available.Value())
@ -1037,7 +1053,7 @@ func TestMakeSignalObservations(t *testing.T) {
}
nodeFsQuantity, found := actualObservations[evictionapi.SignalNodeFsAvailable]
if !found {
t.Errorf("Expected available nodefs observation: %v", err)
t.Error("Expected available nodefs observation")
}
if expectedBytes := int64(nodeFsAvailableBytes); nodeFsQuantity.available.Value() != expectedBytes {
t.Errorf("Expected %v, actual: %v", expectedBytes, nodeFsQuantity.available.Value())
@ -1047,7 +1063,7 @@ func TestMakeSignalObservations(t *testing.T) {
}
nodeFsInodesQuantity, found := actualObservations[evictionapi.SignalNodeFsInodesFree]
if !found {
t.Errorf("Expected inodes free nodefs observation: %v", err)
t.Error("Expected inodes free nodefs observation")
}
if expected := int64(nodeFsInodesFree); nodeFsInodesQuantity.available.Value() != expected {
t.Errorf("Expected %v, actual: %v", expected, nodeFsInodesQuantity.available.Value())
@ -1057,7 +1073,7 @@ func TestMakeSignalObservations(t *testing.T) {
}
imageFsQuantity, found := actualObservations[evictionapi.SignalImageFsAvailable]
if !found {
t.Errorf("Expected available imagefs observation: %v", err)
t.Error("Expected available imagefs observation")
}
if expectedBytes := int64(imageFsAvailableBytes); imageFsQuantity.available.Value() != expectedBytes {
t.Errorf("Expected %v, actual: %v", expectedBytes, imageFsQuantity.available.Value())
@ -1067,7 +1083,7 @@ func TestMakeSignalObservations(t *testing.T) {
}
imageFsInodesQuantity, found := actualObservations[evictionapi.SignalImageFsInodesFree]
if !found {
t.Errorf("Expected inodes free imagefs observation: %v", err)
t.Error("Expected inodes free imagefs observation")
}
if expected := int64(imageFsInodesFree); imageFsInodesQuantity.available.Value() != expected {
t.Errorf("Expected %v, actual: %v", expected, imageFsInodesQuantity.available.Value())
@ -1645,7 +1661,7 @@ func TestGetStarvedResources(t *testing.T) {
}
}
func testParsePercentage(t *testing.T) {
func TestParsePercentage(t *testing.T) {
testCases := map[string]struct {
hasError bool
value float32
@ -1674,7 +1690,7 @@ func testParsePercentage(t *testing.T) {
}
}
func testCompareThresholdValue(t *testing.T) {
func TestCompareThresholdValue(t *testing.T) {
testCases := []struct {
a, b evictionapi.ThresholdValue
equal bool
@ -1831,20 +1847,6 @@ func newResourceList(cpu, memory, disk string) v1.ResourceList {
return res
}
func newEphemeralStorageResourceList(ephemeral, cpu, memory string) v1.ResourceList {
res := v1.ResourceList{}
if ephemeral != "" {
res[v1.ResourceEphemeralStorage] = resource.MustParse(ephemeral)
}
if cpu != "" {
res[v1.ResourceCPU] = resource.MustParse(cpu)
}
if memory != "" {
res[v1.ResourceMemory] = resource.MustParse("1Mi")
}
return res
}
func newResourceRequirements(requests, limits v1.ResourceList) v1.ResourceRequirements {
res := v1.ResourceRequirements{}
res.Requests = requests

View File

@ -53,13 +53,16 @@ type Config struct {
// Manager evaluates when an eviction threshold for node stability has been met on the node.
type Manager interface {
// Start starts the control loop to monitor eviction thresholds at specified interval.
Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, capacityProvider CapacityProvider, monitoringInterval time.Duration)
Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, monitoringInterval time.Duration)
// IsUnderMemoryPressure returns true if the node is under memory pressure.
IsUnderMemoryPressure() bool
// IsUnderDiskPressure returns true if the node is under disk pressure.
IsUnderDiskPressure() bool
// IsUnderPIDPressure returns true if the node is under PID pressure.
IsUnderPIDPressure() bool
}
// DiskInfoProvider is responsible for informing the manager how disk is configured.
@ -68,25 +71,15 @@ type DiskInfoProvider interface {
HasDedicatedImageFs() (bool, error)
}
// CapacityProvider is responsible for providing the resource capacity and reservation information
type CapacityProvider interface {
// GetCapacity returns the amount of compute resources tracked by container manager available on the node.
GetCapacity() v1.ResourceList
// GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling.
GetNodeAllocatableReservation() v1.ResourceList
}
// ImageGC is responsible for performing garbage collection of unused images.
type ImageGC interface {
// DeleteUnusedImages deletes unused images and returns the number of bytes freed, and an error.
// This returns the bytes freed even if an error is returned.
DeleteUnusedImages() (int64, error)
// DeleteUnusedImages deletes unused images.
DeleteUnusedImages() error
}
// ContainerGC is responsible for performing garbage collection of unused containers.
type ContainerGC interface {
// DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted.
// It returns an error if it is unsuccessful.
DeleteAllUnusedContainers() error
}
@ -131,9 +124,7 @@ type thresholdsObservedAt map[evictionapi.Threshold]time.Time
type nodeConditionsObservedAt map[v1.NodeConditionType]time.Time
// nodeReclaimFunc is a function that knows how to reclaim a resource from the node without impacting pods.
// Returns the quantity of resources reclaimed and an error, if applicable.
// nodeReclaimFunc return the resources reclaimed even if an error occurs.
type nodeReclaimFunc func() (*resource.Quantity, error)
type nodeReclaimFunc func() error
// nodeReclaimFuncs is an ordered list of nodeReclaimFunc
type nodeReclaimFuncs []nodeReclaimFunc