mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-06-13 18:43:34 +00:00
vendor update for E2E framework
Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
This commit is contained in:
78
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/metrics.go
generated
vendored
Normal file
78
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/metrics.go
generated
vendored
Normal file
@ -0,0 +1,78 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package nodelifecycle
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// Prometheus naming used by the node lifecycle controller metrics: the
// subsystem shared by all of them, followed by one name per metric.
const (
	// nodeControllerSubsystem is the Subsystem set on every metric below.
	nodeControllerSubsystem = "node_collector"
	// zoneHealthStatisticKey is the name of the zoneHealth gauge.
	zoneHealthStatisticKey = "zone_health"
	// zoneSizeKey is the name of the zoneSize gauge.
	zoneSizeKey = "zone_size"
	// zoneNoUnhealthyNodesKey is the name of the unhealthyNodes gauge.
	zoneNoUnhealthyNodesKey = "unhealthy_nodes_in_zone"
	// evictionsNumberKey is the name of the evictionsNumber counter.
	evictionsNumberKey = "evictions_number"
)
|
||||
|
||||
var (
|
||||
zoneHealth = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: nodeControllerSubsystem,
|
||||
Name: zoneHealthStatisticKey,
|
||||
Help: "Gauge measuring percentage of healthy nodes per zone.",
|
||||
},
|
||||
[]string{"zone"},
|
||||
)
|
||||
zoneSize = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: nodeControllerSubsystem,
|
||||
Name: zoneSizeKey,
|
||||
Help: "Gauge measuring number of registered Nodes per zones.",
|
||||
},
|
||||
[]string{"zone"},
|
||||
)
|
||||
unhealthyNodes = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: nodeControllerSubsystem,
|
||||
Name: zoneNoUnhealthyNodesKey,
|
||||
Help: "Gauge measuring number of not Ready Nodes per zones.",
|
||||
},
|
||||
[]string{"zone"},
|
||||
)
|
||||
evictionsNumber = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Subsystem: nodeControllerSubsystem,
|
||||
Name: evictionsNumberKey,
|
||||
Help: "Number of Node evictions that happened since current instance of NodeController started.",
|
||||
},
|
||||
[]string{"zone"},
|
||||
)
|
||||
)
|
||||
|
||||
var registerMetrics sync.Once
|
||||
|
||||
// Register the metrics that are to be monitored.
|
||||
func Register() {
|
||||
registerMetrics.Do(func() {
|
||||
prometheus.MustRegister(zoneHealth)
|
||||
prometheus.MustRegister(zoneSize)
|
||||
prometheus.MustRegister(unhealthyNodes)
|
||||
prometheus.MustRegister(evictionsNumber)
|
||||
})
|
||||
}
|
1284
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/node_lifecycle_controller.go
generated
vendored
Normal file
1284
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/node_lifecycle_controller.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
309
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler/rate_limited_queue.go
generated
vendored
Normal file
309
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler/rate_limited_queue.go
generated
vendored
Normal file
@ -0,0 +1,309 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/client-go/util/flowcontrol"
|
||||
|
||||
"k8s.io/klog"
|
||||
)
|
||||
|
||||
// Tunables shared by the node lifecycle controller and its eviction queues.
const (
	// NodeHealthUpdateRetry is how many times writing a node health update
	// is retried.
	NodeHealthUpdateRetry = 5

	// NodeEvictionPeriod is the interval at which NodeController tries to
	// evict Pods from non-responsive Nodes.
	NodeEvictionPeriod = 100 * time.Millisecond

	// EvictionRateLimiterBurst is the burst size used for every eviction
	// rate limiter.
	EvictionRateLimiterBurst = 1
)
|
||||
|
||||
// TimedValue is a value that should be processed at a designated time.
type TimedValue struct {
	// Value is the string payload; the queues in this file use it as the
	// identity of the entry.
	Value string
	// UID could be anything that helps identify the value.
	UID interface{}
	// AddedAt records when the value was first queued.
	AddedAt time.Time
	// ProcessAt is the earliest time at which the value should be handled.
	ProcessAt time.Time
}

// now returns the current time. It is a variable so that tests can replace
// the clock.
var now = time.Now

// TimedQueue is a priority heap where the lowest ProcessAt is at the front of
// the queue. It provides the Len/Less/Swap/Push/Pop methods expected by
// container/heap.
type TimedQueue []*TimedValue

// Len is the length of the queue.
func (h TimedQueue) Len() int { return len(h) }

// Less returns true if queue[i] is due strictly before queue[j].
func (h TimedQueue) Less(i, j int) bool { return h[i].ProcessAt.Before(h[j].ProcessAt) }

// Swap swaps index i and j.
func (h TimedQueue) Swap(i, j int) { h[i], h[j] = h[j], h[i] }

// Push appends a new *TimedValue to the end of the queue. x must be a
// *TimedValue; any other type panics on the type assertion.
func (h *TimedQueue) Push(x interface{}) {
	*h = append(*h, x.(*TimedValue))
}

// Pop removes and returns the last element of the underlying slice. When
// driven through container/heap, that element is the one with the lowest
// ProcessAt, because heap.Pop moves the minimum to the end first.
func (h *TimedQueue) Pop() interface{} {
	old := *h
	n := len(old)
	x := old[n-1]
	// Clear the vacated slot so the backing array does not keep the popped
	// value reachable after it has left the queue.
	old[n-1] = nil
	*h = old[0 : n-1]
	return x
}
|
||||
|
||||
// UniqueQueue is a FIFO queue which additionally guarantees that any
|
||||
// element can be added only once until it is removed.
|
||||
type UniqueQueue struct {
|
||||
lock sync.Mutex
|
||||
queue TimedQueue
|
||||
set sets.String
|
||||
}
|
||||
|
||||
// Add a new value to the queue if it wasn't added before, or was
|
||||
// explicitly removed by the Remove call. Returns true if new value
|
||||
// was added.
|
||||
func (q *UniqueQueue) Add(value TimedValue) bool {
|
||||
q.lock.Lock()
|
||||
defer q.lock.Unlock()
|
||||
|
||||
if q.set.Has(value.Value) {
|
||||
return false
|
||||
}
|
||||
heap.Push(&q.queue, &value)
|
||||
q.set.Insert(value.Value)
|
||||
return true
|
||||
}
|
||||
|
||||
// Replace replaces an existing value in the queue if it already
|
||||
// exists, otherwise it does nothing. Returns true if the item was
|
||||
// found.
|
||||
func (q *UniqueQueue) Replace(value TimedValue) bool {
|
||||
q.lock.Lock()
|
||||
defer q.lock.Unlock()
|
||||
|
||||
for i := range q.queue {
|
||||
if q.queue[i].Value != value.Value {
|
||||
continue
|
||||
}
|
||||
heap.Remove(&q.queue, i)
|
||||
heap.Push(&q.queue, &value)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// RemoveFromQueue the value from the queue, but keeps it in the set,
|
||||
// so it won't be added second time. Returns true if something was
|
||||
// removed.
|
||||
func (q *UniqueQueue) RemoveFromQueue(value string) bool {
|
||||
q.lock.Lock()
|
||||
defer q.lock.Unlock()
|
||||
|
||||
if !q.set.Has(value) {
|
||||
return false
|
||||
}
|
||||
for i, val := range q.queue {
|
||||
if val.Value == value {
|
||||
heap.Remove(&q.queue, i)
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Remove the value from the queue, so Get() call won't return it, and
|
||||
// allow subsequent addition of the given value. If the value is not
|
||||
// present does nothing and returns false.
|
||||
func (q *UniqueQueue) Remove(value string) bool {
|
||||
q.lock.Lock()
|
||||
defer q.lock.Unlock()
|
||||
|
||||
if !q.set.Has(value) {
|
||||
return false
|
||||
}
|
||||
q.set.Delete(value)
|
||||
for i, val := range q.queue {
|
||||
if val.Value == value {
|
||||
heap.Remove(&q.queue, i)
|
||||
return true
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Get returns the oldest added value that wasn't returned yet.
|
||||
func (q *UniqueQueue) Get() (TimedValue, bool) {
|
||||
q.lock.Lock()
|
||||
defer q.lock.Unlock()
|
||||
if len(q.queue) == 0 {
|
||||
return TimedValue{}, false
|
||||
}
|
||||
result := heap.Pop(&q.queue).(*TimedValue)
|
||||
q.set.Delete(result.Value)
|
||||
return *result, true
|
||||
}
|
||||
|
||||
// Head returns the oldest added value that wasn't returned yet
|
||||
// without removing it.
|
||||
func (q *UniqueQueue) Head() (TimedValue, bool) {
|
||||
q.lock.Lock()
|
||||
defer q.lock.Unlock()
|
||||
if len(q.queue) == 0 {
|
||||
return TimedValue{}, false
|
||||
}
|
||||
result := q.queue[0]
|
||||
return *result, true
|
||||
}
|
||||
|
||||
// Clear removes all items from the queue and duplication preventing
|
||||
// set.
|
||||
func (q *UniqueQueue) Clear() {
|
||||
q.lock.Lock()
|
||||
defer q.lock.Unlock()
|
||||
if q.queue.Len() > 0 {
|
||||
q.queue = make(TimedQueue, 0)
|
||||
}
|
||||
if len(q.set) > 0 {
|
||||
q.set = sets.NewString()
|
||||
}
|
||||
}
|
||||
|
||||
// RateLimitedTimedQueue is a unique item priority queue ordered by
|
||||
// the expected next time of execution. It is also rate limited.
|
||||
type RateLimitedTimedQueue struct {
|
||||
queue UniqueQueue
|
||||
limiterLock sync.Mutex
|
||||
limiter flowcontrol.RateLimiter
|
||||
}
|
||||
|
||||
// NewRateLimitedTimedQueue creates new queue which will use given
|
||||
// RateLimiter to oversee execution.
|
||||
func NewRateLimitedTimedQueue(limiter flowcontrol.RateLimiter) *RateLimitedTimedQueue {
|
||||
return &RateLimitedTimedQueue{
|
||||
queue: UniqueQueue{
|
||||
queue: TimedQueue{},
|
||||
set: sets.NewString(),
|
||||
},
|
||||
limiter: limiter,
|
||||
}
|
||||
}
|
||||
|
||||
// ActionFunc takes a timed value and returns false if the item must
|
||||
// be retried, with an optional time.Duration if some minimum wait
|
||||
// interval should be used.
|
||||
type ActionFunc func(TimedValue) (bool, time.Duration)
|
||||
|
||||
// Try processes the queue.Ends prematurely if RateLimiter forbids an
|
||||
// action and leak is true. Otherwise, requeues the item to be
|
||||
// processed. Each value is processed once if fn returns true,
|
||||
// otherwise it is added back to the queue. The returned remaining is
|
||||
// used to identify the minimum time to execute the next item in the
|
||||
// queue. The same value is processed only once unless Remove is
|
||||
// explicitly called on it (it's done by the cancelPodEviction
|
||||
// function in NodeController when Node becomes Ready again) TODO:
|
||||
// figure out a good way to do garbage collection for all Nodes that
|
||||
// were removed from the cluster.
|
||||
func (q *RateLimitedTimedQueue) Try(fn ActionFunc) {
|
||||
val, ok := q.queue.Head()
|
||||
q.limiterLock.Lock()
|
||||
defer q.limiterLock.Unlock()
|
||||
for ok {
|
||||
// rate limit the queue checking
|
||||
if !q.limiter.TryAccept() {
|
||||
klog.V(10).Infof("Try rate limited for value: %v", val)
|
||||
// Try again later
|
||||
break
|
||||
}
|
||||
|
||||
now := now()
|
||||
if now.Before(val.ProcessAt) {
|
||||
break
|
||||
}
|
||||
|
||||
if ok, wait := fn(val); !ok {
|
||||
val.ProcessAt = now.Add(wait + 1)
|
||||
q.queue.Replace(val)
|
||||
} else {
|
||||
q.queue.RemoveFromQueue(val.Value)
|
||||
}
|
||||
val, ok = q.queue.Head()
|
||||
}
|
||||
}
|
||||
|
||||
// Add value to the queue to be processed. Won't add the same
|
||||
// value(comparison by value) a second time if it was already added
|
||||
// and not removed.
|
||||
func (q *RateLimitedTimedQueue) Add(value string, uid interface{}) bool {
|
||||
now := now()
|
||||
return q.queue.Add(TimedValue{
|
||||
Value: value,
|
||||
UID: uid,
|
||||
AddedAt: now,
|
||||
ProcessAt: now,
|
||||
})
|
||||
}
|
||||
|
||||
// Remove Node from the Evictor. The Node won't be processed until
|
||||
// added again.
|
||||
func (q *RateLimitedTimedQueue) Remove(value string) bool {
|
||||
return q.queue.Remove(value)
|
||||
}
|
||||
|
||||
// Clear removes all items from the queue
|
||||
func (q *RateLimitedTimedQueue) Clear() {
|
||||
q.queue.Clear()
|
||||
}
|
||||
|
||||
// SwapLimiter safely swaps current limiter for this queue with the
|
||||
// passed one if capacities or qps's differ.
|
||||
func (q *RateLimitedTimedQueue) SwapLimiter(newQPS float32) {
|
||||
q.limiterLock.Lock()
|
||||
defer q.limiterLock.Unlock()
|
||||
if q.limiter.QPS() == newQPS {
|
||||
return
|
||||
}
|
||||
var newLimiter flowcontrol.RateLimiter
|
||||
if newQPS <= 0 {
|
||||
newLimiter = flowcontrol.NewFakeNeverRateLimiter()
|
||||
} else {
|
||||
newLimiter = flowcontrol.NewTokenBucketRateLimiter(newQPS, EvictionRateLimiterBurst)
|
||||
|
||||
// If we're currently waiting on limiter, we drain the new one - this is a good approach when Burst value is 1
|
||||
// TODO: figure out if we need to support higher Burst values and decide on the drain logic, should we keep:
|
||||
// - saturation (percentage of used tokens)
|
||||
// - number of used tokens
|
||||
// - number of available tokens
|
||||
// - something else
|
||||
if q.limiter.TryAccept() == false {
|
||||
newLimiter.TryAccept()
|
||||
}
|
||||
}
|
||||
q.limiter.Stop()
|
||||
q.limiter = newLimiter
|
||||
}
|
500
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler/taint_manager.go
generated
vendored
Normal file
500
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler/taint_manager.go
generated
vendored
Normal file
@ -0,0 +1,500 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"io"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/fields"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/kubernetes/scheme"
|
||||
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
|
||||
"k8s.io/client-go/tools/record"
|
||||
"k8s.io/client-go/util/workqueue"
|
||||
"k8s.io/kubernetes/pkg/apis/core/helper"
|
||||
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
|
||||
|
||||
"k8s.io/klog"
|
||||
)
|
||||
|
||||
const (
	// TODO (k82cn): Figure out a reasonable number of workers/channels and propagate
	// the number of workers up making it a parameter of Run() function.

	// NodeUpdateChannelSize is the buffer size of each node update channel.
	NodeUpdateChannelSize = 10
	// UpdateWorkerSize is the number of workers handling node and/or pod
	// updates; one node channel and one pod channel are created per worker.
	UpdateWorkerSize = 8
	// podUpdateChannelSize is the buffer size of each pod update channel.
	podUpdateChannelSize = 1
	// retries bounds the repeated API calls made by the helpers in this file.
	retries = 5
)
|
||||
|
||||
// nodeUpdateItem is the work item queued for a Node change; it carries only
// the name of the affected Node.
type nodeUpdateItem struct {
	nodeName string
}
|
||||
|
||||
// podUpdateItem is the work item queued for a Pod change: the Pod's name and
// namespace plus the name of the Node it was assigned to when the change was
// observed.
type podUpdateItem struct {
	podName      string
	podNamespace string
	nodeName     string
}
|
||||
|
||||
// hash maps val to a bucket index using the 32-bit FNV-1a hash of the string
// reduced modulo max. For a positive max the result lies in [0, max); a max of
// zero panics with an integer division by zero.
func hash(val string, max int) int {
	digest := fnv.New32a()
	// The returned error is ignored, as in the original code.
	io.WriteString(digest, val)
	bucket := digest.Sum32() % uint32(max)
	return int(bucket)
}
|
||||
|
||||
// GetPodFunc returns the pod for the specified name/namespace, or a NotFound error if missing.
|
||||
type GetPodFunc func(name, namespace string) (*v1.Pod, error)
|
||||
|
||||
// GetNodeFunc returns the node for the specified name, or a NotFound error if missing.
|
||||
type GetNodeFunc func(name string) (*v1.Node, error)
|
||||
|
||||
// NoExecuteTaintManager listens to Taint/Toleration changes and is responsible for removing Pods
|
||||
// from Nodes tainted with NoExecute Taints.
|
||||
type NoExecuteTaintManager struct {
|
||||
client clientset.Interface
|
||||
recorder record.EventRecorder
|
||||
getPod GetPodFunc
|
||||
getNode GetNodeFunc
|
||||
|
||||
taintEvictionQueue *TimedWorkerQueue
|
||||
// keeps a map from nodeName to all noExecute taints on that Node
|
||||
taintedNodesLock sync.Mutex
|
||||
taintedNodes map[string][]v1.Taint
|
||||
|
||||
nodeUpdateChannels []chan nodeUpdateItem
|
||||
podUpdateChannels []chan podUpdateItem
|
||||
|
||||
nodeUpdateQueue workqueue.Interface
|
||||
podUpdateQueue workqueue.Interface
|
||||
}
|
||||
|
||||
func deletePodHandler(c clientset.Interface, emitEventFunc func(types.NamespacedName)) func(args *WorkArgs) error {
|
||||
return func(args *WorkArgs) error {
|
||||
ns := args.NamespacedName.Namespace
|
||||
name := args.NamespacedName.Name
|
||||
klog.V(0).Infof("NoExecuteTaintManager is deleting Pod: %v", args.NamespacedName.String())
|
||||
if emitEventFunc != nil {
|
||||
emitEventFunc(args.NamespacedName)
|
||||
}
|
||||
var err error
|
||||
for i := 0; i < retries; i++ {
|
||||
err = c.CoreV1().Pods(ns).Delete(name, &metav1.DeleteOptions{})
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
func getNoExecuteTaints(taints []v1.Taint) []v1.Taint {
|
||||
result := []v1.Taint{}
|
||||
for i := range taints {
|
||||
if taints[i].Effect == v1.TaintEffectNoExecute {
|
||||
result = append(result, taints[i])
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func getPodsAssignedToNode(c clientset.Interface, nodeName string) ([]v1.Pod, error) {
|
||||
selector := fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName})
|
||||
pods, err := c.CoreV1().Pods(v1.NamespaceAll).List(metav1.ListOptions{
|
||||
FieldSelector: selector.String(),
|
||||
LabelSelector: labels.Everything().String(),
|
||||
})
|
||||
for i := 0; i < retries && err != nil; i++ {
|
||||
pods, err = c.CoreV1().Pods(v1.NamespaceAll).List(metav1.ListOptions{
|
||||
FieldSelector: selector.String(),
|
||||
LabelSelector: labels.Everything().String(),
|
||||
})
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
if err != nil {
|
||||
return []v1.Pod{}, fmt.Errorf("failed to get Pods assigned to node %v", nodeName)
|
||||
}
|
||||
return pods.Items, nil
|
||||
}
|
||||
|
||||
// getMinTolerationTime returns minimal toleration time from the given slice, or -1 if it's infinite.
|
||||
func getMinTolerationTime(tolerations []v1.Toleration) time.Duration {
|
||||
minTolerationTime := int64(-1)
|
||||
if len(tolerations) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
for i := range tolerations {
|
||||
if tolerations[i].TolerationSeconds != nil {
|
||||
tolerationSeconds := *(tolerations[i].TolerationSeconds)
|
||||
if tolerationSeconds <= 0 {
|
||||
return 0
|
||||
} else if tolerationSeconds < minTolerationTime || minTolerationTime == -1 {
|
||||
minTolerationTime = tolerationSeconds
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return time.Duration(minTolerationTime) * time.Second
|
||||
}
|
||||
|
||||
// NewNoExecuteTaintManager creates a new NoExecuteTaintManager that will use passed clientset to
|
||||
// communicate with the API server.
|
||||
func NewNoExecuteTaintManager(c clientset.Interface, getPod GetPodFunc, getNode GetNodeFunc) *NoExecuteTaintManager {
|
||||
eventBroadcaster := record.NewBroadcaster()
|
||||
recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "taint-controller"})
|
||||
eventBroadcaster.StartLogging(klog.Infof)
|
||||
if c != nil {
|
||||
klog.V(0).Infof("Sending events to api server.")
|
||||
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: c.CoreV1().Events("")})
|
||||
} else {
|
||||
klog.Fatalf("kubeClient is nil when starting NodeController")
|
||||
}
|
||||
|
||||
tm := &NoExecuteTaintManager{
|
||||
client: c,
|
||||
recorder: recorder,
|
||||
getPod: getPod,
|
||||
getNode: getNode,
|
||||
taintedNodes: make(map[string][]v1.Taint),
|
||||
|
||||
nodeUpdateQueue: workqueue.NewNamed("noexec_taint_node"),
|
||||
podUpdateQueue: workqueue.NewNamed("noexec_taint_pod"),
|
||||
}
|
||||
tm.taintEvictionQueue = CreateWorkerQueue(deletePodHandler(c, tm.emitPodDeletionEvent))
|
||||
|
||||
return tm
|
||||
}
|
||||
|
||||
// Run starts NoExecuteTaintManager which will run in loop until `stopCh` is closed.
|
||||
func (tc *NoExecuteTaintManager) Run(stopCh <-chan struct{}) {
|
||||
klog.V(0).Infof("Starting NoExecuteTaintManager")
|
||||
|
||||
for i := 0; i < UpdateWorkerSize; i++ {
|
||||
tc.nodeUpdateChannels = append(tc.nodeUpdateChannels, make(chan nodeUpdateItem, NodeUpdateChannelSize))
|
||||
tc.podUpdateChannels = append(tc.podUpdateChannels, make(chan podUpdateItem, podUpdateChannelSize))
|
||||
}
|
||||
|
||||
// Functions that are responsible for taking work items out of the workqueues and putting them
|
||||
// into channels.
|
||||
go func(stopCh <-chan struct{}) {
|
||||
for {
|
||||
item, shutdown := tc.nodeUpdateQueue.Get()
|
||||
if shutdown {
|
||||
break
|
||||
}
|
||||
nodeUpdate := item.(nodeUpdateItem)
|
||||
hash := hash(nodeUpdate.nodeName, UpdateWorkerSize)
|
||||
select {
|
||||
case <-stopCh:
|
||||
tc.nodeUpdateQueue.Done(item)
|
||||
return
|
||||
case tc.nodeUpdateChannels[hash] <- nodeUpdate:
|
||||
// tc.nodeUpdateQueue.Done is called by the nodeUpdateChannels worker
|
||||
}
|
||||
}
|
||||
}(stopCh)
|
||||
|
||||
go func(stopCh <-chan struct{}) {
|
||||
for {
|
||||
item, shutdown := tc.podUpdateQueue.Get()
|
||||
if shutdown {
|
||||
break
|
||||
}
|
||||
podUpdate := item.(podUpdateItem)
|
||||
hash := hash(podUpdate.nodeName, UpdateWorkerSize)
|
||||
select {
|
||||
case <-stopCh:
|
||||
tc.podUpdateQueue.Done(item)
|
||||
return
|
||||
case tc.podUpdateChannels[hash] <- podUpdate:
|
||||
// tc.podUpdateQueue.Done is called by the podUpdateChannels worker
|
||||
}
|
||||
}
|
||||
}(stopCh)
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(UpdateWorkerSize)
|
||||
for i := 0; i < UpdateWorkerSize; i++ {
|
||||
go tc.worker(i, wg.Done, stopCh)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func (tc *NoExecuteTaintManager) worker(worker int, done func(), stopCh <-chan struct{}) {
|
||||
defer done()
|
||||
|
||||
// When processing events we want to prioritize Node updates over Pod updates,
|
||||
// as NodeUpdates that interest NoExecuteTaintManager should be handled as soon as possible -
|
||||
// we don't want user (or system) to wait until PodUpdate queue is drained before it can
|
||||
// start evicting Pods from tainted Nodes.
|
||||
for {
|
||||
select {
|
||||
case <-stopCh:
|
||||
return
|
||||
case nodeUpdate := <-tc.nodeUpdateChannels[worker]:
|
||||
tc.handleNodeUpdate(nodeUpdate)
|
||||
tc.nodeUpdateQueue.Done(nodeUpdate)
|
||||
case podUpdate := <-tc.podUpdateChannels[worker]:
|
||||
// If we found a Pod update we need to empty Node queue first.
|
||||
priority:
|
||||
for {
|
||||
select {
|
||||
case nodeUpdate := <-tc.nodeUpdateChannels[worker]:
|
||||
tc.handleNodeUpdate(nodeUpdate)
|
||||
tc.nodeUpdateQueue.Done(nodeUpdate)
|
||||
default:
|
||||
break priority
|
||||
}
|
||||
}
|
||||
// After Node queue is emptied we process podUpdate.
|
||||
tc.handlePodUpdate(podUpdate)
|
||||
tc.podUpdateQueue.Done(podUpdate)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// PodUpdated is used to notify NoExecuteTaintManager about Pod changes.
|
||||
func (tc *NoExecuteTaintManager) PodUpdated(oldPod *v1.Pod, newPod *v1.Pod) {
|
||||
podName := ""
|
||||
podNamespace := ""
|
||||
nodeName := ""
|
||||
oldTolerations := []v1.Toleration{}
|
||||
if oldPod != nil {
|
||||
podName = oldPod.Name
|
||||
podNamespace = oldPod.Namespace
|
||||
nodeName = oldPod.Spec.NodeName
|
||||
oldTolerations = oldPod.Spec.Tolerations
|
||||
}
|
||||
newTolerations := []v1.Toleration{}
|
||||
if newPod != nil {
|
||||
podName = newPod.Name
|
||||
podNamespace = newPod.Namespace
|
||||
nodeName = newPod.Spec.NodeName
|
||||
newTolerations = newPod.Spec.Tolerations
|
||||
}
|
||||
|
||||
if oldPod != nil && newPod != nil && helper.Semantic.DeepEqual(oldTolerations, newTolerations) && oldPod.Spec.NodeName == newPod.Spec.NodeName {
|
||||
return
|
||||
}
|
||||
updateItem := podUpdateItem{
|
||||
podName: podName,
|
||||
podNamespace: podNamespace,
|
||||
nodeName: nodeName,
|
||||
}
|
||||
|
||||
tc.podUpdateQueue.Add(updateItem)
|
||||
}
|
||||
|
||||
// NodeUpdated is used to notify NoExecuteTaintManager about Node changes.
|
||||
func (tc *NoExecuteTaintManager) NodeUpdated(oldNode *v1.Node, newNode *v1.Node) {
|
||||
nodeName := ""
|
||||
oldTaints := []v1.Taint{}
|
||||
if oldNode != nil {
|
||||
nodeName = oldNode.Name
|
||||
oldTaints = getNoExecuteTaints(oldNode.Spec.Taints)
|
||||
}
|
||||
|
||||
newTaints := []v1.Taint{}
|
||||
if newNode != nil {
|
||||
nodeName = newNode.Name
|
||||
newTaints = getNoExecuteTaints(newNode.Spec.Taints)
|
||||
}
|
||||
|
||||
if oldNode != nil && newNode != nil && helper.Semantic.DeepEqual(oldTaints, newTaints) {
|
||||
return
|
||||
}
|
||||
updateItem := nodeUpdateItem{
|
||||
nodeName: nodeName,
|
||||
}
|
||||
|
||||
tc.nodeUpdateQueue.Add(updateItem)
|
||||
}
|
||||
|
||||
func (tc *NoExecuteTaintManager) cancelWorkWithEvent(nsName types.NamespacedName) {
|
||||
if tc.taintEvictionQueue.CancelWork(nsName.String()) {
|
||||
tc.emitCancelPodDeletionEvent(nsName)
|
||||
}
|
||||
}
|
||||
|
||||
func (tc *NoExecuteTaintManager) processPodOnNode(
|
||||
podNamespacedName types.NamespacedName,
|
||||
nodeName string,
|
||||
tolerations []v1.Toleration,
|
||||
taints []v1.Taint,
|
||||
now time.Time,
|
||||
) {
|
||||
if len(taints) == 0 {
|
||||
tc.cancelWorkWithEvent(podNamespacedName)
|
||||
}
|
||||
allTolerated, usedTolerations := v1helper.GetMatchingTolerations(taints, tolerations)
|
||||
if !allTolerated {
|
||||
klog.V(2).Infof("Not all taints are tolerated after update for Pod %v on %v", podNamespacedName.String(), nodeName)
|
||||
// We're canceling scheduled work (if any), as we're going to delete the Pod right away.
|
||||
tc.cancelWorkWithEvent(podNamespacedName)
|
||||
tc.taintEvictionQueue.AddWork(NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), time.Now(), time.Now())
|
||||
return
|
||||
}
|
||||
minTolerationTime := getMinTolerationTime(usedTolerations)
|
||||
// getMinTolerationTime returns negative value to denote infinite toleration.
|
||||
if minTolerationTime < 0 {
|
||||
klog.V(4).Infof("New tolerations for %v tolerate forever. Scheduled deletion won't be cancelled if already scheduled.", podNamespacedName.String())
|
||||
return
|
||||
}
|
||||
|
||||
startTime := now
|
||||
triggerTime := startTime.Add(minTolerationTime)
|
||||
scheduledEviction := tc.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String())
|
||||
if scheduledEviction != nil {
|
||||
startTime = scheduledEviction.CreatedAt
|
||||
if startTime.Add(minTolerationTime).Before(triggerTime) {
|
||||
return
|
||||
}
|
||||
tc.cancelWorkWithEvent(podNamespacedName)
|
||||
}
|
||||
tc.taintEvictionQueue.AddWork(NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), startTime, triggerTime)
|
||||
}
|
||||
|
||||
func (tc *NoExecuteTaintManager) handlePodUpdate(podUpdate podUpdateItem) {
|
||||
pod, err := tc.getPod(podUpdate.podName, podUpdate.podNamespace)
|
||||
if err != nil {
|
||||
if apierrors.IsNotFound(err) {
|
||||
// Delete
|
||||
podNamespacedName := types.NamespacedName{Namespace: podUpdate.podNamespace, Name: podUpdate.podName}
|
||||
klog.V(4).Infof("Noticed pod deletion: %#v", podNamespacedName)
|
||||
tc.cancelWorkWithEvent(podNamespacedName)
|
||||
return
|
||||
}
|
||||
utilruntime.HandleError(fmt.Errorf("could not get pod %s/%s: %v", podUpdate.podName, podUpdate.podNamespace, err))
|
||||
return
|
||||
}
|
||||
|
||||
// We key the workqueue and shard workers by nodeName. If we don't match the current state we should not be the one processing the current object.
|
||||
if pod.Spec.NodeName != podUpdate.nodeName {
|
||||
return
|
||||
}
|
||||
|
||||
// Create or Update
|
||||
podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
|
||||
klog.V(4).Infof("Noticed pod update: %#v", podNamespacedName)
|
||||
nodeName := pod.Spec.NodeName
|
||||
if nodeName == "" {
|
||||
return
|
||||
}
|
||||
taints, ok := func() ([]v1.Taint, bool) {
|
||||
tc.taintedNodesLock.Lock()
|
||||
defer tc.taintedNodesLock.Unlock()
|
||||
taints, ok := tc.taintedNodes[nodeName]
|
||||
return taints, ok
|
||||
}()
|
||||
// It's possible that Node was deleted, or Taints were removed before, which triggered
|
||||
// eviction cancelling if it was needed.
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
tc.processPodOnNode(podNamespacedName, nodeName, pod.Spec.Tolerations, taints, time.Now())
|
||||
}
|
||||
|
||||
func (tc *NoExecuteTaintManager) handleNodeUpdate(nodeUpdate nodeUpdateItem) {
|
||||
node, err := tc.getNode(nodeUpdate.nodeName)
|
||||
if err != nil {
|
||||
if apierrors.IsNotFound(err) {
|
||||
// Delete
|
||||
klog.V(4).Infof("Noticed node deletion: %#v", nodeUpdate.nodeName)
|
||||
tc.taintedNodesLock.Lock()
|
||||
defer tc.taintedNodesLock.Unlock()
|
||||
delete(tc.taintedNodes, nodeUpdate.nodeName)
|
||||
return
|
||||
}
|
||||
utilruntime.HandleError(fmt.Errorf("cannot get node %s: %v", nodeUpdate.nodeName, err))
|
||||
return
|
||||
}
|
||||
|
||||
// Create or Update
|
||||
klog.V(4).Infof("Noticed node update: %#v", nodeUpdate)
|
||||
taints := getNoExecuteTaints(node.Spec.Taints)
|
||||
func() {
|
||||
tc.taintedNodesLock.Lock()
|
||||
defer tc.taintedNodesLock.Unlock()
|
||||
klog.V(4).Infof("Updating known taints on node %v: %v", node.Name, taints)
|
||||
if len(taints) == 0 {
|
||||
delete(tc.taintedNodes, node.Name)
|
||||
} else {
|
||||
tc.taintedNodes[node.Name] = taints
|
||||
}
|
||||
}()
|
||||
pods, err := getPodsAssignedToNode(tc.client, node.Name)
|
||||
if err != nil {
|
||||
klog.Errorf(err.Error())
|
||||
return
|
||||
}
|
||||
if len(pods) == 0 {
|
||||
return
|
||||
}
|
||||
// Short circuit, to make this controller a bit faster.
|
||||
if len(taints) == 0 {
|
||||
klog.V(4).Infof("All taints were removed from the Node %v. Cancelling all evictions...", node.Name)
|
||||
for i := range pods {
|
||||
tc.cancelWorkWithEvent(types.NamespacedName{Namespace: pods[i].Namespace, Name: pods[i].Name})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
for i := range pods {
|
||||
pod := &pods[i]
|
||||
podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
|
||||
tc.processPodOnNode(podNamespacedName, node.Name, pod.Spec.Tolerations, taints, now)
|
||||
}
|
||||
}
|
||||
|
||||
func (tc *NoExecuteTaintManager) emitPodDeletionEvent(nsName types.NamespacedName) {
|
||||
if tc.recorder == nil {
|
||||
return
|
||||
}
|
||||
ref := &v1.ObjectReference{
|
||||
Kind: "Pod",
|
||||
Name: nsName.Name,
|
||||
Namespace: nsName.Namespace,
|
||||
}
|
||||
tc.recorder.Eventf(ref, v1.EventTypeNormal, "TaintManagerEviction", "Marking for deletion Pod %s", nsName.String())
|
||||
}
|
||||
|
||||
func (tc *NoExecuteTaintManager) emitCancelPodDeletionEvent(nsName types.NamespacedName) {
|
||||
if tc.recorder == nil {
|
||||
return
|
||||
}
|
||||
ref := &v1.ObjectReference{
|
||||
Kind: "Pod",
|
||||
Name: nsName.Name,
|
||||
Namespace: nsName.Namespace,
|
||||
}
|
||||
tc.recorder.Eventf(ref, v1.EventTypeNormal, "TaintManagerEviction", "Cancelling deletion of Pod %s", nsName.String())
|
||||
}
|
145
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler/timed_workers.go
generated
vendored
Normal file
145
vendor/k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler/timed_workers.go
generated
vendored
Normal file
@ -0,0 +1,145 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
|
||||
"k8s.io/klog"
|
||||
)
|
||||
|
||||
// WorkArgs keeps arguments that will be passed to the function executed by the worker.
|
||||
type WorkArgs struct {
|
||||
NamespacedName types.NamespacedName
|
||||
}
|
||||
|
||||
// KeyFromWorkArgs creates a key for the given `WorkArgs`
|
||||
func (w *WorkArgs) KeyFromWorkArgs() string {
|
||||
return w.NamespacedName.String()
|
||||
}
|
||||
|
||||
// NewWorkArgs is a helper function to create new `WorkArgs`
|
||||
func NewWorkArgs(name, namespace string) *WorkArgs {
|
||||
return &WorkArgs{types.NamespacedName{Namespace: namespace, Name: name}}
|
||||
}
|
||||
|
||||
// TimedWorker is a responsible for executing a function no earlier than at FireAt time.
|
||||
type TimedWorker struct {
|
||||
WorkItem *WorkArgs
|
||||
CreatedAt time.Time
|
||||
FireAt time.Time
|
||||
Timer *time.Timer
|
||||
}
|
||||
|
||||
// CreateWorker creates a TimedWorker that will execute `f` not earlier than `fireAt`.
|
||||
func CreateWorker(args *WorkArgs, createdAt time.Time, fireAt time.Time, f func(args *WorkArgs) error) *TimedWorker {
|
||||
delay := fireAt.Sub(createdAt)
|
||||
if delay <= 0 {
|
||||
go f(args)
|
||||
return nil
|
||||
}
|
||||
timer := time.AfterFunc(delay, func() { f(args) })
|
||||
return &TimedWorker{
|
||||
WorkItem: args,
|
||||
CreatedAt: createdAt,
|
||||
FireAt: fireAt,
|
||||
Timer: timer,
|
||||
}
|
||||
}
|
||||
|
||||
// Cancel cancels the execution of function by the `TimedWorker`
|
||||
func (w *TimedWorker) Cancel() {
|
||||
if w != nil {
|
||||
w.Timer.Stop()
|
||||
}
|
||||
}
|
||||
|
||||
// TimedWorkerQueue keeps a set of TimedWorkers that are still wait for execution.
|
||||
type TimedWorkerQueue struct {
|
||||
sync.Mutex
|
||||
// map of workers keyed by string returned by 'KeyFromWorkArgs' from the given worker.
|
||||
workers map[string]*TimedWorker
|
||||
workFunc func(args *WorkArgs) error
|
||||
}
|
||||
|
||||
// CreateWorkerQueue creates a new TimedWorkerQueue for workers that will execute
|
||||
// given function `f`.
|
||||
func CreateWorkerQueue(f func(args *WorkArgs) error) *TimedWorkerQueue {
|
||||
return &TimedWorkerQueue{
|
||||
workers: make(map[string]*TimedWorker),
|
||||
workFunc: f,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *TimedWorkerQueue) getWrappedWorkerFunc(key string) func(args *WorkArgs) error {
|
||||
return func(args *WorkArgs) error {
|
||||
err := q.workFunc(args)
|
||||
q.Lock()
|
||||
defer q.Unlock()
|
||||
if err == nil {
|
||||
// To avoid duplicated calls we keep the key in the queue, to prevent
|
||||
// subsequent additions.
|
||||
q.workers[key] = nil
|
||||
} else {
|
||||
delete(q.workers, key)
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// AddWork adds a work to the WorkerQueue which will be executed not earlier than `fireAt`.
|
||||
func (q *TimedWorkerQueue) AddWork(args *WorkArgs, createdAt time.Time, fireAt time.Time) {
|
||||
key := args.KeyFromWorkArgs()
|
||||
klog.V(4).Infof("Adding TimedWorkerQueue item %v at %v to be fired at %v", key, createdAt, fireAt)
|
||||
|
||||
q.Lock()
|
||||
defer q.Unlock()
|
||||
if _, exists := q.workers[key]; exists {
|
||||
klog.Warningf("Trying to add already existing work for %+v. Skipping.", args)
|
||||
return
|
||||
}
|
||||
worker := CreateWorker(args, createdAt, fireAt, q.getWrappedWorkerFunc(key))
|
||||
q.workers[key] = worker
|
||||
}
|
||||
|
||||
// CancelWork removes scheduled function execution from the queue. Returns true if work was cancelled.
|
||||
func (q *TimedWorkerQueue) CancelWork(key string) bool {
|
||||
q.Lock()
|
||||
defer q.Unlock()
|
||||
worker, found := q.workers[key]
|
||||
result := false
|
||||
if found {
|
||||
klog.V(4).Infof("Cancelling TimedWorkerQueue item %v at %v", key, time.Now())
|
||||
if worker != nil {
|
||||
result = true
|
||||
worker.Cancel()
|
||||
}
|
||||
delete(q.workers, key)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// GetWorkerUnsafe returns a TimedWorker corresponding to the given key.
|
||||
// Unsafe method - workers have attached goroutines which can fire afater this function is called.
|
||||
func (q *TimedWorkerQueue) GetWorkerUnsafe(key string) *TimedWorker {
|
||||
q.Lock()
|
||||
defer q.Unlock()
|
||||
return q.workers[key]
|
||||
}
|
Reference in New Issue
Block a user