mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-06-13 18:43:34 +00:00
rebase: update replaced k8s.io modules to v0.33.0
Signed-off-by: Niels de Vos <ndevos@ibm.com>
This commit is contained in:
committed by
mergify[bot]
parent
dd77e72800
commit
107407b44b
8
e2e/vendor/k8s.io/kubernetes/pkg/scheduler/backend/cache/cache.go
generated
vendored
8
e2e/vendor/k8s.io/kubernetes/pkg/scheduler/backend/cache/cache.go
generated
vendored
@ -757,4 +757,12 @@ func (cache *cacheImpl) updateMetrics() {
|
||||
metrics.CacheSize.WithLabelValues("assumed_pods").Set(float64(len(cache.assumedPods)))
|
||||
metrics.CacheSize.WithLabelValues("pods").Set(float64(len(cache.podStates)))
|
||||
metrics.CacheSize.WithLabelValues("nodes").Set(float64(len(cache.nodes)))
|
||||
|
||||
// we intentionally keep them with the deprecation and will remove at v1.34.
|
||||
//nolint:staticcheck
|
||||
metrics.SchedulerCacheSize.WithLabelValues("assumed_pods").Set(float64(len(cache.assumedPods)))
|
||||
//nolint:staticcheck
|
||||
metrics.SchedulerCacheSize.WithLabelValues("pods").Set(float64(len(cache.podStates)))
|
||||
//nolint:staticcheck
|
||||
metrics.SchedulerCacheSize.WithLabelValues("nodes").Set(float64(len(cache.nodes)))
|
||||
}
|
||||
|
107
e2e/vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/active_queue.go
generated
vendored
107
e2e/vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/active_queue.go
generated
vendored
@ -20,6 +20,7 @@ import (
|
||||
"container/list"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
@ -61,14 +62,63 @@ type activeQueuer interface {
|
||||
// underLock() method should be used to protect these methods.
|
||||
type unlockedActiveQueuer interface {
|
||||
unlockedActiveQueueReader
|
||||
AddOrUpdate(pInfo *framework.QueuedPodInfo)
|
||||
// add adds a new pod to the activeQ.
|
||||
// The event should show which event triggered this addition and is used for the metric recording.
|
||||
// This method should be called in activeQueue.underLock().
|
||||
add(pInfo *framework.QueuedPodInfo, event string)
|
||||
}
|
||||
|
||||
// unlockedActiveQueueReader defines activeQ read-only methods that are not protected by the lock itself.
|
||||
// underLock() or underRLock() method should be used to protect these methods.
|
||||
type unlockedActiveQueueReader interface {
|
||||
Get(pInfo *framework.QueuedPodInfo) (*framework.QueuedPodInfo, bool)
|
||||
Has(pInfo *framework.QueuedPodInfo) bool
|
||||
// get returns the pod matching pInfo inside the activeQ.
|
||||
// Returns false if the pInfo doesn't exist in the queue.
|
||||
// This method should be called in activeQueue.underLock() or activeQueue.underRLock().
|
||||
get(pInfo *framework.QueuedPodInfo) (*framework.QueuedPodInfo, bool)
|
||||
// has returns if pInfo exists in the queue.
|
||||
// This method should be called in activeQueue.underLock() or activeQueue.underRLock().
|
||||
has(pInfo *framework.QueuedPodInfo) bool
|
||||
}
|
||||
|
||||
// unlockedActiveQueue defines activeQ methods that are not protected by the lock itself.
|
||||
// activeQueue.underLock() or activeQueue.underRLock() method should be used to protect these methods.
|
||||
type unlockedActiveQueue struct {
|
||||
queue *heap.Heap[*framework.QueuedPodInfo]
|
||||
}
|
||||
|
||||
func newUnlockedActiveQueue(queue *heap.Heap[*framework.QueuedPodInfo]) *unlockedActiveQueue {
|
||||
return &unlockedActiveQueue{
|
||||
queue: queue,
|
||||
}
|
||||
}
|
||||
|
||||
// add adds a new pod to the activeQ.
|
||||
// The event should show which event triggered this addition and is used for the metric recording.
|
||||
// This method should be called in activeQueue.underLock().
|
||||
func (uaq *unlockedActiveQueue) add(pInfo *framework.QueuedPodInfo, event string) {
|
||||
uaq.queue.AddOrUpdate(pInfo)
|
||||
metrics.SchedulerQueueIncomingPods.WithLabelValues("active", event).Inc()
|
||||
}
|
||||
|
||||
// get returns the pod matching pInfo inside the activeQ.
|
||||
// Returns false if the pInfo doesn't exist in the queue.
|
||||
// This method should be called in activeQueue.underLock() or activeQueue.underRLock().
|
||||
func (uaq *unlockedActiveQueue) get(pInfo *framework.QueuedPodInfo) (*framework.QueuedPodInfo, bool) {
|
||||
return uaq.queue.Get(pInfo)
|
||||
}
|
||||
|
||||
// has returns if pInfo exists in the queue.
|
||||
// This method should be called in activeQueue.underLock() or activeQueue.underRLock().
|
||||
func (uaq *unlockedActiveQueue) has(pInfo *framework.QueuedPodInfo) bool {
|
||||
return uaq.queue.Has(pInfo)
|
||||
}
|
||||
|
||||
// backoffQPopper defines method that is used to pop from the backoffQ when the activeQ is empty.
|
||||
type backoffQPopper interface {
|
||||
// popBackoff pops the pInfo from the podBackoffQ.
|
||||
popBackoff() (*framework.QueuedPodInfo, error)
|
||||
// lenBackoff returns the length of the podBackoffQ.
|
||||
lenBackoff() int
|
||||
}
|
||||
|
||||
// activeQueue implements activeQueuer. All of the fields have to be protected using the lock.
|
||||
@ -77,15 +127,21 @@ type activeQueue struct {
|
||||
// It protects activeQ, inFlightPods, inFlightEvents, schedulingCycle and closed fields.
|
||||
// Caution: DO NOT take "SchedulingQueue.lock" after taking "lock".
|
||||
// You should always take "SchedulingQueue.lock" first, otherwise the queue could end up in deadlock.
|
||||
// "lock" should not be taken after taking "nLock".
|
||||
// Correct locking order is: SchedulingQueue.lock > lock > nominator.nLock.
|
||||
// "lock" should not be taken after taking "backoffQueue.lock" or "nominator.nLock".
|
||||
// Correct locking order is: SchedulingQueue.lock > lock > backoffQueue.lock > nominator.nLock.
|
||||
lock sync.RWMutex
|
||||
|
||||
// activeQ is heap structure that scheduler actively looks at to find pods to
|
||||
// schedule. Head of heap is the highest priority pod.
|
||||
queue *heap.Heap[*framework.QueuedPodInfo]
|
||||
|
||||
// unlockedQueue is a wrapper of queue providing methods that are not locked themselves
|
||||
// and can be used in the underLock() or underRLock().
|
||||
unlockedQueue *unlockedActiveQueue
|
||||
|
||||
// cond is a condition that is notified when the pod is added to activeQ.
|
||||
// When SchedulerPopFromBackoffQ feature is enabled,
|
||||
// condition is also notified when the pod is added to backoffQ.
|
||||
// It is used with lock.
|
||||
cond sync.Cond
|
||||
|
||||
@ -125,15 +181,21 @@ type activeQueue struct {
|
||||
isSchedulingQueueHintEnabled bool
|
||||
|
||||
metricsRecorder metrics.MetricAsyncRecorder
|
||||
|
||||
// backoffQPopper is used to pop from backoffQ when activeQ is empty.
|
||||
// It is non-nil only when SchedulerPopFromBackoffQ feature is enabled.
|
||||
backoffQPopper backoffQPopper
|
||||
}
|
||||
|
||||
func newActiveQueue(queue *heap.Heap[*framework.QueuedPodInfo], isSchedulingQueueHintEnabled bool, metricRecorder metrics.MetricAsyncRecorder) *activeQueue {
|
||||
func newActiveQueue(queue *heap.Heap[*framework.QueuedPodInfo], isSchedulingQueueHintEnabled bool, metricRecorder metrics.MetricAsyncRecorder, backoffQPopper backoffQPopper) *activeQueue {
|
||||
aq := &activeQueue{
|
||||
queue: queue,
|
||||
inFlightPods: make(map[types.UID]*list.Element),
|
||||
inFlightEvents: list.New(),
|
||||
isSchedulingQueueHintEnabled: isSchedulingQueueHintEnabled,
|
||||
metricsRecorder: metricRecorder,
|
||||
unlockedQueue: newUnlockedActiveQueue(queue),
|
||||
backoffQPopper: backoffQPopper,
|
||||
}
|
||||
aq.cond.L = &aq.lock
|
||||
|
||||
@ -146,7 +208,7 @@ func newActiveQueue(queue *heap.Heap[*framework.QueuedPodInfo], isSchedulingQueu
|
||||
func (aq *activeQueue) underLock(fn func(unlockedActiveQ unlockedActiveQueuer)) {
|
||||
aq.lock.Lock()
|
||||
defer aq.lock.Unlock()
|
||||
fn(aq.queue)
|
||||
fn(aq.unlockedQueue)
|
||||
}
|
||||
|
||||
// underRLock runs the fn function under the lock.RLock.
|
||||
@ -155,7 +217,7 @@ func (aq *activeQueue) underLock(fn func(unlockedActiveQ unlockedActiveQueuer))
|
||||
func (aq *activeQueue) underRLock(fn func(unlockedActiveQ unlockedActiveQueueReader)) {
|
||||
aq.lock.RLock()
|
||||
defer aq.lock.RUnlock()
|
||||
fn(aq.queue)
|
||||
fn(aq.unlockedQueue)
|
||||
}
|
||||
|
||||
// update updates the pod in activeQ if oldPodInfo is already in the queue.
|
||||
@ -191,7 +253,13 @@ func (aq *activeQueue) pop(logger klog.Logger) (*framework.QueuedPodInfo, error)
|
||||
}
|
||||
|
||||
func (aq *activeQueue) unlockedPop(logger klog.Logger) (*framework.QueuedPodInfo, error) {
|
||||
var pInfo *framework.QueuedPodInfo
|
||||
for aq.queue.Len() == 0 {
|
||||
// backoffQPopper is non-nil only if SchedulerPopFromBackoffQ feature is enabled.
|
||||
// In case of non-empty backoffQ, try popping from there.
|
||||
if aq.backoffQPopper != nil && aq.backoffQPopper.lenBackoff() != 0 {
|
||||
break
|
||||
}
|
||||
// When the queue is empty, invocation of Pop() is blocked until new item is enqueued.
|
||||
// When Close() is called, the p.closed is set and the condition is broadcast,
|
||||
// which causes this loop to continue and return from the Pop().
|
||||
@ -203,9 +271,18 @@ func (aq *activeQueue) unlockedPop(logger klog.Logger) (*framework.QueuedPodInfo
|
||||
}
|
||||
pInfo, err := aq.queue.Pop()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
if aq.backoffQPopper == nil {
|
||||
return nil, err
|
||||
}
|
||||
// Try to pop from backoffQ when activeQ is empty.
|
||||
pInfo, err = aq.backoffQPopper.popBackoff()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
metrics.SchedulerQueueIncomingPods.WithLabelValues("active", framework.PopFromBackoffQ).Inc()
|
||||
}
|
||||
pInfo.Attempts++
|
||||
pInfo.BackoffExpiration = time.Time{}
|
||||
// In flight, no concurrent events yet.
|
||||
if aq.isSchedulingQueueHintEnabled {
|
||||
// If the pod is already in the map, we shouldn't overwrite the inFlightPods otherwise it'd lead to a memory leak.
|
||||
@ -354,6 +431,12 @@ func (aq *activeQueue) done(pod types.UID) {
|
||||
aq.lock.Lock()
|
||||
defer aq.lock.Unlock()
|
||||
|
||||
aq.unlockedDone(pod)
|
||||
}
|
||||
|
||||
// unlockedDone is used by the activeQueue internally and doesn't take the lock itself.
|
||||
// It assumes the lock is already taken outside before the method is called.
|
||||
func (aq *activeQueue) unlockedDone(pod types.UID) {
|
||||
inFlightPod, ok := aq.inFlightPods[pod]
|
||||
if !ok {
|
||||
// This Pod is already done()ed.
|
||||
@ -398,15 +481,15 @@ func (aq *activeQueue) done(pod types.UID) {
|
||||
|
||||
// close closes the activeQueue.
|
||||
func (aq *activeQueue) close() {
|
||||
aq.lock.Lock()
|
||||
defer aq.lock.Unlock()
|
||||
// We should call done() for all in-flight pods to clean up the inFlightEvents metrics.
|
||||
// It's safe even if the binding cycle running asynchronously calls done() afterwards
|
||||
// done() will just be a no-op.
|
||||
for pod := range aq.inFlightPods {
|
||||
aq.done(pod)
|
||||
aq.unlockedDone(pod)
|
||||
}
|
||||
aq.lock.Lock()
|
||||
aq.closed = true
|
||||
aq.lock.Unlock()
|
||||
}
|
||||
|
||||
// broadcast notifies the pop() operation that new pod(s) was added to the activeQueue.
|
||||
|
405
e2e/vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/backoff_queue.go
generated
vendored
Normal file
405
e2e/vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/backoff_queue.go
generated
vendored
Normal file
@ -0,0 +1,405 @@
|
||||
/*
|
||||
Copyright 2025 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package queue
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/scheduler/backend/heap"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
"k8s.io/kubernetes/pkg/scheduler/metrics"
|
||||
"k8s.io/utils/clock"
|
||||
)
|
||||
|
||||
// backoffQOrderingWindowDuration is a duration of an ordering window in the podBackoffQ.
|
||||
// In each window, represented as a whole second, pods are ordered by priority.
|
||||
// It is the same as interval of flushing the pods from the podBackoffQ to the activeQ, to flush the whole windows there.
|
||||
// This works only if PopFromBackoffQ feature is enabled.
|
||||
// See the KEP-5142 (http://kep.k8s.io/5142) for rationale.
|
||||
const backoffQOrderingWindowDuration = time.Second
|
||||
|
||||
// backoffQueuer is a wrapper for backoffQ related operations.
|
||||
// Its methods that rely on the queues take the lock inside.
|
||||
type backoffQueuer interface {
|
||||
// isPodBackingoff returns true if a pod is still waiting for its backoff timer.
|
||||
// If this returns true, the pod should not be re-tried.
|
||||
// If the pod backoff time is in the actual ordering window, it should still be backing off.
|
||||
isPodBackingoff(podInfo *framework.QueuedPodInfo) bool
|
||||
// popAllBackoffCompleted pops all pods from podBackoffQ and podErrorBackoffQ that completed backoff.
|
||||
popAllBackoffCompleted(logger klog.Logger) []*framework.QueuedPodInfo
|
||||
|
||||
// podInitialBackoffDuration returns initial backoff duration that pod can get.
|
||||
podInitialBackoffDuration() time.Duration
|
||||
// podMaxBackoffDuration returns maximum backoff duration that pod can get.
|
||||
podMaxBackoffDuration() time.Duration
|
||||
// waitUntilAlignedWithOrderingWindow waits until the time reaches a multiple of backoffQOrderingWindowDuration.
|
||||
// It then runs the f function at the backoffQOrderingWindowDuration interval using a ticker.
|
||||
// It's important to align the flushing time, because podBackoffQ's ordering is based on the windows
|
||||
// and whole windows have to be flushed at one time without a visible latency.
|
||||
waitUntilAlignedWithOrderingWindow(f func(), stopCh <-chan struct{})
|
||||
|
||||
// add adds the pInfo to backoffQueue.
|
||||
// The event should show which event triggered this addition and is used for the metric recording.
|
||||
// It also ensures that pInfo is not in both queues.
|
||||
add(logger klog.Logger, pInfo *framework.QueuedPodInfo, event string)
|
||||
// update updates the pod in backoffQueue if oldPodInfo is already in the queue.
|
||||
// It returns new pod info if updated, nil otherwise.
|
||||
update(newPod *v1.Pod, oldPodInfo *framework.QueuedPodInfo) *framework.QueuedPodInfo
|
||||
// delete deletes the pInfo from backoffQueue.
|
||||
// It returns true if the pod was deleted.
|
||||
delete(pInfo *framework.QueuedPodInfo) bool
|
||||
// get returns the pInfo matching given pInfoLookup, if exists.
|
||||
get(pInfoLookup *framework.QueuedPodInfo) (*framework.QueuedPodInfo, bool)
|
||||
// has reports whether pInfo exists in the queue.
|
||||
has(pInfo *framework.QueuedPodInfo) bool
|
||||
// list returns all pods that are in the queue.
|
||||
list() []*v1.Pod
|
||||
// len returns length of the queue.
|
||||
len() int
|
||||
}
|
||||
|
||||
// backoffQueue implements backoffQueuer and wraps two queues inside,
|
||||
// providing seamless access as if it were one queue.
|
||||
type backoffQueue struct {
|
||||
// lock synchronizes all operations related to backoffQ.
|
||||
// It protects both podBackoffQ and podErrorBackoffQ.
|
||||
// Caution: DO NOT take "SchedulingQueue.lock" or "activeQueue.lock" after taking "lock".
|
||||
// You should always take "SchedulingQueue.lock" and "activeQueue.lock" first, otherwise the queue could end up in deadlock.
|
||||
// "lock" should not be taken after taking "nominator.nLock".
|
||||
// Correct locking order is: SchedulingQueue.lock > activeQueue.lock > lock > nominator.nLock.
|
||||
lock sync.RWMutex
|
||||
|
||||
clock clock.WithTicker
|
||||
|
||||
// podBackoffQ is a heap ordered by backoff expiry. Pods which have completed backoff
|
||||
// are popped from this heap before the scheduler looks at activeQ
|
||||
podBackoffQ *heap.Heap[*framework.QueuedPodInfo]
|
||||
// podErrorBackoffQ is a heap ordered by error backoff expiry. Pods which have completed backoff
|
||||
// are popped from this heap before the scheduler looks at activeQ
|
||||
podErrorBackoffQ *heap.Heap[*framework.QueuedPodInfo]
|
||||
|
||||
podInitialBackoff time.Duration
|
||||
podMaxBackoff time.Duration
|
||||
// activeQLessFn is used as an eventual less function if two backoff times are equal,
|
||||
// when the SchedulerPopFromBackoffQ feature is enabled.
|
||||
activeQLessFn framework.LessFunc
|
||||
|
||||
// isPopFromBackoffQEnabled indicates whether the feature gate SchedulerPopFromBackoffQ is enabled.
|
||||
isPopFromBackoffQEnabled bool
|
||||
}
|
||||
|
||||
func newBackoffQueue(clock clock.WithTicker, podInitialBackoffDuration time.Duration, podMaxBackoffDuration time.Duration, activeQLessFn framework.LessFunc, popFromBackoffQEnabled bool) *backoffQueue {
|
||||
bq := &backoffQueue{
|
||||
clock: clock,
|
||||
podInitialBackoff: podInitialBackoffDuration,
|
||||
podMaxBackoff: podMaxBackoffDuration,
|
||||
isPopFromBackoffQEnabled: popFromBackoffQEnabled,
|
||||
activeQLessFn: activeQLessFn,
|
||||
}
|
||||
podBackoffQLessFn := bq.lessBackoffCompleted
|
||||
if popFromBackoffQEnabled {
|
||||
podBackoffQLessFn = bq.lessBackoffCompletedWithPriority
|
||||
}
|
||||
bq.podBackoffQ = heap.NewWithRecorder(podInfoKeyFunc, podBackoffQLessFn, metrics.NewBackoffPodsRecorder())
|
||||
bq.podErrorBackoffQ = heap.NewWithRecorder(podInfoKeyFunc, bq.lessBackoffCompleted, metrics.NewBackoffPodsRecorder())
|
||||
|
||||
return bq
|
||||
}
|
||||
|
||||
// podInitialBackoffDuration returns initial backoff duration that pod can get.
|
||||
func (bq *backoffQueue) podInitialBackoffDuration() time.Duration {
|
||||
return bq.podInitialBackoff
|
||||
}
|
||||
|
||||
// podMaxBackoffDuration returns maximum backoff duration that pod can get.
|
||||
func (bq *backoffQueue) podMaxBackoffDuration() time.Duration {
|
||||
return bq.podMaxBackoff
|
||||
}
|
||||
|
||||
// alignToWindow truncates the provided time to the podBackoffQ ordering window.
|
||||
// It returns the lowest possible timestamp in the window.
|
||||
func (bq *backoffQueue) alignToWindow(t time.Time) time.Time {
|
||||
if !bq.isPopFromBackoffQEnabled {
|
||||
return t
|
||||
}
|
||||
return t.Truncate(backoffQOrderingWindowDuration)
|
||||
}
|
||||
|
||||
// waitUntilAlignedWithOrderingWindow waits until the time reaches a multiple of backoffQOrderingWindowDuration.
|
||||
// It then runs the f function at the backoffQOrderingWindowDuration interval using a ticker.
|
||||
// It's important to align the flushing time, because podBackoffQ's ordering is based on the windows
|
||||
// and whole windows have to be flushed at one time without a visible latency.
|
||||
func (bq *backoffQueue) waitUntilAlignedWithOrderingWindow(f func(), stopCh <-chan struct{}) {
|
||||
now := bq.clock.Now()
|
||||
// Wait until the time reaches the multiple of backoffQOrderingWindowDuration.
|
||||
durationToNextWindow := bq.alignToWindow(now.Add(backoffQOrderingWindowDuration)).Sub(now)
|
||||
timer := bq.clock.NewTimer(durationToNextWindow)
|
||||
select {
|
||||
case <-stopCh:
|
||||
timer.Stop()
|
||||
return
|
||||
case <-timer.C():
|
||||
}
|
||||
|
||||
// Run a ticker to make sure the invocations of f function
|
||||
// are aligned with the backoffQ's ordering window.
|
||||
ticker := bq.clock.NewTicker(backoffQOrderingWindowDuration)
|
||||
for {
|
||||
select {
|
||||
case <-stopCh:
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
f()
|
||||
|
||||
// NOTE: b/c there is no priority selection in golang
|
||||
// it is possible for this to race, meaning we could
|
||||
// trigger ticker.C and stopCh, and ticker.C select falls through.
|
||||
// In order to mitigate we re-check stopCh at the beginning
|
||||
// of every loop to prevent extra executions of f().
|
||||
select {
|
||||
case <-stopCh:
|
||||
ticker.Stop()
|
||||
return
|
||||
case <-ticker.C():
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// lessBackoffCompletedWithPriority is a less function of podBackoffQ if PopFromBackoffQ feature is enabled.
|
||||
// It orders the pods in the same BackoffOrderingWindow the same as the activeQ will do to improve popping order from backoffQ when activeQ is empty.
|
||||
func (bq *backoffQueue) lessBackoffCompletedWithPriority(pInfo1, pInfo2 *framework.QueuedPodInfo) bool {
|
||||
bo1 := bq.getBackoffTime(pInfo1)
|
||||
bo2 := bq.getBackoffTime(pInfo2)
|
||||
if !bo1.Equal(bo2) {
|
||||
return bo1.Before(bo2)
|
||||
}
|
||||
// If the backoff time is the same, sort the pod in the same manner as activeQ does.
|
||||
return bq.activeQLessFn(pInfo1, pInfo2)
|
||||
}
|
||||
|
||||
// lessBackoffCompleted is a less function of podErrorBackoffQ.
|
||||
func (bq *backoffQueue) lessBackoffCompleted(pInfo1, pInfo2 *framework.QueuedPodInfo) bool {
|
||||
bo1 := bq.getBackoffTime(pInfo1)
|
||||
bo2 := bq.getBackoffTime(pInfo2)
|
||||
return bo1.Before(bo2)
|
||||
}
|
||||
|
||||
// isPodBackingoff returns true if a pod is still waiting for its backoff timer.
|
||||
// If this returns true, the pod should not be re-tried.
|
||||
// If the pod backoff time is in the actual ordering window, it should still be backing off.
|
||||
func (bq *backoffQueue) isPodBackingoff(podInfo *framework.QueuedPodInfo) bool {
|
||||
boTime := bq.getBackoffTime(podInfo)
|
||||
// Don't use After, because in case of windows equality we want to return true.
|
||||
return !boTime.Before(bq.alignToWindow(bq.clock.Now()))
|
||||
}
|
||||
|
||||
// getBackoffTime returns the time that podInfo completes backoff.
|
||||
// It caches the result in podInfo.BackoffExpiration and returns this value in subsequent calls.
|
||||
// The cache will be cleared when this pod is poped from the scheduling queue again (i.e., at activeQ's pop),
|
||||
// because of the fact that the backoff time is calculated based on podInfo.Attempts,
|
||||
// which doesn't get changed until the pod's scheduling is retried.
|
||||
func (bq *backoffQueue) getBackoffTime(podInfo *framework.QueuedPodInfo) time.Time {
|
||||
if podInfo.Attempts == 0 {
|
||||
// Don't store backoff expiration if the duration is 0
|
||||
// to correctly handle isPodBackingoff, if pod should skip backoff, when it wasn't tried at all.
|
||||
return time.Time{}
|
||||
}
|
||||
if podInfo.BackoffExpiration.IsZero() {
|
||||
duration := bq.calculateBackoffDuration(podInfo)
|
||||
podInfo.BackoffExpiration = bq.alignToWindow(podInfo.Timestamp.Add(duration))
|
||||
}
|
||||
return podInfo.BackoffExpiration
|
||||
}
|
||||
|
||||
// calculateBackoffDuration is a helper function for calculating the backoffDuration
|
||||
// based on the number of attempts the pod has made.
|
||||
func (bq *backoffQueue) calculateBackoffDuration(podInfo *framework.QueuedPodInfo) time.Duration {
|
||||
if podInfo.Attempts == 0 {
|
||||
// When the Pod hasn't experienced any scheduling attempts,
|
||||
// they aren't obliged to get a backoff penalty at all.
|
||||
return 0
|
||||
}
|
||||
|
||||
duration := bq.podInitialBackoff
|
||||
for i := 1; i < podInfo.Attempts; i++ {
|
||||
// Use subtraction instead of addition or multiplication to avoid overflow.
|
||||
if duration > bq.podMaxBackoff-duration {
|
||||
return bq.podMaxBackoff
|
||||
}
|
||||
duration += duration
|
||||
}
|
||||
return duration
|
||||
}
|
||||
|
||||
func (bq *backoffQueue) popAllBackoffCompletedWithQueue(logger klog.Logger, queue *heap.Heap[*framework.QueuedPodInfo]) []*framework.QueuedPodInfo {
|
||||
var poppedPods []*framework.QueuedPodInfo
|
||||
for {
|
||||
pInfo, ok := queue.Peek()
|
||||
if !ok || pInfo == nil {
|
||||
break
|
||||
}
|
||||
pod := pInfo.Pod
|
||||
if bq.isPodBackingoff(pInfo) {
|
||||
break
|
||||
}
|
||||
_, err := queue.Pop()
|
||||
if err != nil {
|
||||
logger.Error(err, "Unable to pop pod from backoff queue despite backoff completion", "pod", klog.KObj(pod))
|
||||
break
|
||||
}
|
||||
poppedPods = append(poppedPods, pInfo)
|
||||
}
|
||||
return poppedPods
|
||||
}
|
||||
|
||||
// popAllBackoffCompleted pops all pods from podBackoffQ and podErrorBackoffQ that completed backoff.
|
||||
func (bq *backoffQueue) popAllBackoffCompleted(logger klog.Logger) []*framework.QueuedPodInfo {
|
||||
bq.lock.Lock()
|
||||
defer bq.lock.Unlock()
|
||||
|
||||
// Ensure both queues are called
|
||||
return append(bq.popAllBackoffCompletedWithQueue(logger, bq.podBackoffQ), bq.popAllBackoffCompletedWithQueue(logger, bq.podErrorBackoffQ)...)
|
||||
}
|
||||
|
||||
// add adds the pInfo to backoffQueue.
|
||||
// The event should show which event triggered this addition and is used for the metric recording.
|
||||
// It also ensures that pInfo is not in both queues.
|
||||
func (bq *backoffQueue) add(logger klog.Logger, pInfo *framework.QueuedPodInfo, event string) {
|
||||
bq.lock.Lock()
|
||||
defer bq.lock.Unlock()
|
||||
|
||||
// If pod has empty both unschedulable plugins and pending plugins,
|
||||
// it means that it failed because of error and should be moved to podErrorBackoffQ.
|
||||
if pInfo.UnschedulablePlugins.Len() == 0 && pInfo.PendingPlugins.Len() == 0 {
|
||||
bq.podErrorBackoffQ.AddOrUpdate(pInfo)
|
||||
// Ensure the pod is not in the podBackoffQ and report the error if it happens.
|
||||
err := bq.podBackoffQ.Delete(pInfo)
|
||||
if err == nil {
|
||||
logger.Error(nil, "BackoffQueue add() was called with a pod that was already in the podBackoffQ", "pod", klog.KObj(pInfo.Pod))
|
||||
return
|
||||
}
|
||||
metrics.SchedulerQueueIncomingPods.WithLabelValues("backoff", event).Inc()
|
||||
return
|
||||
}
|
||||
bq.podBackoffQ.AddOrUpdate(pInfo)
|
||||
// Ensure the pod is not in the podErrorBackoffQ and report the error if it happens.
|
||||
err := bq.podErrorBackoffQ.Delete(pInfo)
|
||||
if err == nil {
|
||||
logger.Error(nil, "BackoffQueue add() was called with a pod that was already in the podErrorBackoffQ", "pod", klog.KObj(pInfo.Pod))
|
||||
return
|
||||
}
|
||||
metrics.SchedulerQueueIncomingPods.WithLabelValues("backoff", event).Inc()
|
||||
}
|
||||
|
||||
// update updates the pod in backoffQueue if oldPodInfo is already in the queue.
|
||||
// It returns new pod info if updated, nil otherwise.
|
||||
func (bq *backoffQueue) update(newPod *v1.Pod, oldPodInfo *framework.QueuedPodInfo) *framework.QueuedPodInfo {
|
||||
bq.lock.Lock()
|
||||
defer bq.lock.Unlock()
|
||||
|
||||
// If the pod is in the backoff queue, update it there.
|
||||
if pInfo, exists := bq.podBackoffQ.Get(oldPodInfo); exists {
|
||||
_ = pInfo.Update(newPod)
|
||||
bq.podBackoffQ.AddOrUpdate(pInfo)
|
||||
return pInfo
|
||||
}
|
||||
// If the pod is in the error backoff queue, update it there.
|
||||
if pInfo, exists := bq.podErrorBackoffQ.Get(oldPodInfo); exists {
|
||||
_ = pInfo.Update(newPod)
|
||||
bq.podErrorBackoffQ.AddOrUpdate(pInfo)
|
||||
return pInfo
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// delete deletes the pInfo from backoffQueue.
|
||||
// It returns true if the pod was deleted.
|
||||
func (bq *backoffQueue) delete(pInfo *framework.QueuedPodInfo) bool {
|
||||
bq.lock.Lock()
|
||||
defer bq.lock.Unlock()
|
||||
|
||||
if bq.podBackoffQ.Delete(pInfo) == nil {
|
||||
return true
|
||||
}
|
||||
return bq.podErrorBackoffQ.Delete(pInfo) == nil
|
||||
}
|
||||
|
||||
// popBackoff pops the pInfo from the podBackoffQ.
|
||||
// It returns error if the queue is empty.
|
||||
// This doesn't pop the pods from the podErrorBackoffQ.
|
||||
func (bq *backoffQueue) popBackoff() (*framework.QueuedPodInfo, error) {
|
||||
bq.lock.Lock()
|
||||
defer bq.lock.Unlock()
|
||||
|
||||
return bq.podBackoffQ.Pop()
|
||||
}
|
||||
|
||||
// get returns the pInfo matching given pInfoLookup, if exists.
|
||||
func (bq *backoffQueue) get(pInfoLookup *framework.QueuedPodInfo) (*framework.QueuedPodInfo, bool) {
|
||||
bq.lock.RLock()
|
||||
defer bq.lock.RUnlock()
|
||||
|
||||
pInfo, exists := bq.podBackoffQ.Get(pInfoLookup)
|
||||
if exists {
|
||||
return pInfo, true
|
||||
}
|
||||
return bq.podErrorBackoffQ.Get(pInfoLookup)
|
||||
}
|
||||
|
||||
// has inform if pInfo exists in the queue.
|
||||
func (bq *backoffQueue) has(pInfo *framework.QueuedPodInfo) bool {
|
||||
bq.lock.RLock()
|
||||
defer bq.lock.RUnlock()
|
||||
|
||||
return bq.podBackoffQ.Has(pInfo) || bq.podErrorBackoffQ.Has(pInfo)
|
||||
}
|
||||
|
||||
// list returns all pods that are in the queue.
|
||||
func (bq *backoffQueue) list() []*v1.Pod {
|
||||
bq.lock.RLock()
|
||||
defer bq.lock.RUnlock()
|
||||
|
||||
var result []*v1.Pod
|
||||
for _, pInfo := range bq.podBackoffQ.List() {
|
||||
result = append(result, pInfo.Pod)
|
||||
}
|
||||
for _, pInfo := range bq.podErrorBackoffQ.List() {
|
||||
result = append(result, pInfo.Pod)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// len returns length of the queue.
|
||||
func (bq *backoffQueue) len() int {
|
||||
bq.lock.RLock()
|
||||
defer bq.lock.RUnlock()
|
||||
|
||||
return bq.podBackoffQ.Len() + bq.podErrorBackoffQ.Len()
|
||||
}
|
||||
|
||||
// lenBackoff returns length of the podBackoffQ.
|
||||
func (bq *backoffQueue) lenBackoff() int {
|
||||
bq.lock.RLock()
|
||||
defer bq.lock.RUnlock()
|
||||
|
||||
return bq.podBackoffQ.Len()
|
||||
}
|
6
e2e/vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/nominator.go
generated
vendored
6
e2e/vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/nominator.go
generated
vendored
@ -35,10 +35,10 @@ import (
|
||||
type nominator struct {
|
||||
// nLock synchronizes all operations related to nominator.
|
||||
// It should not be used anywhere else.
|
||||
// Caution: DO NOT take ("SchedulingQueue.lock" or "activeQueue.lock") after taking "nLock".
|
||||
// You should always take "SchedulingQueue.lock" and "activeQueue.lock" first,
|
||||
// Caution: DO NOT take ("SchedulingQueue.lock" or "activeQueue.lock" or "backoffQueue.lock") after taking "nLock".
|
||||
// You should always take "SchedulingQueue.lock" and "activeQueue.lock" and "backoffQueue.lock" first,
|
||||
// otherwise the nominator could end up in deadlock.
|
||||
// Correct locking order is: SchedulingQueue.lock > activeQueue.lock > nLock.
|
||||
// Correct locking order is: SchedulingQueue.lock > activeQueue.lock > backoffQueue.lock > nLock.
|
||||
nLock sync.RWMutex
|
||||
|
||||
// podLister is used to verify if the given pod is alive.
|
||||
|
276
e2e/vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/scheduling_queue.go
generated
vendored
276
e2e/vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/scheduling_queue.go
generated
vendored
@ -132,6 +132,9 @@ type SchedulingQueue interface {
|
||||
PendingPods() ([]*v1.Pod, string)
|
||||
InFlightPods() []*v1.Pod
|
||||
PodsInActiveQ() []*v1.Pod
|
||||
// PodsInBackoffQ returns all the Pods in the backoffQ.
|
||||
PodsInBackoffQ() []*v1.Pod
|
||||
UnschedulablePods() []*v1.Pod
|
||||
}
|
||||
|
||||
// NewSchedulingQueue initializes a priority queue as a new scheduling queue.
|
||||
@ -155,24 +158,18 @@ type PriorityQueue struct {
|
||||
*nominator
|
||||
|
||||
stop chan struct{}
|
||||
clock clock.Clock
|
||||
clock clock.WithTicker
|
||||
|
||||
// lock takes precedence and should be taken first,
|
||||
// before any other locks in the queue (activeQueue.lock or nominator.nLock).
|
||||
// Correct locking order is: lock > activeQueue.lock > nominator.nLock.
|
||||
// before any other locks in the queue (activeQueue.lock or backoffQueue.lock or nominator.nLock).
|
||||
// Correct locking order is: lock > activeQueue.lock > backoffQueue.lock > nominator.nLock.
|
||||
lock sync.RWMutex
|
||||
|
||||
// pod initial backoff duration.
|
||||
podInitialBackoffDuration time.Duration
|
||||
// pod maximum backoff duration.
|
||||
podMaxBackoffDuration time.Duration
|
||||
// the maximum time a pod can stay in the unschedulablePods.
|
||||
podMaxInUnschedulablePodsDuration time.Duration
|
||||
|
||||
activeQ activeQueuer
|
||||
// podBackoffQ is a heap ordered by backoff expiry. Pods which have completed backoff
|
||||
// are popped from this heap before the scheduler looks at activeQ
|
||||
podBackoffQ *heap.Heap[*framework.QueuedPodInfo]
|
||||
activeQ activeQueuer
|
||||
backoffQ backoffQueuer
|
||||
// unschedulablePods holds pods that have been tried and determined unschedulable.
|
||||
unschedulablePods *UnschedulablePods
|
||||
// moveRequestCycle caches the sequence number of scheduling cycle when we
|
||||
@ -195,6 +192,8 @@ type PriorityQueue struct {
|
||||
|
||||
// isSchedulingQueueHintEnabled indicates whether the feature gate for the scheduling queue is enabled.
|
||||
isSchedulingQueueHintEnabled bool
|
||||
// isPopFromBackoffQEnabled indicates whether the feature gate SchedulerPopFromBackoffQ is enabled.
|
||||
isPopFromBackoffQEnabled bool
|
||||
}
|
||||
|
||||
// QueueingHintFunction is the wrapper of QueueingHintFn that has PluginName.
|
||||
@ -213,7 +212,7 @@ type clusterEvent struct {
|
||||
}
|
||||
|
||||
type priorityQueueOptions struct {
|
||||
clock clock.Clock
|
||||
clock clock.WithTicker
|
||||
podInitialBackoffDuration time.Duration
|
||||
podMaxBackoffDuration time.Duration
|
||||
podMaxInUnschedulablePodsDuration time.Duration
|
||||
@ -228,7 +227,7 @@ type priorityQueueOptions struct {
|
||||
type Option func(*priorityQueueOptions)
|
||||
|
||||
// WithClock sets clock for PriorityQueue, the default clock is clock.RealClock.
|
||||
func WithClock(clock clock.Clock) Option {
|
||||
func WithClock(clock clock.WithTicker) Option {
|
||||
return func(o *priorityQueueOptions) {
|
||||
o.clock = clock
|
||||
}
|
||||
@ -331,14 +330,14 @@ func NewPriorityQueue(
|
||||
}
|
||||
|
||||
isSchedulingQueueHintEnabled := utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints)
|
||||
isPopFromBackoffQEnabled := utilfeature.DefaultFeatureGate.Enabled(features.SchedulerPopFromBackoffQ)
|
||||
|
||||
backoffQ := newBackoffQueue(options.clock, options.podInitialBackoffDuration, options.podMaxBackoffDuration, lessFn, isPopFromBackoffQEnabled)
|
||||
pq := &PriorityQueue{
|
||||
clock: options.clock,
|
||||
stop: make(chan struct{}),
|
||||
podInitialBackoffDuration: options.podInitialBackoffDuration,
|
||||
podMaxBackoffDuration: options.podMaxBackoffDuration,
|
||||
podMaxInUnschedulablePodsDuration: options.podMaxInUnschedulablePodsDuration,
|
||||
activeQ: newActiveQueue(heap.NewWithRecorder(podInfoKeyFunc, heap.LessFunc[*framework.QueuedPodInfo](lessFn), metrics.NewActivePodsRecorder()), isSchedulingQueueHintEnabled, options.metricsRecorder),
|
||||
backoffQ: backoffQ,
|
||||
unschedulablePods: newUnschedulablePods(metrics.NewUnschedulablePodsRecorder(), metrics.NewGatedPodsRecorder()),
|
||||
preEnqueuePluginMap: options.preEnqueuePluginMap,
|
||||
queueingHintMap: options.queueingHintMap,
|
||||
@ -346,19 +345,24 @@ func NewPriorityQueue(
|
||||
pluginMetricsSamplePercent: options.pluginMetricsSamplePercent,
|
||||
moveRequestCycle: -1,
|
||||
isSchedulingQueueHintEnabled: isSchedulingQueueHintEnabled,
|
||||
isPopFromBackoffQEnabled: isPopFromBackoffQEnabled,
|
||||
}
|
||||
pq.podBackoffQ = heap.NewWithRecorder(podInfoKeyFunc, pq.podsCompareBackoffCompleted, metrics.NewBackoffPodsRecorder())
|
||||
var backoffQPopper backoffQPopper
|
||||
if isPopFromBackoffQEnabled {
|
||||
backoffQPopper = backoffQ
|
||||
}
|
||||
pq.activeQ = newActiveQueue(heap.NewWithRecorder(podInfoKeyFunc, heap.LessFunc[*framework.QueuedPodInfo](lessFn), metrics.NewActivePodsRecorder()), isSchedulingQueueHintEnabled, options.metricsRecorder, backoffQPopper)
|
||||
pq.nsLister = informerFactory.Core().V1().Namespaces().Lister()
|
||||
pq.nominator = newPodNominator(options.podLister)
|
||||
|
||||
return pq
|
||||
}
|
||||
|
||||
// Run starts the goroutine to pump from podBackoffQ to activeQ
|
||||
// Run starts the goroutine to pump from backoffQ to activeQ
|
||||
func (p *PriorityQueue) Run(logger klog.Logger) {
|
||||
go wait.Until(func() {
|
||||
go p.backoffQ.waitUntilAlignedWithOrderingWindow(func() {
|
||||
p.flushBackoffQCompleted(logger)
|
||||
}, 1.0*time.Second, p.stop)
|
||||
}, p.stop)
|
||||
go wait.Until(func() {
|
||||
p.flushUnschedulablePodsLeftover(logger)
|
||||
}, 30*time.Second, p.stop)
|
||||
@ -553,25 +557,33 @@ func (p *PriorityQueue) runPreEnqueuePlugin(ctx context.Context, pl framework.Pr
|
||||
return s
|
||||
}
|
||||
|
||||
// moveToActiveQ tries to add pod to active queue and remove it from unschedulable and backoff queues.
|
||||
// It returns 2 parameters:
|
||||
// 1. a boolean flag to indicate whether the pod is added successfully.
|
||||
// 2. an error for the caller to act on.
|
||||
// moveToActiveQ tries to add the pod to the active queue.
|
||||
// If the pod doesn't pass PreEnqueue plugins, it gets added to unschedulablePods instead.
|
||||
// It returns a boolean flag to indicate whether the pod is added successfully.
|
||||
func (p *PriorityQueue) moveToActiveQ(logger klog.Logger, pInfo *framework.QueuedPodInfo, event string) bool {
|
||||
gatedBefore := pInfo.Gated
|
||||
pInfo.Gated = !p.runPreEnqueuePlugins(context.Background(), pInfo)
|
||||
// If SchedulerPopFromBackoffQ feature gate is enabled,
|
||||
// PreEnqueue plugins were called when the pod was added to the backoffQ.
|
||||
// Don't need to repeat it here when the pod is directly moved from the backoffQ.
|
||||
if !p.isPopFromBackoffQEnabled || event != framework.BackoffComplete {
|
||||
pInfo.Gated = !p.runPreEnqueuePlugins(context.Background(), pInfo)
|
||||
}
|
||||
|
||||
added := false
|
||||
p.activeQ.underLock(func(unlockedActiveQ unlockedActiveQueuer) {
|
||||
if pInfo.Gated {
|
||||
// Add the Pod to unschedulablePods if it's not passing PreEnqueuePlugins.
|
||||
if unlockedActiveQ.Has(pInfo) {
|
||||
if unlockedActiveQ.has(pInfo) {
|
||||
return
|
||||
}
|
||||
if p.podBackoffQ.Has(pInfo) {
|
||||
if p.backoffQ.has(pInfo) {
|
||||
return
|
||||
}
|
||||
p.unschedulablePods.addOrUpdate(pInfo)
|
||||
if p.unschedulablePods.get(pInfo.Pod) != nil {
|
||||
return
|
||||
}
|
||||
p.unschedulablePods.addOrUpdate(pInfo, event)
|
||||
logger.V(5).Info("Pod moved to an internal scheduling queue, because the pod is gated", "pod", klog.KObj(pInfo.Pod), "event", event, "queue", unschedulablePods)
|
||||
return
|
||||
}
|
||||
if pInfo.InitialAttemptTimestamp == nil {
|
||||
@ -579,13 +591,12 @@ func (p *PriorityQueue) moveToActiveQ(logger klog.Logger, pInfo *framework.Queue
|
||||
pInfo.InitialAttemptTimestamp = &now
|
||||
}
|
||||
|
||||
unlockedActiveQ.AddOrUpdate(pInfo)
|
||||
unlockedActiveQ.add(pInfo, event)
|
||||
added = true
|
||||
|
||||
p.unschedulablePods.delete(pInfo.Pod, gatedBefore)
|
||||
_ = p.podBackoffQ.Delete(pInfo) // Don't need to react when pInfo is not found.
|
||||
p.backoffQ.delete(pInfo)
|
||||
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", event, "queue", activeQ)
|
||||
metrics.SchedulerQueueIncomingPods.WithLabelValues("active", event).Inc()
|
||||
if event == framework.EventUnscheduledPodAdd.Label() || event == framework.EventUnscheduledPodUpdate.Label() {
|
||||
p.AddNominatedPod(logger, pInfo.PodInfo, nil)
|
||||
}
|
||||
@ -593,6 +604,28 @@ func (p *PriorityQueue) moveToActiveQ(logger klog.Logger, pInfo *framework.Queue
|
||||
return added
|
||||
}
|
||||
|
||||
// moveToBackoffQ tries to add the pod to the backoff queue.
|
||||
// If SchedulerPopFromBackoffQ feature gate is enabled and the pod doesn't pass PreEnqueue plugins, it gets added to unschedulablePods instead.
|
||||
// It returns a boolean flag to indicate whether the pod is added successfully.
|
||||
func (p *PriorityQueue) moveToBackoffQ(logger klog.Logger, pInfo *framework.QueuedPodInfo, event string) bool {
|
||||
// If SchedulerPopFromBackoffQ feature gate is enabled,
|
||||
// PreEnqueue plugins are called on inserting pods to the backoffQ,
|
||||
// not to call them again on popping out.
|
||||
if p.isPopFromBackoffQEnabled {
|
||||
pInfo.Gated = !p.runPreEnqueuePlugins(context.Background(), pInfo)
|
||||
if pInfo.Gated {
|
||||
if p.unschedulablePods.get(pInfo.Pod) == nil {
|
||||
p.unschedulablePods.addOrUpdate(pInfo, event)
|
||||
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", event, "queue", unschedulablePods)
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
p.backoffQ.add(logger, pInfo, event)
|
||||
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", event, "queue", backoffQ)
|
||||
return true
|
||||
}
|
||||
|
||||
// Add adds a pod to the active queue. It should be called only when a new pod
|
||||
// is added so there is no chance the pod is already in active/unschedulable/backoff queues
|
||||
func (p *PriorityQueue) Add(logger klog.Logger, pod *v1.Pod) {
|
||||
@ -641,10 +674,16 @@ func (p *PriorityQueue) activate(logger klog.Logger, pod *v1.Pod) bool {
|
||||
// If the pod doesn't belong to unschedulablePods or backoffQ, don't activate it.
|
||||
// The pod can be already in activeQ.
|
||||
var exists bool
|
||||
pInfo, exists = p.podBackoffQ.Get(newQueuedPodInfoForLookup(pod))
|
||||
pInfo, exists = p.backoffQ.get(newQueuedPodInfoForLookup(pod))
|
||||
if !exists {
|
||||
return false
|
||||
}
|
||||
// Delete pod from the backoffQ now to make sure it won't be popped from the backoffQ
|
||||
// just before moving it to the activeQ
|
||||
if deleted := p.backoffQ.delete(pInfo); !deleted {
|
||||
// Pod was popped from the backoffQ in the meantime. Don't activate it.
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if pInfo == nil {
|
||||
@ -656,13 +695,6 @@ func (p *PriorityQueue) activate(logger klog.Logger, pod *v1.Pod) bool {
|
||||
return p.moveToActiveQ(logger, pInfo, framework.ForceActivate)
|
||||
}
|
||||
|
||||
// isPodBackingoff returns true if a pod is still waiting for its backoff timer.
|
||||
// If this returns true, the pod should not be re-tried.
|
||||
func (p *PriorityQueue) isPodBackingoff(podInfo *framework.QueuedPodInfo) bool {
|
||||
boTime := p.getBackoffTime(podInfo)
|
||||
return boTime.After(p.clock.Now())
|
||||
}
|
||||
|
||||
// SchedulingCycle returns current scheduling cycle.
|
||||
func (p *PriorityQueue) SchedulingCycle() int64 {
|
||||
return p.activeQ.schedulingCycle()
|
||||
@ -712,7 +744,7 @@ func (p *PriorityQueue) determineSchedulingHintForInFlightPod(logger klog.Logger
|
||||
// addUnschedulableWithoutQueueingHint inserts a pod that cannot be scheduled into
|
||||
// the queue, unless it is already in the queue. Normally, PriorityQueue puts
|
||||
// unschedulable pods in `unschedulablePods`. But if there has been a recent move
|
||||
// request, then the pod is put in `podBackoffQ`.
|
||||
// request, then the pod is put in `backoffQ`.
|
||||
// TODO: This function is called only when p.isSchedulingQueueHintEnabled is false,
|
||||
// and this will be removed after SchedulingQueueHint goes to stable and the feature gate is removed.
|
||||
func (p *PriorityQueue) addUnschedulableWithoutQueueingHint(logger klog.Logger, pInfo *framework.QueuedPodInfo, podSchedulingCycle int64) error {
|
||||
@ -736,13 +768,14 @@ func (p *PriorityQueue) addUnschedulableWithoutQueueingHint(logger klog.Logger,
|
||||
// - No unschedulable plugins are associated with this Pod,
|
||||
// meaning something unusual (a temporary failure on kube-apiserver, etc) happened and this Pod gets moved back to the queue.
|
||||
// In this case, we should retry scheduling it because this Pod may not be retried until the next flush.
|
||||
p.podBackoffQ.AddOrUpdate(pInfo)
|
||||
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", framework.ScheduleAttemptFailure, "queue", backoffQ)
|
||||
metrics.SchedulerQueueIncomingPods.WithLabelValues("backoff", framework.ScheduleAttemptFailure).Inc()
|
||||
if added := p.moveToBackoffQ(logger, pInfo, framework.ScheduleAttemptFailure); added {
|
||||
if p.isPopFromBackoffQEnabled {
|
||||
p.activeQ.broadcast()
|
||||
}
|
||||
}
|
||||
} else {
|
||||
p.unschedulablePods.addOrUpdate(pInfo)
|
||||
p.unschedulablePods.addOrUpdate(pInfo, framework.ScheduleAttemptFailure)
|
||||
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", framework.ScheduleAttemptFailure, "queue", unschedulablePods)
|
||||
metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", framework.ScheduleAttemptFailure).Inc()
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -751,7 +784,7 @@ func (p *PriorityQueue) addUnschedulableWithoutQueueingHint(logger klog.Logger,
|
||||
// AddUnschedulableIfNotPresent inserts a pod that cannot be scheduled into
|
||||
// the queue, unless it is already in the queue. Normally, PriorityQueue puts
|
||||
// unschedulable pods in `unschedulablePods`. But if there has been a recent move
|
||||
// request, then the pod is put in `podBackoffQ`.
|
||||
// request, then the pod is put in `backoffQ`.
|
||||
func (p *PriorityQueue) AddUnschedulableIfNotPresent(logger klog.Logger, pInfo *framework.QueuedPodInfo, podSchedulingCycle int64) error {
|
||||
p.lock.Lock()
|
||||
defer p.lock.Unlock()
|
||||
@ -767,7 +800,7 @@ func (p *PriorityQueue) AddUnschedulableIfNotPresent(logger klog.Logger, pInfo *
|
||||
if p.activeQ.has(pInfo) {
|
||||
return fmt.Errorf("Pod %v is already present in the active queue", klog.KObj(pod))
|
||||
}
|
||||
if p.podBackoffQ.Has(pInfo) {
|
||||
if p.backoffQ.has(pInfo) {
|
||||
return fmt.Errorf("Pod %v is already present in the backoff queue", klog.KObj(pod))
|
||||
}
|
||||
|
||||
@ -792,7 +825,7 @@ func (p *PriorityQueue) AddUnschedulableIfNotPresent(logger klog.Logger, pInfo *
|
||||
// In this case, we try to requeue this Pod to activeQ/backoffQ.
|
||||
queue := p.requeuePodViaQueueingHint(logger, pInfo, schedulingHint, framework.ScheduleAttemptFailure)
|
||||
logger.V(3).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", framework.ScheduleAttemptFailure, "queue", queue, "schedulingCycle", podSchedulingCycle, "hint", schedulingHint, "unschedulable plugins", rejectorPlugins)
|
||||
if queue == activeQ {
|
||||
if queue == activeQ || (p.isPopFromBackoffQEnabled && queue == backoffQ) {
|
||||
// When the Pod is moved to activeQ, need to let p.cond know so that the Pod will be pop()ed out.
|
||||
p.activeQ.broadcast()
|
||||
}
|
||||
@ -805,25 +838,12 @@ func (p *PriorityQueue) flushBackoffQCompleted(logger klog.Logger) {
|
||||
p.lock.Lock()
|
||||
defer p.lock.Unlock()
|
||||
activated := false
|
||||
for {
|
||||
pInfo, ok := p.podBackoffQ.Peek()
|
||||
if !ok || pInfo == nil {
|
||||
break
|
||||
}
|
||||
pod := pInfo.Pod
|
||||
if p.isPodBackingoff(pInfo) {
|
||||
break
|
||||
}
|
||||
_, err := p.podBackoffQ.Pop()
|
||||
if err != nil {
|
||||
logger.Error(err, "Unable to pop pod from backoff queue despite backoff completion", "pod", klog.KObj(pod))
|
||||
break
|
||||
}
|
||||
podsCompletedBackoff := p.backoffQ.popAllBackoffCompleted(logger)
|
||||
for _, pInfo := range podsCompletedBackoff {
|
||||
if added := p.moveToActiveQ(logger, pInfo, framework.BackoffComplete); added {
|
||||
activated = true
|
||||
}
|
||||
}
|
||||
|
||||
if activated {
|
||||
p.activeQ.broadcast()
|
||||
}
|
||||
@ -928,10 +948,8 @@ func (p *PriorityQueue) Update(logger klog.Logger, oldPod, newPod *v1.Pod) {
|
||||
}
|
||||
|
||||
// If the pod is in the backoff queue, update it there.
|
||||
if pInfo, exists := p.podBackoffQ.Get(oldPodInfo); exists {
|
||||
_ = pInfo.Update(newPod)
|
||||
if pInfo := p.backoffQ.update(newPod, oldPodInfo); pInfo != nil {
|
||||
p.UpdateNominatedPod(logger, oldPod, pInfo.PodInfo)
|
||||
p.podBackoffQ.AddOrUpdate(pInfo)
|
||||
return
|
||||
}
|
||||
}
|
||||
@ -953,7 +971,7 @@ func (p *PriorityQueue) Update(logger klog.Logger, oldPod, newPod *v1.Pod) {
|
||||
logger.V(5).Info("Pod moved to an internal scheduling queue because the Pod is updated", "pod", klog.KObj(newPod), "event", evt.Label(), "queue", queue)
|
||||
p.unschedulablePods.delete(pInfo.Pod, gated)
|
||||
}
|
||||
if queue == activeQ {
|
||||
if queue == activeQ || (p.isPopFromBackoffQEnabled && queue == backoffQ) {
|
||||
p.activeQ.broadcast()
|
||||
break
|
||||
}
|
||||
@ -961,21 +979,26 @@ func (p *PriorityQueue) Update(logger klog.Logger, oldPod, newPod *v1.Pod) {
|
||||
return
|
||||
}
|
||||
if isPodUpdated(oldPod, newPod) {
|
||||
if p.isPodBackingoff(pInfo) {
|
||||
p.podBackoffQ.AddOrUpdate(pInfo)
|
||||
p.unschedulablePods.delete(pInfo.Pod, gated)
|
||||
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", framework.EventUnscheduledPodUpdate.Label(), "queue", backoffQ)
|
||||
// Pod might have completed its backoff time while being in unschedulablePods,
|
||||
// so we should check isPodBackingoff before moving the pod to backoffQ.
|
||||
if p.backoffQ.isPodBackingoff(pInfo) {
|
||||
if added := p.moveToBackoffQ(logger, pInfo, framework.EventUnscheduledPodUpdate.Label()); added {
|
||||
p.unschedulablePods.delete(pInfo.Pod, gated)
|
||||
if p.isPopFromBackoffQEnabled {
|
||||
p.activeQ.broadcast()
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if added := p.moveToActiveQ(logger, pInfo, framework.BackoffComplete); added {
|
||||
if added := p.moveToActiveQ(logger, pInfo, framework.EventUnscheduledPodUpdate.Label()); added {
|
||||
p.activeQ.broadcast()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Pod update didn't make it schedulable, keep it in the unschedulable queue.
|
||||
p.unschedulablePods.addOrUpdate(pInfo)
|
||||
p.unschedulablePods.addOrUpdate(pInfo, framework.EventUnscheduledPodUpdate.Label())
|
||||
return
|
||||
}
|
||||
// If pod is not in any of the queues, we put it in the active queue.
|
||||
@ -992,12 +1015,14 @@ func (p *PriorityQueue) Delete(pod *v1.Pod) {
|
||||
defer p.lock.Unlock()
|
||||
p.DeleteNominatedPodIfExists(pod)
|
||||
pInfo := newQueuedPodInfoForLookup(pod)
|
||||
if err := p.activeQ.delete(pInfo); err != nil {
|
||||
// The item was probably not found in the activeQ.
|
||||
p.podBackoffQ.Delete(pInfo)
|
||||
if pInfo = p.unschedulablePods.get(pod); pInfo != nil {
|
||||
p.unschedulablePods.delete(pod, pInfo.Gated)
|
||||
}
|
||||
if err := p.activeQ.delete(pInfo); err == nil {
|
||||
return
|
||||
}
|
||||
if deleted := p.backoffQ.delete(pInfo); deleted {
|
||||
return
|
||||
}
|
||||
if pInfo = p.unschedulablePods.get(pod); pInfo != nil {
|
||||
p.unschedulablePods.delete(pod, pInfo.Gated)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1065,28 +1090,24 @@ func (p *PriorityQueue) MoveAllToActiveOrBackoffQueue(logger klog.Logger, event
|
||||
// NOTE: this function assumes lock has been acquired in caller
|
||||
func (p *PriorityQueue) requeuePodViaQueueingHint(logger klog.Logger, pInfo *framework.QueuedPodInfo, strategy queueingStrategy, event string) string {
|
||||
if strategy == queueSkip {
|
||||
p.unschedulablePods.addOrUpdate(pInfo)
|
||||
metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", event).Inc()
|
||||
p.unschedulablePods.addOrUpdate(pInfo, event)
|
||||
return unschedulablePods
|
||||
}
|
||||
|
||||
if strategy == queueAfterBackoff && p.isPodBackingoff(pInfo) {
|
||||
p.podBackoffQ.AddOrUpdate(pInfo)
|
||||
metrics.SchedulerQueueIncomingPods.WithLabelValues("backoff", event).Inc()
|
||||
return backoffQ
|
||||
// Pod might have completed its backoff time while being in unschedulablePods,
|
||||
// so we should check isPodBackingoff before moving the pod to backoffQ.
|
||||
if strategy == queueAfterBackoff && p.backoffQ.isPodBackingoff(pInfo) {
|
||||
if added := p.moveToBackoffQ(logger, pInfo, event); added {
|
||||
return backoffQ
|
||||
}
|
||||
return unschedulablePods
|
||||
}
|
||||
|
||||
// Reach here if schedulingHint is QueueImmediately, or schedulingHint is Queue but the pod is not backing off.
|
||||
if added := p.moveToActiveQ(logger, pInfo, event); added {
|
||||
return activeQ
|
||||
}
|
||||
if pInfo.Gated {
|
||||
// In case the pod is gated, the Pod is pushed back to unschedulable Pods pool in moveToActiveQ.
|
||||
return unschedulablePods
|
||||
}
|
||||
|
||||
p.unschedulablePods.addOrUpdate(pInfo)
|
||||
metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", framework.ScheduleAttemptFailure).Inc()
|
||||
// Pod is gated. We don't have to push it back to unschedulable queue, because moveToActiveQ should already have done that.
|
||||
return unschedulablePods
|
||||
}
|
||||
|
||||
@ -1128,7 +1149,7 @@ func (p *PriorityQueue) movePodsToActiveOrBackoffQueue(logger klog.Logger, podIn
|
||||
p.unschedulablePods.delete(pInfo.Pod, pInfo.Gated)
|
||||
queue := p.requeuePodViaQueueingHint(logger, pInfo, schedulingHint, event.Label())
|
||||
logger.V(4).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", event.Label(), "queue", queue, "hint", schedulingHint)
|
||||
if queue == activeQ {
|
||||
if queue == activeQ || (p.isPopFromBackoffQEnabled && queue == backoffQ) {
|
||||
activated = true
|
||||
}
|
||||
}
|
||||
@ -1180,6 +1201,20 @@ func (p *PriorityQueue) PodsInActiveQ() []*v1.Pod {
|
||||
return p.activeQ.list()
|
||||
}
|
||||
|
||||
// PodsInBackoffQ returns all the Pods in the backoffQ.
|
||||
func (p *PriorityQueue) PodsInBackoffQ() []*v1.Pod {
|
||||
return p.backoffQ.list()
|
||||
}
|
||||
|
||||
// UnschedulablePods returns all the pods in unschedulable state.
|
||||
func (p *PriorityQueue) UnschedulablePods() []*v1.Pod {
|
||||
var result []*v1.Pod
|
||||
for _, pInfo := range p.unschedulablePods.podInfoMap {
|
||||
result = append(result, pInfo.Pod)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
var pendingPodsSummary = "activeQ:%v; backoffQ:%v; unschedulablePods:%v"
|
||||
|
||||
// GetPod searches for a pod in the activeQ, backoffQ, and unschedulablePods.
|
||||
@ -1197,7 +1232,7 @@ func (p *PriorityQueue) GetPod(name, namespace string) (pInfo *framework.QueuedP
|
||||
},
|
||||
},
|
||||
}
|
||||
if pInfo, ok = p.podBackoffQ.Get(pInfoLookup); ok {
|
||||
if pInfo, ok = p.backoffQ.get(pInfoLookup); ok {
|
||||
return pInfo, true
|
||||
}
|
||||
if pInfo = p.unschedulablePods.get(pInfoLookup.Pod); pInfo != nil {
|
||||
@ -1205,7 +1240,7 @@ func (p *PriorityQueue) GetPod(name, namespace string) (pInfo *framework.QueuedP
|
||||
}
|
||||
|
||||
p.activeQ.underRLock(func(unlockedActiveQ unlockedActiveQueueReader) {
|
||||
pInfo, ok = unlockedActiveQ.Get(pInfoLookup)
|
||||
pInfo, ok = unlockedActiveQ.get(pInfoLookup)
|
||||
})
|
||||
return
|
||||
}
|
||||
@ -1216,15 +1251,15 @@ func (p *PriorityQueue) GetPod(name, namespace string) (pInfo *framework.QueuedP
|
||||
func (p *PriorityQueue) PendingPods() ([]*v1.Pod, string) {
|
||||
p.lock.RLock()
|
||||
defer p.lock.RUnlock()
|
||||
result := p.activeQ.list()
|
||||
result := p.PodsInActiveQ()
|
||||
activeQLen := len(result)
|
||||
for _, pInfo := range p.podBackoffQ.List() {
|
||||
result = append(result, pInfo.Pod)
|
||||
}
|
||||
backoffQPods := p.PodsInBackoffQ()
|
||||
backoffQLen := len(backoffQPods)
|
||||
result = append(result, backoffQPods...)
|
||||
for _, pInfo := range p.unschedulablePods.podInfoMap {
|
||||
result = append(result, pInfo.Pod)
|
||||
}
|
||||
return result, fmt.Sprintf(pendingPodsSummary, activeQLen, p.podBackoffQ.Len(), len(p.unschedulablePods.podInfoMap))
|
||||
return result, fmt.Sprintf(pendingPodsSummary, activeQLen, backoffQLen, len(p.unschedulablePods.podInfoMap))
|
||||
}
|
||||
|
||||
// Note: this function assumes the caller locks both p.lock.RLock and p.activeQ.getLock().RLock.
|
||||
@ -1232,7 +1267,7 @@ func (p *PriorityQueue) nominatedPodToInfo(np podRef, unlockedActiveQ unlockedAc
|
||||
pod := np.toPod()
|
||||
pInfoLookup := newQueuedPodInfoForLookup(pod)
|
||||
|
||||
queuedPodInfo, exists := unlockedActiveQ.Get(pInfoLookup)
|
||||
queuedPodInfo, exists := unlockedActiveQ.get(pInfoLookup)
|
||||
if exists {
|
||||
return queuedPodInfo.PodInfo
|
||||
}
|
||||
@ -1242,7 +1277,7 @@ func (p *PriorityQueue) nominatedPodToInfo(np podRef, unlockedActiveQ unlockedAc
|
||||
return queuedPodInfo.PodInfo
|
||||
}
|
||||
|
||||
queuedPodInfo, exists = p.podBackoffQ.Get(pInfoLookup)
|
||||
queuedPodInfo, exists = p.backoffQ.get(pInfoLookup)
|
||||
if exists {
|
||||
return queuedPodInfo.PodInfo
|
||||
}
|
||||
@ -1276,12 +1311,6 @@ func (p *PriorityQueue) NominatedPodsForNode(nodeName string) []*framework.PodIn
|
||||
return pods
|
||||
}
|
||||
|
||||
func (p *PriorityQueue) podsCompareBackoffCompleted(pInfo1, pInfo2 *framework.QueuedPodInfo) bool {
|
||||
bo1 := p.getBackoffTime(pInfo1)
|
||||
bo2 := p.getBackoffTime(pInfo2)
|
||||
return bo1.Before(bo2)
|
||||
}
|
||||
|
||||
// newQueuedPodInfo builds a QueuedPodInfo object.
|
||||
func (p *PriorityQueue) newQueuedPodInfo(pod *v1.Pod, plugins ...string) *framework.QueuedPodInfo {
|
||||
now := p.clock.Now()
|
||||
@ -1296,33 +1325,6 @@ func (p *PriorityQueue) newQueuedPodInfo(pod *v1.Pod, plugins ...string) *framew
|
||||
}
|
||||
}
|
||||
|
||||
// getBackoffTime returns the time that podInfo completes backoff
|
||||
func (p *PriorityQueue) getBackoffTime(podInfo *framework.QueuedPodInfo) time.Time {
|
||||
duration := p.calculateBackoffDuration(podInfo)
|
||||
backoffTime := podInfo.Timestamp.Add(duration)
|
||||
return backoffTime
|
||||
}
|
||||
|
||||
// calculateBackoffDuration is a helper function for calculating the backoffDuration
|
||||
// based on the number of attempts the pod has made.
|
||||
func (p *PriorityQueue) calculateBackoffDuration(podInfo *framework.QueuedPodInfo) time.Duration {
|
||||
if podInfo.Attempts == 0 {
|
||||
// When the Pod hasn't experienced any scheduling attempts,
|
||||
// they aren't obliged to get a backoff penalty at all.
|
||||
return 0
|
||||
}
|
||||
|
||||
duration := p.podInitialBackoffDuration
|
||||
for i := 1; i < podInfo.Attempts; i++ {
|
||||
// Use subtraction instead of addition or multiplication to avoid overflow.
|
||||
if duration > p.podMaxBackoffDuration-duration {
|
||||
return p.podMaxBackoffDuration
|
||||
}
|
||||
duration += duration
|
||||
}
|
||||
return duration
|
||||
}
|
||||
|
||||
// UnschedulablePods holds pods that cannot be scheduled. This data structure
|
||||
// is used to implement unschedulablePods.
|
||||
type UnschedulablePods struct {
|
||||
@ -1335,7 +1337,8 @@ type UnschedulablePods struct {
|
||||
}
|
||||
|
||||
// addOrUpdate adds a pod to the unschedulable podInfoMap.
|
||||
func (u *UnschedulablePods) addOrUpdate(pInfo *framework.QueuedPodInfo) {
|
||||
// The event should show which event triggered the addition and is used for the metric recording.
|
||||
func (u *UnschedulablePods) addOrUpdate(pInfo *framework.QueuedPodInfo, event string) {
|
||||
podID := u.keyFunc(pInfo.Pod)
|
||||
if _, exists := u.podInfoMap[podID]; !exists {
|
||||
if pInfo.Gated && u.gatedRecorder != nil {
|
||||
@ -1343,6 +1346,7 @@ func (u *UnschedulablePods) addOrUpdate(pInfo *framework.QueuedPodInfo) {
|
||||
} else if !pInfo.Gated && u.unschedulableRecorder != nil {
|
||||
u.unschedulableRecorder.Inc()
|
||||
}
|
||||
metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", event).Inc()
|
||||
}
|
||||
u.podInfoMap[podID] = pInfo
|
||||
}
|
||||
|
Reference in New Issue
Block a user