Mirror of https://github.com/ceph/ceph-csi.git, synced 2025-06-14 18:53:35 +00:00

rebase: update K8s packages to v0.32.1

Update K8s packages in go.mod to v0.32.1

Signed-off-by: Praveen M <m.praveen@ibm.com>
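For orientation, a dependency bump like this lands in go.mod as updated k8s.io module versions. The excerpt below is only an illustrative sketch, not the actual diff of this commit: ceph-csi's real require block lists many more k8s.io/* modules, and the exact set is elided here. Note that the k8s.io/kubernetes module itself follows the v1.32.x tags that correspond to the v0.32.x staging releases.

require (
	k8s.io/api v0.32.1
	k8s.io/apimachinery v0.32.1
	k8s.io/client-go v0.32.1
	k8s.io/kubernetes v1.32.1
)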
vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/active_queue.go (generated, vendored, new file, 415 lines)
@@ -0,0 +1,415 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package queue

import (
	"container/list"
	"fmt"
	"sync"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/scheduler/backend/heap"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/metrics"
)

// activeQueuer is a wrapper for activeQ related operations.
// Its methods, except "unlocked" ones, take the lock inside.
// Note: be careful when using unlocked() methods.
// underLock()/underRLock() should be used only for unlocked() methods
// and it is forbidden to call any other activeQueuer's method under this lock.
type activeQueuer interface {
	underLock(func(unlockedActiveQ unlockedActiveQueuer))
	underRLock(func(unlockedActiveQ unlockedActiveQueueReader))

	update(newPod *v1.Pod, oldPodInfo *framework.QueuedPodInfo) *framework.QueuedPodInfo
	delete(pInfo *framework.QueuedPodInfo) error
	pop(logger klog.Logger) (*framework.QueuedPodInfo, error)
	list() []*v1.Pod
	len() int
	has(pInfo *framework.QueuedPodInfo) bool

	listInFlightEvents() []interface{}
	listInFlightPods() []*v1.Pod
	clusterEventsForPod(logger klog.Logger, pInfo *framework.QueuedPodInfo) ([]*clusterEvent, error)
	addEventsIfPodInFlight(oldPod, newPod *v1.Pod, events []framework.ClusterEvent) bool
	addEventIfAnyInFlight(oldObj, newObj interface{}, event framework.ClusterEvent) bool

	schedulingCycle() int64
	done(pod types.UID)
	close()
	broadcast()
}

// unlockedActiveQueuer defines activeQ methods that are not protected by the lock itself.
// underLock() method should be used to protect these methods.
type unlockedActiveQueuer interface {
	unlockedActiveQueueReader
	AddOrUpdate(pInfo *framework.QueuedPodInfo)
}

// unlockedActiveQueueReader defines activeQ read-only methods that are not protected by the lock itself.
// underLock() or underRLock() method should be used to protect these methods.
type unlockedActiveQueueReader interface {
	Get(pInfo *framework.QueuedPodInfo) (*framework.QueuedPodInfo, bool)
	Has(pInfo *framework.QueuedPodInfo) bool
}

// activeQueue implements activeQueuer. All of the fields have to be protected using the lock.
type activeQueue struct {
	// lock synchronizes all operations related to activeQ.
	// It protects activeQ, inFlightPods, inFlightEvents, schedulingCycle and closed fields.
	// Caution: DO NOT take "SchedulingQueue.lock" after taking "lock".
	// You should always take "SchedulingQueue.lock" first, otherwise the queue could end up in deadlock.
	// "lock" should not be taken after taking "nLock".
	// Correct locking order is: SchedulingQueue.lock > lock > nominator.nLock.
	lock sync.RWMutex

	// activeQ is heap structure that scheduler actively looks at to find pods to
	// schedule. Head of heap is the highest priority pod.
	queue *heap.Heap[*framework.QueuedPodInfo]

	// cond is a condition that is notified when the pod is added to activeQ.
	// It is used with lock.
	cond sync.Cond

	// inFlightPods holds the UID of all pods which have been popped out for which Done
	// hasn't been called yet - in other words, all pods that are currently being
	// processed (being scheduled, in permit, or in the binding cycle).
	//
	// The values in the map are the entry of each pod in the inFlightEvents list.
	// The value of that entry is the *v1.Pod at the time that scheduling of that
	// pod started, which can be useful for logging or debugging.
	inFlightPods map[types.UID]*list.Element

	// inFlightEvents holds the events received by the scheduling queue
	// (entry value is clusterEvent) together with in-flight pods (entry
	// value is *v1.Pod). Entries get added at the end while the mutex is
	// locked, so they get serialized.
	//
	// The pod entries are added in Pop and used to track which events
	// occurred after the pod scheduling attempt for that pod started.
	// They get removed when the scheduling attempt is done, at which
	// point all events that occurred in the meantime are processed.
	//
	// After removal of a pod, events at the start of the list are no
	// longer needed because all of the other in-flight pods started
	// later. Those events can be removed.
	inFlightEvents *list.List

	// schedCycle represents sequence number of scheduling cycle and is incremented
	// when a pod is popped.
	schedCycle int64

	// closed indicates that the queue is closed.
	// It is mainly used to let Pop() exit its control loop while waiting for an item.
	closed bool

	// isSchedulingQueueHintEnabled indicates whether the feature gate for the scheduling queue is enabled.
	isSchedulingQueueHintEnabled bool

	metricsRecorder metrics.MetricAsyncRecorder
}

func newActiveQueue(queue *heap.Heap[*framework.QueuedPodInfo], isSchedulingQueueHintEnabled bool, metricRecorder metrics.MetricAsyncRecorder) *activeQueue {
	aq := &activeQueue{
		queue:                        queue,
		inFlightPods:                 make(map[types.UID]*list.Element),
		inFlightEvents:               list.New(),
		isSchedulingQueueHintEnabled: isSchedulingQueueHintEnabled,
		metricsRecorder:              metricRecorder,
	}
	aq.cond.L = &aq.lock

	return aq
}

// underLock runs the fn function under the lock.Lock.
// fn can run unlockedActiveQueuer methods but should NOT run any other activeQueue method,
// as it would end up in deadlock.
func (aq *activeQueue) underLock(fn func(unlockedActiveQ unlockedActiveQueuer)) {
	aq.lock.Lock()
	defer aq.lock.Unlock()
	fn(aq.queue)
}

// underRLock runs the fn function under the lock.RLock.
// fn can run unlockedActiveQueueReader methods but should NOT run any other activeQueue method,
// as it would end up in deadlock.
func (aq *activeQueue) underRLock(fn func(unlockedActiveQ unlockedActiveQueueReader)) {
	aq.lock.RLock()
	defer aq.lock.RUnlock()
	fn(aq.queue)
}

// update updates the pod in activeQ if oldPodInfo is already in the queue.
// It returns new pod info if updated, nil otherwise.
func (aq *activeQueue) update(newPod *v1.Pod, oldPodInfo *framework.QueuedPodInfo) *framework.QueuedPodInfo {
	aq.lock.Lock()
	defer aq.lock.Unlock()

	if pInfo, exists := aq.queue.Get(oldPodInfo); exists {
		_ = pInfo.Update(newPod)
		aq.queue.AddOrUpdate(pInfo)
		return pInfo
	}
	return nil
}

// delete deletes the pod info from activeQ.
func (aq *activeQueue) delete(pInfo *framework.QueuedPodInfo) error {
	aq.lock.Lock()
	defer aq.lock.Unlock()

	return aq.queue.Delete(pInfo)
}

// pop removes the head of the queue and returns it.
// It blocks if the queue is empty and waits until a new item is added to the queue.
// It increments scheduling cycle when a pod is popped.
func (aq *activeQueue) pop(logger klog.Logger) (*framework.QueuedPodInfo, error) {
	aq.lock.Lock()
	defer aq.lock.Unlock()

	return aq.unlockedPop(logger)
}

func (aq *activeQueue) unlockedPop(logger klog.Logger) (*framework.QueuedPodInfo, error) {
	for aq.queue.Len() == 0 {
		// When the queue is empty, invocation of Pop() is blocked until new item is enqueued.
		// When Close() is called, the p.closed is set and the condition is broadcast,
		// which causes this loop to continue and return from the Pop().
		if aq.closed {
			logger.V(2).Info("Scheduling queue is closed")
			return nil, nil
		}
		aq.cond.Wait()
	}
	pInfo, err := aq.queue.Pop()
	if err != nil {
		return nil, err
	}
	pInfo.Attempts++
	// In flight, no concurrent events yet.
	if aq.isSchedulingQueueHintEnabled {
		// If the pod is already in the map, we shouldn't overwrite the inFlightPods otherwise it'd lead to a memory leak.
		// https://github.com/kubernetes/kubernetes/pull/127016
		if _, ok := aq.inFlightPods[pInfo.Pod.UID]; ok {
			// Just report it as an error, but no need to stop the scheduler
			// because it likely doesn't cause any visible issues from the scheduling perspective.
			logger.Error(nil, "the same pod is tracked in multiple places in the scheduler, and just discard it", "pod", klog.KObj(pInfo.Pod))
			// Just ignore/discard this duplicated pod and try to pop the next one.
			return aq.unlockedPop(logger)
		}

		aq.metricsRecorder.ObserveInFlightEventsAsync(metrics.PodPoppedInFlightEvent, 1, false)
		aq.inFlightPods[pInfo.Pod.UID] = aq.inFlightEvents.PushBack(pInfo.Pod)
	}
	aq.schedCycle++

	// Update metrics and reset the set of unschedulable plugins for the next attempt.
	for plugin := range pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins) {
		metrics.UnschedulableReason(plugin, pInfo.Pod.Spec.SchedulerName).Dec()
	}
	pInfo.UnschedulablePlugins.Clear()
	pInfo.PendingPlugins.Clear()

	return pInfo, nil
}

// list returns all pods that are in the queue.
func (aq *activeQueue) list() []*v1.Pod {
	aq.lock.RLock()
	defer aq.lock.RUnlock()
	var result []*v1.Pod
	for _, pInfo := range aq.queue.List() {
		result = append(result, pInfo.Pod)
	}
	return result
}

// len returns length of the queue.
func (aq *activeQueue) len() int {
	return aq.queue.Len()
}

// has reports whether pInfo exists in the queue.
func (aq *activeQueue) has(pInfo *framework.QueuedPodInfo) bool {
	aq.lock.RLock()
	defer aq.lock.RUnlock()
	return aq.queue.Has(pInfo)
}

// listInFlightEvents returns all inFlightEvents.
func (aq *activeQueue) listInFlightEvents() []interface{} {
	aq.lock.RLock()
	defer aq.lock.RUnlock()
	var values []interface{}
	for event := aq.inFlightEvents.Front(); event != nil; event = event.Next() {
		values = append(values, event.Value)
	}
	return values
}

// listInFlightPods returns all inFlightPods.
func (aq *activeQueue) listInFlightPods() []*v1.Pod {
	aq.lock.RLock()
	defer aq.lock.RUnlock()
	var pods []*v1.Pod
	for _, obj := range aq.inFlightPods {
		pods = append(pods, obj.Value.(*v1.Pod))
	}
	return pods
}

// clusterEventsForPod gets all cluster events that have happened while the pod for pInfo is being scheduled.
func (aq *activeQueue) clusterEventsForPod(logger klog.Logger, pInfo *framework.QueuedPodInfo) ([]*clusterEvent, error) {
	aq.lock.RLock()
	defer aq.lock.RUnlock()
	logger.V(5).Info("Checking events for in-flight pod", "pod", klog.KObj(pInfo.Pod), "unschedulablePlugins", pInfo.UnschedulablePlugins, "inFlightEventsSize", aq.inFlightEvents.Len(), "inFlightPodsSize", len(aq.inFlightPods))

	// AddUnschedulableIfNotPresent is called with the Pod at the end of scheduling or binding.
	// So, given pInfo should have been Pop()ed before,
	// we can assume pInfo must be recorded in inFlightPods and thus inFlightEvents.
	inFlightPod, ok := aq.inFlightPods[pInfo.Pod.UID]
	if !ok {
		return nil, fmt.Errorf("in flight Pod isn't found in the scheduling queue. If you see this error log, it's likely a bug in the scheduler")
	}

	var events []*clusterEvent
	for event := inFlightPod.Next(); event != nil; event = event.Next() {
		e, ok := event.Value.(*clusterEvent)
		if !ok {
			// Must be another in-flight Pod (*v1.Pod). Can be ignored.
			continue
		}
		events = append(events, e)
	}
	return events, nil
}

// addEventsIfPodInFlight adds clusterEvent to inFlightEvents if the newPod is in inFlightPods.
// It returns true if it pushed the event to the inFlightEvents.
func (aq *activeQueue) addEventsIfPodInFlight(oldPod, newPod *v1.Pod, events []framework.ClusterEvent) bool {
	aq.lock.Lock()
	defer aq.lock.Unlock()

	_, ok := aq.inFlightPods[newPod.UID]
	if ok {
		for _, event := range events {
			aq.metricsRecorder.ObserveInFlightEventsAsync(event.Label(), 1, false)
			aq.inFlightEvents.PushBack(&clusterEvent{
				event:  event,
				oldObj: oldPod,
				newObj: newPod,
			})
		}
	}
	return ok
}

// addEventIfAnyInFlight adds clusterEvent to inFlightEvents if any pod is in inFlightPods.
// It returns true if it pushed the event to the inFlightEvents.
func (aq *activeQueue) addEventIfAnyInFlight(oldObj, newObj interface{}, event framework.ClusterEvent) bool {
	aq.lock.Lock()
	defer aq.lock.Unlock()

	if len(aq.inFlightPods) != 0 {
		aq.metricsRecorder.ObserveInFlightEventsAsync(event.Label(), 1, false)
		aq.inFlightEvents.PushBack(&clusterEvent{
			event:  event,
			oldObj: oldObj,
			newObj: newObj,
		})
		return true
	}
	return false
}

func (aq *activeQueue) schedulingCycle() int64 {
	aq.lock.RLock()
	defer aq.lock.RUnlock()
	return aq.schedCycle
}

// done must be called for pod returned by Pop. This allows the queue to
// keep track of which pods are currently being processed.
func (aq *activeQueue) done(pod types.UID) {
	aq.lock.Lock()
	defer aq.lock.Unlock()

	inFlightPod, ok := aq.inFlightPods[pod]
	if !ok {
		// This Pod is already done()ed.
		return
	}
	delete(aq.inFlightPods, pod)

	// Remove the pod from the list.
	aq.inFlightEvents.Remove(inFlightPod)

	aggrMetricsCounter := map[string]int{}
	// Remove events which are only referred to by this Pod
	// so that the inFlightEvents list doesn't grow infinitely.
	// If the pod was at the head of the list, then all
	// events between it and the next pod are no longer needed
	// and can be removed.
	for {
		e := aq.inFlightEvents.Front()
		if e == nil {
			// Empty list.
			break
		}
		ev, ok := e.Value.(*clusterEvent)
		if !ok {
			// A pod, must stop pruning.
			break
		}
		aq.inFlightEvents.Remove(e)
		aggrMetricsCounter[ev.event.Label()]--
	}

	for evLabel, count := range aggrMetricsCounter {
		aq.metricsRecorder.ObserveInFlightEventsAsync(evLabel, float64(count), false)
	}

	aq.metricsRecorder.ObserveInFlightEventsAsync(metrics.PodPoppedInFlightEvent, -1,
		// If it's the last Pod in inFlightPods, we should force-flush the metrics.
		// Otherwise, especially in small clusters, which don't get a new Pod frequently,
		// the metrics might not be flushed for a long time.
		len(aq.inFlightPods) == 0)
}

// close closes the activeQueue.
func (aq *activeQueue) close() {
	// We should call done() for all in-flight pods to clean up the inFlightEvents metrics.
	// It's safe even if the binding cycle running asynchronously calls done() afterwards;
	// done() will just be a no-op.
	for pod := range aq.inFlightPods {
		aq.done(pod)
	}
	aq.lock.Lock()
	aq.closed = true
	aq.lock.Unlock()
}

// broadcast notifies the pop() operation that new pod(s) were added to the activeQueue.
func (aq *activeQueue) broadcast() {
	aq.cond.Broadcast()
}
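To make the contract above concrete, here is a minimal sketch (written as if it lived inside package queue; the helper name and flow are hypothetical, not part of the vendored file) of how a caller is expected to drive the queue: writers mutate the heap only through underLock, broadcast wakes a blocked pop, and every popped pod must eventually be done()ed so its in-flight event window can be pruned.

package queue

import (
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

// illustrateActiveQueueLifecycle is a hypothetical helper sketching the
// intended pop/done contract of activeQueuer; it is not part of the file above.
func illustrateActiveQueueLifecycle(logger klog.Logger, aq activeQueuer, pInfo *framework.QueuedPodInfo) error {
	// Add or update a pod only via underLock, so the heap stays consistent
	// with the rest of the activeQueue state.
	aq.underLock(func(unlockedActiveQ unlockedActiveQueuer) {
		unlockedActiveQ.AddOrUpdate(pInfo)
	})
	// Wake up any goroutine blocked in pop() waiting for an item.
	aq.broadcast()

	// pop() blocks until a pod is available (or the queue is closed) and,
	// with the queueing-hint feature enabled, registers the pod in
	// inFlightPods/inFlightEvents.
	popped, err := aq.pop(logger)
	if err != nil || popped == nil {
		return err
	}

	// ... run the scheduling attempt for popped.Pod here ...

	// done() must be called once per popped pod so the events recorded
	// while it was in flight can be released.
	aq.done(popped.Pod.UID)
	return nil
}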
vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/nominator.go (generated, vendored, new file, 195 lines)
@@ -0,0 +1,195 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package queue

import (
	"slices"
	"sync"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	listersv1 "k8s.io/client-go/listers/core/v1"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

// nominator is a structure that stores pods nominated to run on nodes.
// It exists because nominatedNodeName of pod objects stored in the structure
// may be different than what scheduler has here. We should be able to find pods
// by their UID and update/delete them.
type nominator struct {
	// nLock synchronizes all operations related to nominator.
	// It should not be used anywhere else.
	// Caution: DO NOT take ("SchedulingQueue.lock" or "activeQueue.lock") after taking "nLock".
	// You should always take "SchedulingQueue.lock" and "activeQueue.lock" first,
	// otherwise the nominator could end up in deadlock.
	// Correct locking order is: SchedulingQueue.lock > activeQueue.lock > nLock.
	nLock sync.RWMutex

	// podLister is used to verify if the given pod is alive.
	podLister listersv1.PodLister
	// nominatedPods is a map keyed by a node name and the value is a list of
	// pods which are nominated to run on the node. These are pods which can be in
	// the activeQ or unschedulablePods.
	nominatedPods map[string][]podRef
	// nominatedPodToNode is a map keyed by a Pod UID to the node name where it is
	// nominated.
	nominatedPodToNode map[types.UID]string
}

func newPodNominator(podLister listersv1.PodLister) *nominator {
	return &nominator{
		podLister:          podLister,
		nominatedPods:      make(map[string][]podRef),
		nominatedPodToNode: make(map[types.UID]string),
	}
}

// AddNominatedPod adds a pod to the nominated pods of the given node.
// This is called during the preemption process after a node is nominated to run
// the pod. We update the structure before sending a request to update the pod
// object to avoid races with the following scheduling cycles.
func (npm *nominator) AddNominatedPod(logger klog.Logger, pi *framework.PodInfo, nominatingInfo *framework.NominatingInfo) {
	npm.nLock.Lock()
	npm.addNominatedPodUnlocked(logger, pi, nominatingInfo)
	npm.nLock.Unlock()
}

func (npm *nominator) addNominatedPodUnlocked(logger klog.Logger, pi *framework.PodInfo, nominatingInfo *framework.NominatingInfo) {
	// Always delete the pod if it already exists, to ensure we never store more than
	// one instance of the pod.
	npm.deleteUnlocked(pi.Pod)

	var nodeName string
	if nominatingInfo.Mode() == framework.ModeOverride {
		nodeName = nominatingInfo.NominatedNodeName
	} else if nominatingInfo.Mode() == framework.ModeNoop {
		if pi.Pod.Status.NominatedNodeName == "" {
			return
		}
		nodeName = pi.Pod.Status.NominatedNodeName
	}

	if npm.podLister != nil {
		// If the pod was removed or if it was already scheduled, don't nominate it.
		updatedPod, err := npm.podLister.Pods(pi.Pod.Namespace).Get(pi.Pod.Name)
		if err != nil {
			logger.V(4).Info("Pod doesn't exist in podLister, aborted adding it to the nominator", "pod", klog.KObj(pi.Pod))
			return
		}
		if updatedPod.Spec.NodeName != "" {
			logger.V(4).Info("Pod is already scheduled to a node, aborted adding it to the nominator", "pod", klog.KObj(pi.Pod), "node", updatedPod.Spec.NodeName)
			return
		}
	}

	npm.nominatedPodToNode[pi.Pod.UID] = nodeName
	for _, np := range npm.nominatedPods[nodeName] {
		if np.uid == pi.Pod.UID {
			logger.V(4).Info("Pod already exists in the nominator", "pod", np.uid)
			return
		}
	}
	npm.nominatedPods[nodeName] = append(npm.nominatedPods[nodeName], podToRef(pi.Pod))
}

// UpdateNominatedPod updates the <oldPod> with <newPod>.
func (npm *nominator) UpdateNominatedPod(logger klog.Logger, oldPod *v1.Pod, newPodInfo *framework.PodInfo) {
	npm.nLock.Lock()
	defer npm.nLock.Unlock()
	// In some cases, an Update event with no "NominatedNode" present is received right
	// after a node("NominatedNode") is reserved for this pod in memory.
	// In this case, we need to keep reserving the NominatedNode when updating the pod pointer.
	var nominatingInfo *framework.NominatingInfo
	// We won't fall into below `if` block if the Update event represents:
	// (1) NominatedNode info is added
	// (2) NominatedNode info is updated
	// (3) NominatedNode info is removed
	if nominatedNodeName(oldPod) == "" && nominatedNodeName(newPodInfo.Pod) == "" {
		if nnn, ok := npm.nominatedPodToNode[oldPod.UID]; ok {
			// This is the only case we should continue reserving the NominatedNode
			nominatingInfo = &framework.NominatingInfo{
				NominatingMode:    framework.ModeOverride,
				NominatedNodeName: nnn,
			}
		}
	}
	// We update irrespective of the nominatedNodeName changed or not, to ensure
	// that pod pointer is updated.
	npm.deleteUnlocked(oldPod)
	npm.addNominatedPodUnlocked(logger, newPodInfo, nominatingInfo)
}

// DeleteNominatedPodIfExists deletes <pod> from nominatedPods.
func (npm *nominator) DeleteNominatedPodIfExists(pod *v1.Pod) {
	npm.nLock.Lock()
	npm.deleteUnlocked(pod)
	npm.nLock.Unlock()
}

func (npm *nominator) deleteUnlocked(p *v1.Pod) {
	nnn, ok := npm.nominatedPodToNode[p.UID]
	if !ok {
		return
	}
	for i, np := range npm.nominatedPods[nnn] {
		if np.uid == p.UID {
			npm.nominatedPods[nnn] = append(npm.nominatedPods[nnn][:i], npm.nominatedPods[nnn][i+1:]...)
			if len(npm.nominatedPods[nnn]) == 0 {
				delete(npm.nominatedPods, nnn)
			}
			break
		}
	}
	delete(npm.nominatedPodToNode, p.UID)
}

func (npm *nominator) nominatedPodsForNode(nodeName string) []podRef {
	npm.nLock.RLock()
	defer npm.nLock.RUnlock()
	return slices.Clone(npm.nominatedPods[nodeName])
}

// nominatedNodeName returns nominated node name of a Pod.
func nominatedNodeName(pod *v1.Pod) string {
	return pod.Status.NominatedNodeName
}

type podRef struct {
	name      string
	namespace string
	uid       types.UID
}

func podToRef(pod *v1.Pod) podRef {
	return podRef{
		name:      pod.Name,
		namespace: pod.Namespace,
		uid:       pod.UID,
	}
}

func (np podRef) toPod() *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      np.name,
			Namespace: np.namespace,
			UID:       np.uid,
		},
	}
}
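As a quick illustration of how the nominator above is meant to be used around preemption, here is a hedged sketch (again written as if inside package queue; the helper and its arguments are hypothetical, not part of this commit):

package queue

import (
	listersv1 "k8s.io/client-go/listers/core/v1"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

// illustrateNominator is a hypothetical helper sketching the AddNominatedPod /
// nominatedPodsForNode / DeleteNominatedPodIfExists flow; it is not part of the file above.
func illustrateNominator(logger klog.Logger, podLister listersv1.PodLister, pi *framework.PodInfo, nodeName string) {
	npm := newPodNominator(podLister)

	// After preemption picks a node, record the nomination in memory before
	// issuing the API update, to avoid races with following scheduling cycles.
	npm.AddNominatedPod(logger, pi, &framework.NominatingInfo{
		NominatingMode:    framework.ModeOverride,
		NominatedNodeName: nodeName,
	})

	// Readers get a cloned slice, so they can iterate without holding nLock.
	for _, ref := range npm.nominatedPodsForNode(nodeName) {
		logger.V(4).Info("nominated pod", "pod", klog.KRef(ref.namespace, ref.name))
	}

	// Once the pod is actually bound (or deleted), drop the nomination.
	npm.DeleteNominatedPodIfExists(pi.Pod)
}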
vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/scheduling_queue.go (generated, vendored, new file, 1397 lines)
File diff suppressed because it is too large.
vendor/k8s.io/kubernetes/pkg/scheduler/backend/queue/testing.go (generated, vendored, new file, 63 lines)
@@ -0,0 +1,63 @@
/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package queue

import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/metrics"
)

// NewTestQueue creates a priority queue with an empty informer factory.
func NewTestQueue(ctx context.Context, lessFn framework.LessFunc, opts ...Option) *PriorityQueue {
	return NewTestQueueWithObjects(ctx, lessFn, nil, opts...)
}

// NewTestQueueWithObjects creates a priority queue with an informer factory
// populated with the provided objects.
func NewTestQueueWithObjects(
	ctx context.Context,
	lessFn framework.LessFunc,
	objs []runtime.Object,
	opts ...Option,
) *PriorityQueue {
	informerFactory := informers.NewSharedInformerFactory(fake.NewClientset(objs...), 0)

	// Because some major functions (e.g., Pop) require the metric recorder to be set,
	// we always set a metric recorder here.
	recorder := metrics.NewMetricsAsyncRecorder(10, 20*time.Microsecond, ctx.Done())
	// We set it before the options that users provide, so that users can override it.
	opts = append([]Option{WithMetricsRecorder(*recorder)}, opts...)
	return NewTestQueueWithInformerFactory(ctx, lessFn, informerFactory, opts...)
}

func NewTestQueueWithInformerFactory(
	ctx context.Context,
	lessFn framework.LessFunc,
	informerFactory informers.SharedInformerFactory,
	opts ...Option,
) *PriorityQueue {
	pq := NewPriorityQueue(lessFn, informerFactory, opts...)
	informerFactory.Start(ctx.Done())
	informerFactory.WaitForCacheSync(ctx.Done())
	return pq
}
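A minimal usage sketch for the test helper, assuming the usual framework.LessFunc shape (two *framework.QueuedPodInfo arguments returning bool); the test name and the ordering function are hypothetical, not part of this commit:

package queue

import (
	"context"
	"testing"

	"k8s.io/kubernetes/pkg/scheduler/framework"
)

// TestNewTestQueueSketch is a hypothetical example, not part of the vendored file.
func TestNewTestQueueSketch(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// A trivial ordering function standing in for the scheduler's real
	// priority comparison.
	lessFn := func(p1, p2 *framework.QueuedPodInfo) bool {
		return p1.Pod.Name < p2.Pod.Name
	}

	// NewTestQueue wires up a fake clientset, an informer factory and a
	// metrics recorder, so the returned PriorityQueue is usable as-is.
	q := NewTestQueue(ctx, lessFn)
	if q == nil {
		t.Fatal("expected a non-nil PriorityQueue")
	}
}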