rebase: update replaced k8s.io modules to v0.33.0

Signed-off-by: Niels de Vos <ndevos@ibm.com>
This commit is contained in:
Niels de Vos
2025-05-07 13:13:33 +02:00
committed by mergify[bot]
parent dd77e72800
commit 107407b44b
1723 changed files with 65035 additions and 175239 deletions

View File

@@ -757,4 +757,12 @@ func (cache *cacheImpl) updateMetrics() {
metrics.CacheSize.WithLabelValues("assumed_pods").Set(float64(len(cache.assumedPods)))
metrics.CacheSize.WithLabelValues("pods").Set(float64(len(cache.podStates)))
metrics.CacheSize.WithLabelValues("nodes").Set(float64(len(cache.nodes)))
// We intentionally keep them during the deprecation period and will remove them in v1.34.
//nolint:staticcheck
metrics.SchedulerCacheSize.WithLabelValues("assumed_pods").Set(float64(len(cache.assumedPods)))
//nolint:staticcheck
metrics.SchedulerCacheSize.WithLabelValues("pods").Set(float64(len(cache.podStates)))
//nolint:staticcheck
metrics.SchedulerCacheSize.WithLabelValues("nodes").Set(float64(len(cache.nodes)))
}
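The hunk above double-reports the cache sizes: each value is written to the new CacheSize metric and to the deprecated SchedulerCacheSize until the latter's planned removal in v1.34. The mirroring pattern looks roughly like the following client_golang sketch (metric names here are hypothetical, not the scheduler's actual definitions):

package main

import "github.com/prometheus/client_golang/prometheus"

var (
	cacheSize = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{Name: "cache_size"}, []string{"type"})
	// Deprecated mirror: kept during the deprecation window, then dropped.
	schedulerCacheSize = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{Name: "scheduler_cache_size"}, []string{"type"})
)

// setBoth writes the same value under the new and the deprecated name,
// so existing dashboards keep working while they migrate.
func setBoth(kind string, n int) {
	v := float64(n)
	cacheSize.WithLabelValues(kind).Set(v)
	schedulerCacheSize.WithLabelValues(kind).Set(v)
}

func main() {
	prometheus.MustRegister(cacheSize, schedulerCacheSize)
	setBoth("pods", 42)
}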

View File

@@ -20,6 +20,7 @@ import (
"container/list"
"fmt"
"sync"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
@@ -61,14 +62,63 @@ type activeQueuer interface {
// underLock() method should be used to protect these methods.
type unlockedActiveQueuer interface {
unlockedActiveQueueReader
AddOrUpdate(pInfo *framework.QueuedPodInfo)
// add adds a new pod to the activeQ.
// The event should show which event triggered this addition and is used for the metric recording.
// This method should be called in activeQueue.underLock().
add(pInfo *framework.QueuedPodInfo, event string)
}
// unlockedActiveQueueReader defines activeQ read-only methods that are not protected by the lock itself.
// underLock() or underRLock() method should be used to protect these methods.
type unlockedActiveQueueReader interface {
Get(pInfo *framework.QueuedPodInfo) (*framework.QueuedPodInfo, bool)
Has(pInfo *framework.QueuedPodInfo) bool
// get returns the pod matching pInfo inside the activeQ.
// Returns false if the pInfo doesn't exist in the queue.
// This method should be called in activeQueue.underLock() or activeQueue.underRLock().
get(pInfo *framework.QueuedPodInfo) (*framework.QueuedPodInfo, bool)
// has returns whether pInfo exists in the queue.
// This method should be called in activeQueue.underLock() or activeQueue.underRLock().
has(pInfo *framework.QueuedPodInfo) bool
}
// unlockedActiveQueue defines activeQ methods that are not protected by the lock itself.
// activeQueue.underLock() or activeQueue.underRLock() method should be used to protect these methods.
type unlockedActiveQueue struct {
queue *heap.Heap[*framework.QueuedPodInfo]
}
func newUnlockedActiveQueue(queue *heap.Heap[*framework.QueuedPodInfo]) *unlockedActiveQueue {
return &unlockedActiveQueue{
queue: queue,
}
}
// add adds a new pod to the activeQ.
// The event should show which event triggered this addition and is used for the metric recording.
// This method should be called in activeQueue.underLock().
func (uaq *unlockedActiveQueue) add(pInfo *framework.QueuedPodInfo, event string) {
uaq.queue.AddOrUpdate(pInfo)
metrics.SchedulerQueueIncomingPods.WithLabelValues("active", event).Inc()
}
// get returns the pod matching pInfo inside the activeQ.
// Returns false if the pInfo doesn't exist in the queue.
// This method should be called in activeQueue.underLock() or activeQueue.underRLock().
func (uaq *unlockedActiveQueue) get(pInfo *framework.QueuedPodInfo) (*framework.QueuedPodInfo, bool) {
return uaq.queue.Get(pInfo)
}
// has returns whether pInfo exists in the queue.
// This method should be called in activeQueue.underLock() or activeQueue.underRLock().
func (uaq *unlockedActiveQueue) has(pInfo *framework.QueuedPodInfo) bool {
return uaq.queue.Has(pInfo)
}
// backoffQPopper defines the methods used to pop from the backoffQ when the activeQ is empty.
type backoffQPopper interface {
// popBackoff pops the pInfo from the podBackoffQ.
popBackoff() (*framework.QueuedPodInfo, error)
// lenBackoff returns the length of the podBackoffQ.
lenBackoff() int
}
// activeQueue implements activeQueuer. All of the fields have to be protected using the lock.
@@ -77,15 +127,21 @@ type activeQueue struct {
// It protects activeQ, inFlightPods, inFlightEvents, schedulingCycle and closed fields.
// Caution: DO NOT take "SchedulingQueue.lock" after taking "lock".
// You should always take "SchedulingQueue.lock" first, otherwise the queue could end up in deadlock.
// "lock" should not be taken after taking "nLock".
// Correct locking order is: SchedulingQueue.lock > lock > nominator.nLock.
// "lock" should not be taken after taking "backoffQueue.lock" or "nominator.nLock".
// Correct locking order is: SchedulingQueue.lock > lock > backoffQueue.lock > nominator.nLock.
lock sync.RWMutex
// activeQ is heap structure that scheduler actively looks at to find pods to
// schedule. Head of heap is the highest priority pod.
queue *heap.Heap[*framework.QueuedPodInfo]
// unlockedQueue is a wrapper of queue providing methods that are not locked themselves
// and can be used in the underLock() or underRLock().
unlockedQueue *unlockedActiveQueue
// cond is a condition that is notified when the pod is added to activeQ.
// When SchedulerPopFromBackoffQ feature is enabled,
// condition is also notified when the pod is added to backoffQ.
// It is used with lock.
cond sync.Cond
@@ -125,15 +181,21 @@ type activeQueue struct {
isSchedulingQueueHintEnabled bool
metricsRecorder metrics.MetricAsyncRecorder
// backoffQPopper is used to pop from backoffQ when activeQ is empty.
// It is non-nil only when SchedulerPopFromBackoffQ feature is enabled.
backoffQPopper backoffQPopper
}
func newActiveQueue(queue *heap.Heap[*framework.QueuedPodInfo], isSchedulingQueueHintEnabled bool, metricRecorder metrics.MetricAsyncRecorder) *activeQueue {
func newActiveQueue(queue *heap.Heap[*framework.QueuedPodInfo], isSchedulingQueueHintEnabled bool, metricRecorder metrics.MetricAsyncRecorder, backoffQPopper backoffQPopper) *activeQueue {
aq := &activeQueue{
queue: queue,
inFlightPods: make(map[types.UID]*list.Element),
inFlightEvents: list.New(),
isSchedulingQueueHintEnabled: isSchedulingQueueHintEnabled,
metricsRecorder: metricRecorder,
unlockedQueue: newUnlockedActiveQueue(queue),
backoffQPopper: backoffQPopper,
}
aq.cond.L = &aq.lock
@@ -146,7 +208,7 @@ func newActiveQueue(queue *heap.Heap[*framework.QueuedPodInfo], isSchedulingQueu
func (aq *activeQueue) underLock(fn func(unlockedActiveQ unlockedActiveQueuer)) {
aq.lock.Lock()
defer aq.lock.Unlock()
fn(aq.queue)
fn(aq.unlockedQueue)
}
// underRLock runs the fn function under the lock.RLock.
@@ -155,7 +217,7 @@ func (aq *activeQueue) underLock(fn func(unlockedActiveQ unlockedActiveQueuer))
func (aq *activeQueue) underRLock(fn func(unlockedActiveQ unlockedActiveQueueReader)) {
aq.lock.RLock()
defer aq.lock.RUnlock()
fn(aq.queue)
fn(aq.unlockedQueue)
}
// update updates the pod in activeQ if oldPodInfo is already in the queue.
@@ -191,7 +253,13 @@ func (aq *activeQueue) pop(logger klog.Logger) (*framework.QueuedPodInfo, error)
}
func (aq *activeQueue) unlockedPop(logger klog.Logger) (*framework.QueuedPodInfo, error) {
var pInfo *framework.QueuedPodInfo
for aq.queue.Len() == 0 {
// backoffQPopper is non-nil only if SchedulerPopFromBackoffQ feature is enabled.
// In case of non-empty backoffQ, try popping from there.
if aq.backoffQPopper != nil && aq.backoffQPopper.lenBackoff() != 0 {
break
}
// When the queue is empty, an invocation of Pop() blocks until a new item is enqueued.
// When Close() is called, closed is set and the condition is broadcast,
// which causes this loop to continue and return from the Pop().
@@ -203,9 +271,18 @@ func (aq *activeQueue) unlockedPop(logger klog.Logger) (*framework.QueuedPodInfo
}
pInfo, err := aq.queue.Pop()
if err != nil {
return nil, err
if aq.backoffQPopper == nil {
return nil, err
}
// Try to pop from backoffQ when activeQ is empty.
pInfo, err = aq.backoffQPopper.popBackoff()
if err != nil {
return nil, err
}
metrics.SchedulerQueueIncomingPods.WithLabelValues("active", framework.PopFromBackoffQ).Inc()
}
pInfo.Attempts++
pInfo.BackoffExpiration = time.Time{}
// In flight, no concurrent events yet.
if aq.isSchedulingQueueHintEnabled {
// If the pod is already in the map, we shouldn't overwrite the inFlightPods otherwise it'd lead to a memory leak.
@@ -354,6 +431,12 @@ func (aq *activeQueue) done(pod types.UID) {
aq.lock.Lock()
defer aq.lock.Unlock()
aq.unlockedDone(pod)
}
// unlockedDone is used by the activeQueue internally and doesn't take the lock itself.
// It assumes the lock is already taken outside before the method is called.
func (aq *activeQueue) unlockedDone(pod types.UID) {
inFlightPod, ok := aq.inFlightPods[pod]
if !ok {
// This Pod is already done()ed.
@@ -398,15 +481,15 @@ func (aq *activeQueue) done(pod types.UID) {
// close closes the activeQueue.
func (aq *activeQueue) close() {
aq.lock.Lock()
defer aq.lock.Unlock()
// We should call done() for all in-flight pods to clean up the inFlightEvents metrics.
// It's safe even if the binding cycle running asynchronously calls done() afterwards;
// done() will just be a no-op.
for pod := range aq.inFlightPods {
aq.done(pod)
aq.unlockedDone(pod)
}
aq.lock.Lock()
aq.closed = true
aq.lock.Unlock()
}
// broadcast notifies the pop() operation that new pod(s) were added to the activeQueue.
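Together with the condition-variable notes above, the unlockedPop() change means the pop loop now blocks only while both the activeQ and (when SchedulerPopFromBackoffQ is enabled) the backoffQ are empty. A minimal, self-contained sketch of that pattern, with plain slices and strings standing in for the heaps and framework.QueuedPodInfo:

package main

import (
	"fmt"
	"sync"
)

// twoQueues mimics the activeQ-with-backoffQ-fallback shape: pop() serves
// the primary queue first and falls back to the secondary one, and it only
// waits on the condition while both are empty.
type twoQueues struct {
	mu       sync.Mutex
	cond     *sync.Cond
	primary  []string // stands in for activeQ
	fallback []string // stands in for backoffQ
	closed   bool
}

func newTwoQueues() *twoQueues {
	q := &twoQueues{}
	q.cond = sync.NewCond(&q.mu)
	return q
}

func (q *twoQueues) push(toPrimary bool, v string) {
	q.mu.Lock()
	defer q.mu.Unlock()
	if toPrimary {
		q.primary = append(q.primary, v)
	} else {
		q.fallback = append(q.fallback, v)
	}
	q.cond.Broadcast() // like activeQueue.broadcast()
}

func (q *twoQueues) pop() (string, bool) {
	q.mu.Lock()
	defer q.mu.Unlock()
	for len(q.primary) == 0 {
		if len(q.fallback) > 0 {
			break // don't block: the fallback can serve this pop
		}
		if q.closed {
			return "", false
		}
		q.cond.Wait()
	}
	if len(q.primary) > 0 {
		v := q.primary[0]
		q.primary = q.primary[1:]
		return v, true
	}
	v := q.fallback[0]
	q.fallback = q.fallback[1:]
	return v, true
}

func main() {
	q := newTwoQueues()
	q.push(false, "pod-in-backoff")
	v, _ := q.pop() // served from the fallback without blocking
	fmt.Println(v)
}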

View File

@@ -0,0 +1,405 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package queue
import (
"sync"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/scheduler/backend/heap"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/metrics"
"k8s.io/utils/clock"
)
// backoffQOrderingWindowDuration is a duration of an ordering window in the podBackoffQ.
// In each window, represented as a whole second, pods are ordered by priority.
// It is the same as the interval of flushing the pods from the podBackoffQ to the activeQ, so that whole windows are flushed there at once.
// This works only if PopFromBackoffQ feature is enabled.
// See the KEP-5142 (http://kep.k8s.io/5142) for rationale.
const backoffQOrderingWindowDuration = time.Second
// backoffQueuer is a wrapper for backoffQ related operations.
// Its methods that rely on the queues take the lock inside.
type backoffQueuer interface {
// isPodBackingoff returns true if a pod is still waiting for its backoff timer.
// If this returns true, the pod should not be re-tried.
// If the pod backoff time is in the current ordering window, it should still be backing off.
isPodBackingoff(podInfo *framework.QueuedPodInfo) bool
// popAllBackoffCompleted pops all pods from podBackoffQ and podErrorBackoffQ that completed backoff.
popAllBackoffCompleted(logger klog.Logger) []*framework.QueuedPodInfo
// podInitialBackoffDuration returns initial backoff duration that pod can get.
podInitialBackoffDuration() time.Duration
// podMaxBackoffDuration returns maximum backoff duration that pod can get.
podMaxBackoffDuration() time.Duration
// waitUntilAlignedWithOrderingWindow waits until the time reaches a multiple of backoffQOrderingWindowDuration.
// It then runs the f function at the backoffQOrderingWindowDuration interval using a ticker.
// It's important to align the flushing time, because podBackoffQ's ordering is based on the windows
// and whole windows have to be flushed at one time without a visible latency.
waitUntilAlignedWithOrderingWindow(f func(), stopCh <-chan struct{})
// add adds the pInfo to backoffQueue.
// The event should show which event triggered this addition and is used for the metric recording.
// It also ensures that pInfo is not in both queues.
add(logger klog.Logger, pInfo *framework.QueuedPodInfo, event string)
// update updates the pod in backoffQueue if oldPodInfo is already in the queue.
// It returns new pod info if updated, nil otherwise.
update(newPod *v1.Pod, oldPodInfo *framework.QueuedPodInfo) *framework.QueuedPodInfo
// delete deletes the pInfo from backoffQueue.
// It returns true if the pod was deleted.
delete(pInfo *framework.QueuedPodInfo) bool
// get returns the pInfo matching given pInfoLookup, if exists.
get(pInfoLookup *framework.QueuedPodInfo) (*framework.QueuedPodInfo, bool)
// has informs whether pInfo exists in the queue.
has(pInfo *framework.QueuedPodInfo) bool
// list returns all pods that are in the queue.
list() []*v1.Pod
// len returns length of the queue.
len() int
}
// backoffQueue implements backoffQueuer and wraps two queues inside,
// providing seamless access as if it were one queue.
type backoffQueue struct {
// lock synchronizes all operations related to backoffQ.
// It protects both podBackoffQ and podErrorBackoffQ.
// Caution: DO NOT take "SchedulingQueue.lock" or "activeQueue.lock" after taking "lock".
// You should always take "SchedulingQueue.lock" and "activeQueue.lock" first, otherwise the queue could end up in deadlock.
// "lock" should not be taken after taking "nominator.nLock".
// Correct locking order is: SchedulingQueue.lock > activeQueue.lock > lock > nominator.nLock.
lock sync.RWMutex
clock clock.WithTicker
// podBackoffQ is a heap ordered by backoff expiry. Pods which have completed backoff
// are popped from this heap before the scheduler looks at activeQ
podBackoffQ *heap.Heap[*framework.QueuedPodInfo]
// podErrorBackoffQ is a heap ordered by error backoff expiry. Pods which have completed backoff
// are popped from this heap before the scheduler looks at activeQ
podErrorBackoffQ *heap.Heap[*framework.QueuedPodInfo]
podInitialBackoff time.Duration
podMaxBackoff time.Duration
// activeQLessFn is used as a tie-breaking less function when two backoff times are equal,
// when the SchedulerPopFromBackoffQ feature is enabled.
activeQLessFn framework.LessFunc
// isPopFromBackoffQEnabled indicates whether the feature gate SchedulerPopFromBackoffQ is enabled.
isPopFromBackoffQEnabled bool
}
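The locking-order comments here, in activeQueue, and in nominator all describe one global hierarchy: SchedulingQueue.lock > activeQueue.lock > backoffQueue.lock > nominator.nLock. A toy illustration of why one fixed order prevents deadlock, with hypothetical locks a and b standing in for any two levels of that hierarchy:

package main

import "sync"

var a, b sync.Mutex // e.g., activeQueue.lock and backoffQueue.lock

// follow respects the hierarchy: a is always taken before b.
func follow() {
	a.Lock()
	b.Lock()
	b.Unlock()
	a.Unlock()
}

// invert takes the locks in the opposite order. If follow() and invert()
// ran concurrently, each goroutine could hold one lock while waiting
// forever for the other, which is the deadlock these comments guard against.
func invert() {
	b.Lock()
	a.Lock()
	a.Unlock()
	b.Unlock()
}

func main() {
	follow()
	invert() // safe here only because it runs after follow() has finished
}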
func newBackoffQueue(clock clock.WithTicker, podInitialBackoffDuration time.Duration, podMaxBackoffDuration time.Duration, activeQLessFn framework.LessFunc, popFromBackoffQEnabled bool) *backoffQueue {
bq := &backoffQueue{
clock: clock,
podInitialBackoff: podInitialBackoffDuration,
podMaxBackoff: podMaxBackoffDuration,
isPopFromBackoffQEnabled: popFromBackoffQEnabled,
activeQLessFn: activeQLessFn,
}
podBackoffQLessFn := bq.lessBackoffCompleted
if popFromBackoffQEnabled {
podBackoffQLessFn = bq.lessBackoffCompletedWithPriority
}
bq.podBackoffQ = heap.NewWithRecorder(podInfoKeyFunc, podBackoffQLessFn, metrics.NewBackoffPodsRecorder())
bq.podErrorBackoffQ = heap.NewWithRecorder(podInfoKeyFunc, bq.lessBackoffCompleted, metrics.NewBackoffPodsRecorder())
return bq
}
// podInitialBackoffDuration returns initial backoff duration that pod can get.
func (bq *backoffQueue) podInitialBackoffDuration() time.Duration {
return bq.podInitialBackoff
}
// podMaxBackoffDuration returns maximum backoff duration that pod can get.
func (bq *backoffQueue) podMaxBackoffDuration() time.Duration {
return bq.podMaxBackoff
}
// alignToWindow truncates the provided time to the podBackoffQ ordering window.
// It returns the lowest possible timestamp in the window.
func (bq *backoffQueue) alignToWindow(t time.Time) time.Time {
if !bq.isPopFromBackoffQEnabled {
return t
}
return t.Truncate(backoffQOrderingWindowDuration)
}
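Since time.Time.Truncate rounds down to a multiple of the given duration, two backoff expirations that land in the same one-second window compare as equal after alignToWindow, and the ordering then falls through to the priority tie-breaker defined below. A quick standalone check of that behavior:

package main

import (
	"fmt"
	"time"
)

func main() {
	window := time.Second // same value as backoffQOrderingWindowDuration
	base := time.Date(2025, 5, 7, 13, 0, 0, 0, time.UTC)
	a := base.Add(120 * time.Millisecond).Truncate(window)
	b := base.Add(870 * time.Millisecond).Truncate(window)
	c := base.Add(1050 * time.Millisecond).Truncate(window)
	fmt.Println(a.Equal(b)) // true: both land in the same 1s window
	fmt.Println(a.Equal(c)) // false: c falls into the next window
}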
// waitUntilAlignedWithOrderingWindow waits until the time reaches a multiple of backoffQOrderingWindowDuration.
// It then runs the f function at the backoffQOrderingWindowDuration interval using a ticker.
// It's important to align the flushing time, because podBackoffQ's ordering is based on the windows
// and whole windows have to be flushed at one time without a visible latency.
func (bq *backoffQueue) waitUntilAlignedWithOrderingWindow(f func(), stopCh <-chan struct{}) {
now := bq.clock.Now()
// Wait until the time reaches a multiple of backoffQOrderingWindowDuration.
durationToNextWindow := bq.alignToWindow(now.Add(backoffQOrderingWindowDuration)).Sub(now)
timer := bq.clock.NewTimer(durationToNextWindow)
select {
case <-stopCh:
timer.Stop()
return
case <-timer.C():
}
// Run a ticker to make sure the invocations of f function
// are aligned with the backoffQ's ordering window.
ticker := bq.clock.NewTicker(backoffQOrderingWindowDuration)
for {
select {
case <-stopCh:
return
default:
}
f()
// NOTE: b/c there is no priority selection in golang
// it is possible for this to race, meaning we could
// trigger ticker.C and stopCh, and ticker.C select falls through.
// In order to mitigate we re-check stopCh at the beginning
// of every loop to prevent extra executions of f().
select {
case <-stopCh:
ticker.Stop()
return
case <-ticker.C():
}
}
}
// lessBackoffCompletedWithPriority is a less function of podBackoffQ if PopFromBackoffQ feature is enabled.
// It orders the pods in the same backoff ordering window the same way the activeQ does, to improve the popping order from the backoffQ when the activeQ is empty.
func (bq *backoffQueue) lessBackoffCompletedWithPriority(pInfo1, pInfo2 *framework.QueuedPodInfo) bool {
bo1 := bq.getBackoffTime(pInfo1)
bo2 := bq.getBackoffTime(pInfo2)
if !bo1.Equal(bo2) {
return bo1.Before(bo2)
}
// If the backoff time is the same, sort the pod in the same manner as activeQ does.
return bq.activeQLessFn(pInfo1, pInfo2)
}
// lessBackoffCompleted is a less function of podErrorBackoffQ.
func (bq *backoffQueue) lessBackoffCompleted(pInfo1, pInfo2 *framework.QueuedPodInfo) bool {
bo1 := bq.getBackoffTime(pInfo1)
bo2 := bq.getBackoffTime(pInfo2)
return bo1.Before(bo2)
}
// isPodBackingoff returns true if a pod is still waiting for its backoff timer.
// If this returns true, the pod should not be re-tried.
// If the pod backoff time is in the current ordering window, it should still be backing off.
func (bq *backoffQueue) isPodBackingoff(podInfo *framework.QueuedPodInfo) bool {
boTime := bq.getBackoffTime(podInfo)
// Don't use After, because when the windows are equal we want to return true.
return !boTime.Before(bq.alignToWindow(bq.clock.Now()))
}
// getBackoffTime returns the time that podInfo completes backoff.
// It caches the result in podInfo.BackoffExpiration and returns this value in subsequent calls.
// The cache will be cleared when this pod is popped from the scheduling queue again (i.e., at activeQ's pop),
// because of the fact that the backoff time is calculated based on podInfo.Attempts,
// which doesn't get changed until the pod's scheduling is retried.
func (bq *backoffQueue) getBackoffTime(podInfo *framework.QueuedPodInfo) time.Time {
if podInfo.Attempts == 0 {
// Don't store the backoff expiration if the duration is 0,
// so that isPodBackingoff correctly lets a pod that hasn't been tried at all skip backoff.
return time.Time{}
}
if podInfo.BackoffExpiration.IsZero() {
duration := bq.calculateBackoffDuration(podInfo)
podInfo.BackoffExpiration = bq.alignToWindow(podInfo.Timestamp.Add(duration))
}
return podInfo.BackoffExpiration
}
// calculateBackoffDuration is a helper function for calculating the backoffDuration
// based on the number of attempts the pod has made.
func (bq *backoffQueue) calculateBackoffDuration(podInfo *framework.QueuedPodInfo) time.Duration {
if podInfo.Attempts == 0 {
// When the Pod hasn't experienced any scheduling attempts,
// it isn't obliged to get a backoff penalty at all.
return 0
}
duration := bq.podInitialBackoff
for i := 1; i < podInfo.Attempts; i++ {
// Use subtraction instead of addition or multiplication to avoid overflow.
if duration > bq.podMaxBackoff-duration {
return bq.podMaxBackoff
}
duration += duration
}
return duration
}
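To make the doubling concrete, assume (for illustration only) an initial backoff of 1s and a maximum of 10s: attempts 1 through 5 then yield 1s, 2s, 4s, 8s, 10s, and the duration > max-duration guard caps the result without ever overflowing time.Duration. The same loop, extracted into a runnable form:

package main

import (
	"fmt"
	"time"
)

// backoffFor mirrors calculateBackoffDuration: exponential doubling capped
// at max, using subtraction instead of addition to avoid overflow.
func backoffFor(attempts int, initial, max time.Duration) time.Duration {
	if attempts == 0 {
		return 0
	}
	d := initial
	for i := 1; i < attempts; i++ {
		if d > max-d { // doubling would exceed (or overflow past) max
			return max
		}
		d += d
	}
	return d
}

func main() {
	for attempts := 1; attempts <= 5; attempts++ {
		// Prints 1s, 2s, 4s, 8s, 10s with the assumed 1s/10s settings.
		fmt.Println(attempts, backoffFor(attempts, time.Second, 10*time.Second))
	}
}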
func (bq *backoffQueue) popAllBackoffCompletedWithQueue(logger klog.Logger, queue *heap.Heap[*framework.QueuedPodInfo]) []*framework.QueuedPodInfo {
var poppedPods []*framework.QueuedPodInfo
for {
pInfo, ok := queue.Peek()
if !ok || pInfo == nil {
break
}
pod := pInfo.Pod
if bq.isPodBackingoff(pInfo) {
break
}
_, err := queue.Pop()
if err != nil {
logger.Error(err, "Unable to pop pod from backoff queue despite backoff completion", "pod", klog.KObj(pod))
break
}
poppedPods = append(poppedPods, pInfo)
}
return poppedPods
}
// popAllBackoffCompleted pops all pods from podBackoffQ and podErrorBackoffQ that completed backoff.
func (bq *backoffQueue) popAllBackoffCompleted(logger klog.Logger) []*framework.QueuedPodInfo {
bq.lock.Lock()
defer bq.lock.Unlock()
// Pop the completed pods from both queues.
return append(bq.popAllBackoffCompletedWithQueue(logger, bq.podBackoffQ), bq.popAllBackoffCompletedWithQueue(logger, bq.podErrorBackoffQ)...)
}
// add adds the pInfo to backoffQueue.
// The event should show which event triggered this addition and is used for the metric recording.
// It also ensures that pInfo is not in both queues.
func (bq *backoffQueue) add(logger klog.Logger, pInfo *framework.QueuedPodInfo, event string) {
bq.lock.Lock()
defer bq.lock.Unlock()
// If the pod has both empty unschedulable plugins and pending plugins,
// it means that it failed because of an error and should be moved to the podErrorBackoffQ.
if pInfo.UnschedulablePlugins.Len() == 0 && pInfo.PendingPlugins.Len() == 0 {
bq.podErrorBackoffQ.AddOrUpdate(pInfo)
// Ensure the pod is not in the podBackoffQ and report the error if it happens.
err := bq.podBackoffQ.Delete(pInfo)
if err == nil {
logger.Error(nil, "BackoffQueue add() was called with a pod that was already in the podBackoffQ", "pod", klog.KObj(pInfo.Pod))
return
}
metrics.SchedulerQueueIncomingPods.WithLabelValues("backoff", event).Inc()
return
}
bq.podBackoffQ.AddOrUpdate(pInfo)
// Ensure the pod is not in the podErrorBackoffQ and report the error if it happens.
err := bq.podErrorBackoffQ.Delete(pInfo)
if err == nil {
logger.Error(nil, "BackoffQueue add() was called with a pod that was already in the podErrorBackoffQ", "pod", klog.KObj(pInfo.Pod))
return
}
metrics.SchedulerQueueIncomingPods.WithLabelValues("backoff", event).Inc()
}
// update updates the pod in backoffQueue if oldPodInfo is already in the queue.
// It returns new pod info if updated, nil otherwise.
func (bq *backoffQueue) update(newPod *v1.Pod, oldPodInfo *framework.QueuedPodInfo) *framework.QueuedPodInfo {
bq.lock.Lock()
defer bq.lock.Unlock()
// If the pod is in the backoff queue, update it there.
if pInfo, exists := bq.podBackoffQ.Get(oldPodInfo); exists {
_ = pInfo.Update(newPod)
bq.podBackoffQ.AddOrUpdate(pInfo)
return pInfo
}
// If the pod is in the error backoff queue, update it there.
if pInfo, exists := bq.podErrorBackoffQ.Get(oldPodInfo); exists {
_ = pInfo.Update(newPod)
bq.podErrorBackoffQ.AddOrUpdate(pInfo)
return pInfo
}
return nil
}
// delete deletes the pInfo from backoffQueue.
// It returns true if the pod was deleted.
func (bq *backoffQueue) delete(pInfo *framework.QueuedPodInfo) bool {
bq.lock.Lock()
defer bq.lock.Unlock()
if bq.podBackoffQ.Delete(pInfo) == nil {
return true
}
return bq.podErrorBackoffQ.Delete(pInfo) == nil
}
// popBackoff pops the pInfo from the podBackoffQ.
// It returns an error if the queue is empty.
// This doesn't pop the pods from the podErrorBackoffQ.
func (bq *backoffQueue) popBackoff() (*framework.QueuedPodInfo, error) {
bq.lock.Lock()
defer bq.lock.Unlock()
return bq.podBackoffQ.Pop()
}
// get returns the pInfo matching given pInfoLookup, if exists.
func (bq *backoffQueue) get(pInfoLookup *framework.QueuedPodInfo) (*framework.QueuedPodInfo, bool) {
bq.lock.RLock()
defer bq.lock.RUnlock()
pInfo, exists := bq.podBackoffQ.Get(pInfoLookup)
if exists {
return pInfo, true
}
return bq.podErrorBackoffQ.Get(pInfoLookup)
}
// has informs whether pInfo exists in the queue.
func (bq *backoffQueue) has(pInfo *framework.QueuedPodInfo) bool {
bq.lock.RLock()
defer bq.lock.RUnlock()
return bq.podBackoffQ.Has(pInfo) || bq.podErrorBackoffQ.Has(pInfo)
}
// list returns all pods that are in the queue.
func (bq *backoffQueue) list() []*v1.Pod {
bq.lock.RLock()
defer bq.lock.RUnlock()
var result []*v1.Pod
for _, pInfo := range bq.podBackoffQ.List() {
result = append(result, pInfo.Pod)
}
for _, pInfo := range bq.podErrorBackoffQ.List() {
result = append(result, pInfo.Pod)
}
return result
}
// len returns length of the queue.
func (bq *backoffQueue) len() int {
bq.lock.RLock()
defer bq.lock.RUnlock()
return bq.podBackoffQ.Len() + bq.podErrorBackoffQ.Len()
}
// lenBackoff returns length of the podBackoffQ.
func (bq *backoffQueue) lenBackoff() int {
bq.lock.RLock()
defer bq.lock.RUnlock()
return bq.podBackoffQ.Len()
}

View File

@@ -35,10 +35,10 @@ import (
type nominator struct {
// nLock synchronizes all operations related to nominator.
// It should not be used anywhere else.
// Caution: DO NOT take ("SchedulingQueue.lock" or "activeQueue.lock") after taking "nLock".
// You should always take "SchedulingQueue.lock" and "activeQueue.lock" first,
// Caution: DO NOT take ("SchedulingQueue.lock" or "activeQueue.lock" or "backoffQueue.lock") after taking "nLock".
// You should always take "SchedulingQueue.lock" and "activeQueue.lock" and "backoffQueue.lock" first,
// otherwise the nominator could end up in deadlock.
// Correct locking order is: SchedulingQueue.lock > activeQueue.lock > nLock.
// Correct locking order is: SchedulingQueue.lock > activeQueue.lock = backoffQueue.lock > nLock.
nLock sync.RWMutex
// podLister is used to verify if the given pod is alive.

View File

@@ -132,6 +132,9 @@ type SchedulingQueue interface {
PendingPods() ([]*v1.Pod, string)
InFlightPods() []*v1.Pod
PodsInActiveQ() []*v1.Pod
// PodsInBackoffQ returns all the Pods in the backoffQ.
PodsInBackoffQ() []*v1.Pod
UnschedulablePods() []*v1.Pod
}
// NewSchedulingQueue initializes a priority queue as a new scheduling queue.
@@ -155,24 +158,18 @@ type PriorityQueue struct {
*nominator
stop chan struct{}
clock clock.Clock
clock clock.WithTicker
// lock takes precedence and should be taken first,
// before any other locks in the queue (activeQueue.lock or nominator.nLock).
// Correct locking order is: lock > activeQueue.lock > nominator.nLock.
// before any other locks in the queue (activeQueue.lock or backoffQueue.lock or nominator.nLock).
// Correct locking order is: lock > activeQueue.lock > backoffQueue.lock > nominator.nLock.
lock sync.RWMutex
// pod initial backoff duration.
podInitialBackoffDuration time.Duration
// pod maximum backoff duration.
podMaxBackoffDuration time.Duration
// the maximum time a pod can stay in the unschedulablePods.
podMaxInUnschedulablePodsDuration time.Duration
activeQ activeQueuer
// podBackoffQ is a heap ordered by backoff expiry. Pods which have completed backoff
// are popped from this heap before the scheduler looks at activeQ
podBackoffQ *heap.Heap[*framework.QueuedPodInfo]
activeQ activeQueuer
backoffQ backoffQueuer
// unschedulablePods holds pods that have been tried and determined unschedulable.
unschedulablePods *UnschedulablePods
// moveRequestCycle caches the sequence number of scheduling cycle when we
@@ -195,6 +192,8 @@ type PriorityQueue struct {
// isSchedulingQueueHintEnabled indicates whether the feature gate for the scheduling queue is enabled.
isSchedulingQueueHintEnabled bool
// isPopFromBackoffQEnabled indicates whether the feature gate SchedulerPopFromBackoffQ is enabled.
isPopFromBackoffQEnabled bool
}
// QueueingHintFunction is the wrapper of QueueingHintFn that has PluginName.
@@ -213,7 +212,7 @@ type clusterEvent struct {
}
type priorityQueueOptions struct {
clock clock.Clock
clock clock.WithTicker
podInitialBackoffDuration time.Duration
podMaxBackoffDuration time.Duration
podMaxInUnschedulablePodsDuration time.Duration
@@ -228,7 +227,7 @@ type priorityQueueOptions struct {
type Option func(*priorityQueueOptions)
// WithClock sets clock for PriorityQueue, the default clock is clock.RealClock.
func WithClock(clock clock.Clock) Option {
func WithClock(clock clock.WithTicker) Option {
return func(o *priorityQueueOptions) {
o.clock = clock
}
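Narrowing WithClock from clock.Clock to clock.WithTicker follows from waitUntilAlignedWithOrderingWindow, which needs NewTimer and NewTicker on the injected clock. Tests can keep injecting a clock because FakeClock from k8s.io/utils/clock/testing implements WithTicker; roughly (the helper below is a sketch, not part of this patch):

import (
	"time"

	testingclock "k8s.io/utils/clock/testing"
)

// testQueueOptions shows that a fake clock still satisfies the narrowed
// WithClock signature: *testingclock.FakeClock provides NewTimer and
// NewTicker and therefore implements clock.WithTicker.
func testQueueOptions() []Option {
	fakeClock := testingclock.NewFakeClock(time.Now())
	return []Option{WithClock(fakeClock)}
}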
@@ -331,14 +330,14 @@ func NewPriorityQueue(
}
isSchedulingQueueHintEnabled := utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints)
isPopFromBackoffQEnabled := utilfeature.DefaultFeatureGate.Enabled(features.SchedulerPopFromBackoffQ)
backoffQ := newBackoffQueue(options.clock, options.podInitialBackoffDuration, options.podMaxBackoffDuration, lessFn, isPopFromBackoffQEnabled)
pq := &PriorityQueue{
clock: options.clock,
stop: make(chan struct{}),
podInitialBackoffDuration: options.podInitialBackoffDuration,
podMaxBackoffDuration: options.podMaxBackoffDuration,
podMaxInUnschedulablePodsDuration: options.podMaxInUnschedulablePodsDuration,
activeQ: newActiveQueue(heap.NewWithRecorder(podInfoKeyFunc, heap.LessFunc[*framework.QueuedPodInfo](lessFn), metrics.NewActivePodsRecorder()), isSchedulingQueueHintEnabled, options.metricsRecorder),
backoffQ: backoffQ,
unschedulablePods: newUnschedulablePods(metrics.NewUnschedulablePodsRecorder(), metrics.NewGatedPodsRecorder()),
preEnqueuePluginMap: options.preEnqueuePluginMap,
queueingHintMap: options.queueingHintMap,
@@ -346,19 +345,24 @@
pluginMetricsSamplePercent: options.pluginMetricsSamplePercent,
moveRequestCycle: -1,
isSchedulingQueueHintEnabled: isSchedulingQueueHintEnabled,
isPopFromBackoffQEnabled: isPopFromBackoffQEnabled,
}
pq.podBackoffQ = heap.NewWithRecorder(podInfoKeyFunc, pq.podsCompareBackoffCompleted, metrics.NewBackoffPodsRecorder())
var backoffQPopper backoffQPopper
if isPopFromBackoffQEnabled {
backoffQPopper = backoffQ
}
pq.activeQ = newActiveQueue(heap.NewWithRecorder(podInfoKeyFunc, heap.LessFunc[*framework.QueuedPodInfo](lessFn), metrics.NewActivePodsRecorder()), isSchedulingQueueHintEnabled, options.metricsRecorder, backoffQPopper)
pq.nsLister = informerFactory.Core().V1().Namespaces().Lister()
pq.nominator = newPodNominator(options.podLister)
return pq
}
// Run starts the goroutine to pump from podBackoffQ to activeQ
// Run starts the goroutine to pump from backoffQ to activeQ
func (p *PriorityQueue) Run(logger klog.Logger) {
go wait.Until(func() {
go p.backoffQ.waitUntilAlignedWithOrderingWindow(func() {
p.flushBackoffQCompleted(logger)
}, 1.0*time.Second, p.stop)
}, p.stop)
go wait.Until(func() {
p.flushUnschedulablePodsLeftover(logger)
}, 30*time.Second, p.stop)
@@ -553,25 +557,33 @@ func (p *PriorityQueue) runPreEnqueuePlugin(ctx context.Context, pl framework.Pr
return s
}
// moveToActiveQ tries to add pod to active queue and remove it from unschedulable and backoff queues.
// It returns 2 parameters:
// 1. a boolean flag to indicate whether the pod is added successfully.
// 2. an error for the caller to act on.
// moveToActiveQ tries to add the pod to the active queue.
// If the pod doesn't pass PreEnqueue plugins, it gets added to unschedulablePods instead.
// It returns a boolean flag to indicate whether the pod is added successfully.
func (p *PriorityQueue) moveToActiveQ(logger klog.Logger, pInfo *framework.QueuedPodInfo, event string) bool {
gatedBefore := pInfo.Gated
pInfo.Gated = !p.runPreEnqueuePlugins(context.Background(), pInfo)
// If SchedulerPopFromBackoffQ feature gate is enabled,
// PreEnqueue plugins were called when the pod was added to the backoffQ.
// There is no need to repeat them here when the pod is moved directly from the backoffQ.
if !p.isPopFromBackoffQEnabled || event != framework.BackoffComplete {
pInfo.Gated = !p.runPreEnqueuePlugins(context.Background(), pInfo)
}
added := false
p.activeQ.underLock(func(unlockedActiveQ unlockedActiveQueuer) {
if pInfo.Gated {
// Add the Pod to unschedulablePods if it's not passing PreEnqueuePlugins.
if unlockedActiveQ.Has(pInfo) {
if unlockedActiveQ.has(pInfo) {
return
}
if p.podBackoffQ.Has(pInfo) {
if p.backoffQ.has(pInfo) {
return
}
p.unschedulablePods.addOrUpdate(pInfo)
if p.unschedulablePods.get(pInfo.Pod) != nil {
return
}
p.unschedulablePods.addOrUpdate(pInfo, event)
logger.V(5).Info("Pod moved to an internal scheduling queue, because the pod is gated", "pod", klog.KObj(pInfo.Pod), "event", event, "queue", unschedulablePods)
return
}
if pInfo.InitialAttemptTimestamp == nil {
@@ -579,13 +591,12 @@ func (p *PriorityQueue) moveToActiveQ(logger klog.Logger, pInfo *framework.Queue
pInfo.InitialAttemptTimestamp = &now
}
unlockedActiveQ.AddOrUpdate(pInfo)
unlockedActiveQ.add(pInfo, event)
added = true
p.unschedulablePods.delete(pInfo.Pod, gatedBefore)
_ = p.podBackoffQ.Delete(pInfo) // Don't need to react when pInfo is not found.
p.backoffQ.delete(pInfo)
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", event, "queue", activeQ)
metrics.SchedulerQueueIncomingPods.WithLabelValues("active", event).Inc()
if event == framework.EventUnscheduledPodAdd.Label() || event == framework.EventUnscheduledPodUpdate.Label() {
p.AddNominatedPod(logger, pInfo.PodInfo, nil)
}
@@ -593,6 +604,28 @@ func (p *PriorityQueue) moveToActiveQ(logger klog.Logger, pInfo *framework.Queue
return added
}
// moveToBackoffQ tries to add the pod to the backoff queue.
// If SchedulerPopFromBackoffQ feature gate is enabled and the pod doesn't pass PreEnqueue plugins, it gets added to unschedulablePods instead.
// It returns a boolean flag to indicate whether the pod is added successfully.
func (p *PriorityQueue) moveToBackoffQ(logger klog.Logger, pInfo *framework.QueuedPodInfo, event string) bool {
// If SchedulerPopFromBackoffQ feature gate is enabled,
// PreEnqueue plugins are called when pods are inserted into the backoffQ,
// so that they don't have to be called again when pods are popped out.
if p.isPopFromBackoffQEnabled {
pInfo.Gated = !p.runPreEnqueuePlugins(context.Background(), pInfo)
if pInfo.Gated {
if p.unschedulablePods.get(pInfo.Pod) == nil {
p.unschedulablePods.addOrUpdate(pInfo, event)
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", event, "queue", unschedulablePods)
}
return false
}
}
p.backoffQ.add(logger, pInfo, event)
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", event, "queue", backoffQ)
return true
}
// Add adds a pod to the active queue. It should be called only when a new pod
// is added so there is no chance the pod is already in active/unschedulable/backoff queues
func (p *PriorityQueue) Add(logger klog.Logger, pod *v1.Pod) {
@@ -641,10 +674,16 @@ func (p *PriorityQueue) activate(logger klog.Logger, pod *v1.Pod) bool {
// If the pod doesn't belong to unschedulablePods or backoffQ, don't activate it.
// The pod can be already in activeQ.
var exists bool
pInfo, exists = p.podBackoffQ.Get(newQueuedPodInfoForLookup(pod))
pInfo, exists = p.backoffQ.get(newQueuedPodInfoForLookup(pod))
if !exists {
return false
}
// Delete pod from the backoffQ now to make sure it won't be popped from the backoffQ
// just before moving it to the activeQ
if deleted := p.backoffQ.delete(pInfo); !deleted {
// Pod was popped from the backoffQ in the meantime. Don't activate it.
return false
}
}
if pInfo == nil {
@@ -656,13 +695,6 @@ func (p *PriorityQueue) activate(logger klog.Logger, pod *v1.Pod) bool {
return p.moveToActiveQ(logger, pInfo, framework.ForceActivate)
}
// isPodBackingoff returns true if a pod is still waiting for its backoff timer.
// If this returns true, the pod should not be re-tried.
func (p *PriorityQueue) isPodBackingoff(podInfo *framework.QueuedPodInfo) bool {
boTime := p.getBackoffTime(podInfo)
return boTime.After(p.clock.Now())
}
// SchedulingCycle returns current scheduling cycle.
func (p *PriorityQueue) SchedulingCycle() int64 {
return p.activeQ.schedulingCycle()
@@ -712,7 +744,7 @@ func (p *PriorityQueue) determineSchedulingHintForInFlightPod(logger klog.Logger
// addUnschedulableIfNotPresentWithoutQueueingHint inserts a pod that cannot be scheduled into
// the queue, unless it is already in the queue. Normally, PriorityQueue puts
// unschedulable pods in `unschedulablePods`. But if there has been a recent move
// request, then the pod is put in `podBackoffQ`.
// request, then the pod is put in `backoffQ`.
// TODO: This function is called only when p.isSchedulingQueueHintEnabled is false,
// and this will be removed after SchedulingQueueHint goes to stable and the feature gate is removed.
func (p *PriorityQueue) addUnschedulableWithoutQueueingHint(logger klog.Logger, pInfo *framework.QueuedPodInfo, podSchedulingCycle int64) error {
@@ -736,13 +768,14 @@ func (p *PriorityQueue) addUnschedulableWithoutQueueingHint(logger klog.Logger,
// - No unschedulable plugins are associated with this Pod,
// meaning something unusual (a temporary failure on kube-apiserver, etc.) happened and this Pod gets moved back to the queue.
// In this case, we should retry scheduling it because this Pod may not be retried until the next flush.
p.podBackoffQ.AddOrUpdate(pInfo)
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", framework.ScheduleAttemptFailure, "queue", backoffQ)
metrics.SchedulerQueueIncomingPods.WithLabelValues("backoff", framework.ScheduleAttemptFailure).Inc()
if added := p.moveToBackoffQ(logger, pInfo, framework.ScheduleAttemptFailure); added {
if p.isPopFromBackoffQEnabled {
p.activeQ.broadcast()
}
}
} else {
p.unschedulablePods.addOrUpdate(pInfo)
p.unschedulablePods.addOrUpdate(pInfo, framework.ScheduleAttemptFailure)
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", framework.ScheduleAttemptFailure, "queue", unschedulablePods)
metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", framework.ScheduleAttemptFailure).Inc()
}
return nil
@@ -751,7 +784,7 @@ func (p *PriorityQueue) addUnschedulableWithoutQueueingHint(logger klog.Logger,
// AddUnschedulableIfNotPresent inserts a pod that cannot be scheduled into
// the queue, unless it is already in the queue. Normally, PriorityQueue puts
// unschedulable pods in `unschedulablePods`. But if there has been a recent move
// request, then the pod is put in `podBackoffQ`.
// request, then the pod is put in `backoffQ`.
func (p *PriorityQueue) AddUnschedulableIfNotPresent(logger klog.Logger, pInfo *framework.QueuedPodInfo, podSchedulingCycle int64) error {
p.lock.Lock()
defer p.lock.Unlock()
@@ -767,7 +800,7 @@ func (p *PriorityQueue) AddUnschedulableIfNotPresent(logger klog.Logger, pInfo *
if p.activeQ.has(pInfo) {
return fmt.Errorf("Pod %v is already present in the active queue", klog.KObj(pod))
}
if p.podBackoffQ.Has(pInfo) {
if p.backoffQ.has(pInfo) {
return fmt.Errorf("Pod %v is already present in the backoff queue", klog.KObj(pod))
}
@@ -792,7 +825,7 @@ func (p *PriorityQueue) AddUnschedulableIfNotPresent(logger klog.Logger, pInfo *
// In this case, we try to requeue this Pod to activeQ/backoffQ.
queue := p.requeuePodViaQueueingHint(logger, pInfo, schedulingHint, framework.ScheduleAttemptFailure)
logger.V(3).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", framework.ScheduleAttemptFailure, "queue", queue, "schedulingCycle", podSchedulingCycle, "hint", schedulingHint, "unschedulable plugins", rejectorPlugins)
if queue == activeQ {
if queue == activeQ || (p.isPopFromBackoffQEnabled && queue == backoffQ) {
// When the Pod is moved to activeQ, need to let p.cond know so that the Pod will be pop()ed out.
p.activeQ.broadcast()
}
@@ -805,25 +838,12 @@ func (p *PriorityQueue) flushBackoffQCompleted(logger klog.Logger) {
p.lock.Lock()
defer p.lock.Unlock()
activated := false
for {
pInfo, ok := p.podBackoffQ.Peek()
if !ok || pInfo == nil {
break
}
pod := pInfo.Pod
if p.isPodBackingoff(pInfo) {
break
}
_, err := p.podBackoffQ.Pop()
if err != nil {
logger.Error(err, "Unable to pop pod from backoff queue despite backoff completion", "pod", klog.KObj(pod))
break
}
podsCompletedBackoff := p.backoffQ.popAllBackoffCompleted(logger)
for _, pInfo := range podsCompletedBackoff {
if added := p.moveToActiveQ(logger, pInfo, framework.BackoffComplete); added {
activated = true
}
}
if activated {
p.activeQ.broadcast()
}
@@ -928,10 +948,8 @@ func (p *PriorityQueue) Update(logger klog.Logger, oldPod, newPod *v1.Pod) {
}
// If the pod is in the backoff queue, update it there.
if pInfo, exists := p.podBackoffQ.Get(oldPodInfo); exists {
_ = pInfo.Update(newPod)
if pInfo := p.backoffQ.update(newPod, oldPodInfo); pInfo != nil {
p.UpdateNominatedPod(logger, oldPod, pInfo.PodInfo)
p.podBackoffQ.AddOrUpdate(pInfo)
return
}
}
@@ -953,7 +971,7 @@ func (p *PriorityQueue) Update(logger klog.Logger, oldPod, newPod *v1.Pod) {
logger.V(5).Info("Pod moved to an internal scheduling queue because the Pod is updated", "pod", klog.KObj(newPod), "event", evt.Label(), "queue", queue)
p.unschedulablePods.delete(pInfo.Pod, gated)
}
if queue == activeQ {
if queue == activeQ || (p.isPopFromBackoffQEnabled && queue == backoffQ) {
p.activeQ.broadcast()
break
}
@@ -961,21 +979,26 @@ func (p *PriorityQueue) Update(logger klog.Logger, oldPod, newPod *v1.Pod) {
return
}
if isPodUpdated(oldPod, newPod) {
if p.isPodBackingoff(pInfo) {
p.podBackoffQ.AddOrUpdate(pInfo)
p.unschedulablePods.delete(pInfo.Pod, gated)
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", framework.EventUnscheduledPodUpdate.Label(), "queue", backoffQ)
// Pod might have completed its backoff time while being in unschedulablePods,
// so we should check isPodBackingoff before moving the pod to backoffQ.
if p.backoffQ.isPodBackingoff(pInfo) {
if added := p.moveToBackoffQ(logger, pInfo, framework.EventUnscheduledPodUpdate.Label()); added {
p.unschedulablePods.delete(pInfo.Pod, gated)
if p.isPopFromBackoffQEnabled {
p.activeQ.broadcast()
}
}
return
}
if added := p.moveToActiveQ(logger, pInfo, framework.BackoffComplete); added {
if added := p.moveToActiveQ(logger, pInfo, framework.EventUnscheduledPodUpdate.Label()); added {
p.activeQ.broadcast()
}
return
}
// Pod update didn't make it schedulable, keep it in the unschedulable queue.
p.unschedulablePods.addOrUpdate(pInfo)
p.unschedulablePods.addOrUpdate(pInfo, framework.EventUnscheduledPodUpdate.Label())
return
}
// If pod is not in any of the queues, we put it in the active queue.
@@ -992,12 +1015,14 @@ func (p *PriorityQueue) Delete(pod *v1.Pod) {
defer p.lock.Unlock()
p.DeleteNominatedPodIfExists(pod)
pInfo := newQueuedPodInfoForLookup(pod)
if err := p.activeQ.delete(pInfo); err != nil {
// The item was probably not found in the activeQ.
p.podBackoffQ.Delete(pInfo)
if pInfo = p.unschedulablePods.get(pod); pInfo != nil {
p.unschedulablePods.delete(pod, pInfo.Gated)
}
if err := p.activeQ.delete(pInfo); err == nil {
return
}
if deleted := p.backoffQ.delete(pInfo); deleted {
return
}
if pInfo = p.unschedulablePods.get(pod); pInfo != nil {
p.unschedulablePods.delete(pod, pInfo.Gated)
}
}
@@ -1065,28 +1090,24 @@ func (p *PriorityQueue) MoveAllToActiveOrBackoffQueue(logger klog.Logger, event
// NOTE: this function assumes lock has been acquired in caller
func (p *PriorityQueue) requeuePodViaQueueingHint(logger klog.Logger, pInfo *framework.QueuedPodInfo, strategy queueingStrategy, event string) string {
if strategy == queueSkip {
p.unschedulablePods.addOrUpdate(pInfo)
metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", event).Inc()
p.unschedulablePods.addOrUpdate(pInfo, event)
return unschedulablePods
}
if strategy == queueAfterBackoff && p.isPodBackingoff(pInfo) {
p.podBackoffQ.AddOrUpdate(pInfo)
metrics.SchedulerQueueIncomingPods.WithLabelValues("backoff", event).Inc()
return backoffQ
// Pod might have completed its backoff time while being in unschedulablePods,
// so we should check isPodBackingoff before moving the pod to backoffQ.
if strategy == queueAfterBackoff && p.backoffQ.isPodBackingoff(pInfo) {
if added := p.moveToBackoffQ(logger, pInfo, event); added {
return backoffQ
}
return unschedulablePods
}
// Reach here if schedulingHint is QueueImmediately, or schedulingHint is Queue but the pod is not backing off.
if added := p.moveToActiveQ(logger, pInfo, event); added {
return activeQ
}
if pInfo.Gated {
// In case the pod is gated, the Pod is pushed back to unschedulable Pods pool in moveToActiveQ.
return unschedulablePods
}
p.unschedulablePods.addOrUpdate(pInfo)
metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", framework.ScheduleAttemptFailure).Inc()
// Pod is gated. We don't have to push it back to unschedulable queue, because moveToActiveQ should already have done that.
return unschedulablePods
}
@@ -1128,7 +1149,7 @@ func (p *PriorityQueue) movePodsToActiveOrBackoffQueue(logger klog.Logger, podIn
p.unschedulablePods.delete(pInfo.Pod, pInfo.Gated)
queue := p.requeuePodViaQueueingHint(logger, pInfo, schedulingHint, event.Label())
logger.V(4).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", event.Label(), "queue", queue, "hint", schedulingHint)
if queue == activeQ {
if queue == activeQ || (p.isPopFromBackoffQEnabled && queue == backoffQ) {
activated = true
}
}
@@ -1180,6 +1201,20 @@ func (p *PriorityQueue) PodsInActiveQ() []*v1.Pod {
return p.activeQ.list()
}
// PodsInBackoffQ returns all the Pods in the backoffQ.
func (p *PriorityQueue) PodsInBackoffQ() []*v1.Pod {
return p.backoffQ.list()
}
// UnschedulablePods returns all the pods in unschedulable state.
func (p *PriorityQueue) UnschedulablePods() []*v1.Pod {
var result []*v1.Pod
for _, pInfo := range p.unschedulablePods.podInfoMap {
result = append(result, pInfo.Pod)
}
return result
}
var pendingPodsSummary = "activeQ:%v; backoffQ:%v; unschedulablePods:%v"
// GetPod searches for a pod in the activeQ, backoffQ, and unschedulablePods.
@@ -1197,7 +1232,7 @@ func (p *PriorityQueue) GetPod(name, namespace string) (pInfo *framework.QueuedP
},
},
}
if pInfo, ok = p.podBackoffQ.Get(pInfoLookup); ok {
if pInfo, ok = p.backoffQ.get(pInfoLookup); ok {
return pInfo, true
}
if pInfo = p.unschedulablePods.get(pInfoLookup.Pod); pInfo != nil {
@@ -1205,7 +1240,7 @@ func (p *PriorityQueue) GetPod(name, namespace string) (pInfo *framework.QueuedP
}
p.activeQ.underRLock(func(unlockedActiveQ unlockedActiveQueueReader) {
pInfo, ok = unlockedActiveQ.Get(pInfoLookup)
pInfo, ok = unlockedActiveQ.get(pInfoLookup)
})
return
}
@@ -1216,15 +1251,15 @@ func (p *PriorityQueue) GetPod(name, namespace string) (pInfo *framework.QueuedP
func (p *PriorityQueue) PendingPods() ([]*v1.Pod, string) {
p.lock.RLock()
defer p.lock.RUnlock()
result := p.activeQ.list()
result := p.PodsInActiveQ()
activeQLen := len(result)
for _, pInfo := range p.podBackoffQ.List() {
result = append(result, pInfo.Pod)
}
backoffQPods := p.PodsInBackoffQ()
backoffQLen := len(backoffQPods)
result = append(result, backoffQPods...)
for _, pInfo := range p.unschedulablePods.podInfoMap {
result = append(result, pInfo.Pod)
}
return result, fmt.Sprintf(pendingPodsSummary, activeQLen, p.podBackoffQ.Len(), len(p.unschedulablePods.podInfoMap))
return result, fmt.Sprintf(pendingPodsSummary, activeQLen, backoffQLen, len(p.unschedulablePods.podInfoMap))
}
// Note: this function assumes the caller locks both p.lock.RLock and p.activeQ.getLock().RLock.
@@ -1232,7 +1267,7 @@ func (p *PriorityQueue) nominatedPodToInfo(np podRef, unlockedActiveQ unlockedAc
pod := np.toPod()
pInfoLookup := newQueuedPodInfoForLookup(pod)
queuedPodInfo, exists := unlockedActiveQ.Get(pInfoLookup)
queuedPodInfo, exists := unlockedActiveQ.get(pInfoLookup)
if exists {
return queuedPodInfo.PodInfo
}
@@ -1242,7 +1277,7 @@ func (p *PriorityQueue) nominatedPodToInfo(np podRef, unlockedActiveQ unlockedAc
return queuedPodInfo.PodInfo
}
queuedPodInfo, exists = p.podBackoffQ.Get(pInfoLookup)
queuedPodInfo, exists = p.backoffQ.get(pInfoLookup)
if exists {
return queuedPodInfo.PodInfo
}
@@ -1276,12 +1311,6 @@ func (p *PriorityQueue) NominatedPodsForNode(nodeName string) []*framework.PodIn
return pods
}
func (p *PriorityQueue) podsCompareBackoffCompleted(pInfo1, pInfo2 *framework.QueuedPodInfo) bool {
bo1 := p.getBackoffTime(pInfo1)
bo2 := p.getBackoffTime(pInfo2)
return bo1.Before(bo2)
}
// newQueuedPodInfo builds a QueuedPodInfo object.
func (p *PriorityQueue) newQueuedPodInfo(pod *v1.Pod, plugins ...string) *framework.QueuedPodInfo {
now := p.clock.Now()
@@ -1296,33 +1325,6 @@ func (p *PriorityQueue) newQueuedPodInfo(pod *v1.Pod, plugins ...string) *framew
}
}
// getBackoffTime returns the time that podInfo completes backoff
func (p *PriorityQueue) getBackoffTime(podInfo *framework.QueuedPodInfo) time.Time {
duration := p.calculateBackoffDuration(podInfo)
backoffTime := podInfo.Timestamp.Add(duration)
return backoffTime
}
// calculateBackoffDuration is a helper function for calculating the backoffDuration
// based on the number of attempts the pod has made.
func (p *PriorityQueue) calculateBackoffDuration(podInfo *framework.QueuedPodInfo) time.Duration {
if podInfo.Attempts == 0 {
// When the Pod hasn't experienced any scheduling attempts,
// they aren't obliged to get a backoff penalty at all.
return 0
}
duration := p.podInitialBackoffDuration
for i := 1; i < podInfo.Attempts; i++ {
// Use subtraction instead of addition or multiplication to avoid overflow.
if duration > p.podMaxBackoffDuration-duration {
return p.podMaxBackoffDuration
}
duration += duration
}
return duration
}
// UnschedulablePods holds pods that cannot be scheduled. This data structure
// is used to implement unschedulablePods.
type UnschedulablePods struct {
@@ -1335,7 +1337,8 @@ type UnschedulablePods struct {
}
// addOrUpdate adds a pod to the unschedulable podInfoMap.
func (u *UnschedulablePods) addOrUpdate(pInfo *framework.QueuedPodInfo) {
// The event should show which event triggered the addition and is used for the metric recording.
func (u *UnschedulablePods) addOrUpdate(pInfo *framework.QueuedPodInfo, event string) {
podID := u.keyFunc(pInfo.Pod)
if _, exists := u.podInfoMap[podID]; !exists {
if pInfo.Gated && u.gatedRecorder != nil {
@@ -1343,6 +1346,7 @@ func (u *UnschedulablePods) addOrUpdate(pInfo *framework.QueuedPodInfo) {
} else if !pInfo.Gated && u.unschedulableRecorder != nil {
u.unschedulableRecorder.Inc()
}
metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", event).Inc()
}
u.podInfoMap[podID] = pInfo
}