Migrate from snapClient.VolumesnapshotV1alpha1Client to snapClient.SnapshotV1alpha1Client and also update kube dependency

Signed-off-by: Humble Chirammal <hchiramm@redhat.com>
Authored by Humble Chirammal on 2019-06-24 14:38:09 +05:30, committed by mergify[bot]
parent 3bc6771df8, commit 22ff5c0911
1031 changed files with 34242 additions and 177906 deletions

View File

@ -20,6 +20,7 @@ import (
"fmt"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/rand"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/features"
@ -29,16 +30,20 @@ import (
// CSIMaxVolumeLimitChecker defines predicate needed for counting CSI volumes
type CSIMaxVolumeLimitChecker struct {
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo
scInfo StorageClassInfo
randomVolumeIDPrefix string
}
// NewCSIMaxVolumeLimitPredicate returns a predicate for counting CSI volumes
func NewCSIMaxVolumeLimitPredicate(
pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) FitPredicate {
pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo, scInfo StorageClassInfo) FitPredicate {
c := &CSIMaxVolumeLimitChecker{
pvInfo: pvInfo,
pvcInfo: pvcInfo,
pvInfo: pvInfo,
pvcInfo: pvcInfo,
scInfo: scInfo,
randomVolumeIDPrefix: rand.String(32),
}
return c.attachableLimitPredicate
}
@ -129,28 +134,70 @@ func (c *CSIMaxVolumeLimitChecker) filterAttachableVolumes(
continue
}
pvName := pvc.Spec.VolumeName
// TODO - the actual handling of unbound PVCs will be fixed by late binding design.
if pvName == "" {
klog.V(4).Infof("Persistent volume had no name for claim %s/%s", namespace, pvcName)
driverName, volumeHandle := c.getCSIDriver(pvc)
// If we can't find the driver name or volume handle, we don't count this volume.
if driverName == "" || volumeHandle == "" {
continue
}
pv, err := c.pvInfo.GetPersistentVolumeInfo(pvName)
if err != nil {
klog.V(4).Infof("Unable to look up PV info for PVC %s/%s and PV %s", namespace, pvcName, pvName)
continue
}
csiSource := pv.Spec.PersistentVolumeSource.CSI
if csiSource == nil {
klog.V(4).Infof("Not considering non-CSI volume %s/%s", namespace, pvcName)
continue
}
driverName := csiSource.Driver
volumeLimitKey := volumeutil.GetCSIAttachLimitKey(driverName)
result[csiSource.VolumeHandle] = volumeLimitKey
result[volumeHandle] = volumeLimitKey
}
return nil
}
func (c *CSIMaxVolumeLimitChecker) getCSIDriver(pvc *v1.PersistentVolumeClaim) (string, string) {
pvName := pvc.Spec.VolumeName
namespace := pvc.Namespace
pvcName := pvc.Name
placeHolderCSIDriver := ""
placeHolderHandle := ""
if pvName == "" {
klog.V(5).Infof("Persistent volume had no name for claim %s/%s", namespace, pvcName)
return c.getDriverNameFromSC(pvc)
}
pv, err := c.pvInfo.GetPersistentVolumeInfo(pvName)
if err != nil {
klog.V(4).Infof("Unable to look up PV info for PVC %s/%s and PV %s", namespace, pvcName, pvName)
// If we can't fetch the PV associated with the PVC, maybe it got deleted
// or the PVC was prebound to a PV that hasn't been created yet.
// Fall back to using the StorageClass for volume counting.
return c.getDriverNameFromSC(pvc)
}
csiSource := pv.Spec.PersistentVolumeSource.CSI
if csiSource == nil {
klog.V(5).Infof("Not considering non-CSI volume %s/%s", namespace, pvcName)
return placeHolderCSIDriver, placeHolderHandle
}
return csiSource.Driver, csiSource.VolumeHandle
}
func (c *CSIMaxVolumeLimitChecker) getDriverNameFromSC(pvc *v1.PersistentVolumeClaim) (string, string) {
namespace := pvc.Namespace
pvcName := pvc.Name
scName := pvc.Spec.StorageClassName
placeHolderCSIDriver := ""
placeHolderHandle := ""
if scName == nil {
// If no StorageClass is set, the PVC must be using immediate binding mode
// and hence must be bound before scheduling, so it is safe not to count it.
klog.V(5).Infof("pvc %s/%s has no storageClass", namespace, pvcName)
return placeHolderCSIDriver, placeHolderHandle
}
storageClass, err := c.scInfo.GetStorageClassInfo(*scName)
if err != nil {
klog.V(5).Infof("no storage %s found for pvc %s/%s", *scName, namespace, pvcName)
return placeHolderCSIDriver, placeHolderHandle
}
// We use a random prefix to avoid conflicts with real volume IDs. If the PVC gets bound while
// the predicate is running and another pod on the same node uses the same volume, we will
// overcount the volume and treat the two as different volumes.
volumeHandle := fmt.Sprintf("%s-%s/%s", c.randomVolumeIDPrefix, namespace, pvcName)
return storageClass.Provisioner, volumeHandle
}
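For context on the fallback above, here is a minimal, self-contained sketch (hypothetical claim type and counting helper, not the scheduler's own code) of why unbound PVCs are counted under a random-prefixed synthetic handle: the prefix guarantees the synthetic key can never collide with a real CSI volumeHandle, at the cost of possibly counting the same underlying volume twice while a claim is still unbound.

```go
// Sketch only: "claim" and "countVolumes" are illustrative names.
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/rand"
)

type claim struct {
	namespace, name string
	boundHandle     string // empty while the PVC is still unbound
}

func countVolumes(randomPrefix string, claims []claim) map[string]struct{} {
	handles := map[string]struct{}{}
	for _, c := range claims {
		if c.boundHandle != "" {
			handles[c.boundHandle] = struct{}{} // bound: use the real CSI volume handle
			continue
		}
		// Unbound: fall back to a synthetic handle that cannot clash with real handles.
		handles[fmt.Sprintf("%s-%s/%s", randomPrefix, c.namespace, c.name)] = struct{}{}
	}
	return handles
}

func main() {
	prefix := rand.String(32)
	claims := []claim{
		{namespace: "default", name: "data-0", boundHandle: "vol-abc123"},
		{namespace: "default", name: "data-1"}, // not yet bound
	}
	fmt.Println(len(countVolumes(prefix, claims))) // 2
}
```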

View File

@ -55,13 +55,6 @@ type topologyPair struct {
value string
}
// Note that predicateMetadata and matchingPodAntiAffinityTerm need to be declared in the same file
// due to the way declarations are processed in predicate declaration unit tests.
type matchingPodAntiAffinityTerm struct {
term *v1.PodAffinityTerm
node *v1.Node
}
type podSet map[*v1.Pod]struct{}
type topologyPairSet map[topologyPair]struct{}
@ -107,13 +100,10 @@ var _ PredicateMetadata = &predicateMetadata{}
// and used to modify the return values of PredicateMetadataProducer
type predicateMetadataProducer func(pm *predicateMetadata)
var predicateMetaProducerRegisterLock sync.Mutex
var predicateMetadataProducers = make(map[string]predicateMetadataProducer)
// RegisterPredicateMetadataProducer registers a PredicateMetadataProducer.
func RegisterPredicateMetadataProducer(predicateName string, precomp predicateMetadataProducer) {
predicateMetaProducerRegisterLock.Lock()
defer predicateMetaProducerRegisterLock.Unlock()
predicateMetadataProducers[predicateName] = precomp
}
@ -399,6 +389,8 @@ func getTPMapMatchingExistingAntiAffinity(pod *v1.Pod, nodeInfoMap map[string]*s
}
}
ctx, cancel := context.WithCancel(context.Background())
processNode := func(i int) {
nodeInfo := nodeInfoMap[allNodeNames[i]]
node := nodeInfo.Node()
@ -410,12 +402,13 @@ func getTPMapMatchingExistingAntiAffinity(pod *v1.Pod, nodeInfoMap map[string]*s
existingPodTopologyMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(pod, existingPod, node)
if err != nil {
catchError(err)
cancel()
return
}
appendTopologyPairsMaps(existingPodTopologyMaps)
}
}
workqueue.ParallelizeUntil(context.TODO(), 16, len(allNodeNames), processNode)
workqueue.ParallelizeUntil(ctx, 16, len(allNodeNames), processNode)
return topologyMaps, firstError
}
@ -464,6 +457,8 @@ func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, nodeInfoMap map[s
}
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
ctx, cancel := context.WithCancel(context.Background())
processNode := func(i int) {
nodeInfo := nodeInfoMap[allNodeNames[i]]
node := nodeInfo.Node()
@ -489,6 +484,7 @@ func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, nodeInfoMap map[s
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
catchError(err)
cancel()
return
}
if priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
@ -503,7 +499,7 @@ func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, nodeInfoMap map[s
appendResult(node.Name, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps)
}
}
workqueue.ParallelizeUntil(context.TODO(), 16, len(allNodeNames), processNode)
workqueue.ParallelizeUntil(ctx, 16, len(allNodeNames), processNode)
return topologyPairsAffinityPodsMaps, topologyPairsAntiAffinityPodsMaps, firstError
}
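The two hunks above switch ParallelizeUntil from context.TODO() to a cancellable context and call cancel() as soon as catchError records a failure. Below is a stdlib-only sketch of that pattern (the worker pool and the failing work item are illustrative, not the scheduler's workqueue implementation): once the shared context is cancelled, remaining work items are skipped instead of being processed after the error.

```go
package main

import (
	"context"
	"fmt"
	"sync"
)

// parallelizeUntil runs doWork over [0, pieces) on `workers` goroutines, skipping remaining
// pieces once ctx is cancelled.
func parallelizeUntil(ctx context.Context, workers, pieces int, doWork func(i int)) {
	toProcess := make(chan int, pieces)
	for i := 0; i < pieces; i++ {
		toProcess <- i
	}
	close(toProcess)

	var wg sync.WaitGroup
	wg.Add(workers)
	for w := 0; w < workers; w++ {
		go func() {
			defer wg.Done()
			for i := range toProcess {
				select {
				case <-ctx.Done(): // stop early: a previous piece reported an error
					return
				default:
					doWork(i)
				}
			}
		}()
	}
	wg.Wait()
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	var once sync.Once
	var firstErr error

	processNode := func(i int) {
		if i == 3 { // pretend this node's pods fail to produce topology pairs
			once.Do(func() {
				firstErr = fmt.Errorf("node %d: bad anti-affinity term", i)
				cancel() // mirrors catchError(err) followed by cancel() in the diff
			})
		}
	}

	parallelizeUntil(ctx, 4, 16, processNode)
	fmt.Println(firstErr)
}
```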

View File

@ -173,11 +173,6 @@ func Ordering() []string {
return predicatesOrdering
}
// SetPredicatesOrdering sets the ordering of predicates.
func SetPredicatesOrdering(names []string) {
predicatesOrdering = names
}
// GetPersistentVolumeInfo returns a persistent volume object by PV ID.
func (c *CachedPersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*v1.PersistentVolume, error) {
return c.Get(pvID)
@ -687,7 +682,7 @@ func (c *VolumeZoneChecker) predicate(pod *v1.Pod, meta PredicateMetadata, nodeI
}
}
}
return false, nil, fmt.Errorf("PersistentVolumeClaim is not bound: %q", pvcName)
return false, nil, fmt.Errorf("PersistentVolumeClaim was not found: %q", pvcName)
}
pv, err := c.pvInfo.GetPersistentVolumeInfo(pvName)
@ -696,7 +691,7 @@ func (c *VolumeZoneChecker) predicate(pod *v1.Pod, meta PredicateMetadata, nodeI
}
if pv == nil {
return false, nil, fmt.Errorf("PersistentVolume not found: %q", pvName)
return false, nil, fmt.Errorf("PersistentVolume was not found: %q", pvName)
}
for k, v := range pv.ObjectMeta.Labels {
@ -978,16 +973,18 @@ func (s *ServiceAffinity) serviceAffinityMetadataProducer(pm *predicateMetadata)
return
}
pm.serviceAffinityInUse = true
var errSvc, errList error
var err error
// Store services which match the pod.
pm.serviceAffinityMatchingPodServices, errSvc = s.serviceLister.GetPodServices(pm.pod)
selector := CreateSelectorFromLabels(pm.pod.Labels)
allMatches, errList := s.podLister.List(selector)
// In the future maybe we will return them as part of the function.
if errSvc != nil || errList != nil {
klog.Errorf("Some Error were found while precomputing svc affinity: \nservices:%v , \npods:%v", errSvc, errList)
pm.serviceAffinityMatchingPodServices, err = s.serviceLister.GetPodServices(pm.pod)
if err != nil {
klog.Errorf("Error precomputing service affinity: could not list services: %v", err)
}
selector := CreateSelectorFromLabels(pm.pod.Labels)
allMatches, err := s.podLister.List(selector)
if err != nil {
klog.Errorf("Error precomputing service affinity: could not list pods: %v", err)
}
// consider only the pods that belong to the same namespace
pm.serviceAffinityMatchingPodList = FilterPodsByNamespace(allMatches, pm.pod.Namespace)
}
@ -1093,6 +1090,9 @@ func PodFitsHostPorts(pod *v1.Pod, meta PredicateMetadata, nodeInfo *schedulerno
// search two arrays and return true if they have at least one common element; return false otherwise
func haveOverlap(a1, a2 []string) bool {
if len(a1) > len(a2) {
a1, a2 = a2, a1
}
m := map[string]bool{}
for _, val := range a1 {
@ -1293,11 +1293,11 @@ func getMatchingAntiAffinityTopologyPairsOfPod(newPod *v1.Pod, existingPod *v1.P
topologyMaps := newTopologyPairsMaps()
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(existingPod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
return nil, err
}
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(existingPod, &term)
if priorityutil.PodMatchesTermsNamespaceAndSelector(newPod, namespaces, selector) {
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
@ -1315,7 +1315,8 @@ func (c *PodAffinityChecker) getMatchingAntiAffinityTopologyPairsOfPods(pod *v1.
existingPodNode, err := c.info.GetNodeInfo(existingPod.Spec.NodeName)
if err != nil {
if apierrors.IsNotFound(err) {
klog.Errorf("Node not found, %v", existingPod.Spec.NodeName)
klog.Errorf("Pod %s has NodeName %q but node is not found",
podName(existingPod), existingPod.Spec.NodeName)
continue
}
return nil, err
@ -1344,12 +1345,12 @@ func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta
// present in nodeInfo. Pods on other nodes pass the filter.
filteredPods, err := c.podLister.FilteredList(nodeInfo.Filter, labels.Everything())
if err != nil {
errMessage := fmt.Sprintf("Failed to get all pods, %+v", err)
errMessage := fmt.Sprintf("Failed to get all pods: %v", err)
klog.Error(errMessage)
return ErrExistingPodsAntiAffinityRulesNotMatch, errors.New(errMessage)
}
if topologyMaps, err = c.getMatchingAntiAffinityTopologyPairsOfPods(pod, filteredPods); err != nil {
errMessage := fmt.Sprintf("Failed to get all terms that pod %+v matches, err: %+v", podName(pod), err)
errMessage := fmt.Sprintf("Failed to get all terms that match pod %s: %v", podName(pod), err)
klog.Error(errMessage)
return ErrExistingPodsAntiAffinityRulesNotMatch, errors.New(errMessage)
}
@ -1454,7 +1455,7 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
if !matchFound && len(affinityTerms) > 0 {
affTermsMatch, termsSelectorMatch, err := c.podMatchesPodAffinityTerms(pod, targetPod, nodeInfo, affinityTerms)
if err != nil {
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinity, err: %v", podName(pod), node.Name, err)
errMessage := fmt.Sprintf("Cannot schedule pod %s onto node %s, because of PodAffinity: %v", podName(pod), node.Name, err)
klog.Error(errMessage)
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
}

View File

@ -77,3 +77,13 @@ func portsConflict(existingPorts schedulernodeinfo.HostPortInfo, wantPorts []*v1
return false
}
// SetPredicatesOrderingDuringTest sets the predicatesOrdering to the specified
// value, and returns a function that restores the original value.
func SetPredicatesOrderingDuringTest(value []string) func() {
origVal := predicatesOrdering
predicatesOrdering = value
return func() {
predicatesOrdering = origVal
}
}
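A hedged usage sketch for the helper above (the test name and predicate names are illustrative): because the helper returns the restore function, it pairs naturally with defer.

```go
package predicates

import "testing"

// TestWithCustomPredicateOrdering is an illustrative test, not one from the repo.
func TestWithCustomPredicateOrdering(t *testing.T) {
	// Override the ordering for this test only; the deferred call restores the original value.
	defer SetPredicatesOrderingDuringTest([]string{"PodFitsResources", "PodFitsHostPorts"})()

	if got := Ordering(); len(got) != 2 {
		t.Fatalf("expected 2 predicates in the test ordering, got %d", len(got))
	}
}
```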

View File

@ -0,0 +1,72 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
)
// MetricRecorder represents a metric recorder which takes action when
// Inc(), Dec() or Clear() is called on the metric
type MetricRecorder interface {
Inc()
Dec()
Clear()
}
var _ MetricRecorder = &PendingPodsRecorder{}
// PendingPodsRecorder is an implementation of MetricRecorder
type PendingPodsRecorder struct {
recorder prometheus.Gauge
}
// NewActivePodsRecorder returns ActivePods in a Prometheus metric fashion
func NewActivePodsRecorder() *PendingPodsRecorder {
return &PendingPodsRecorder{
recorder: ActivePods,
}
}
// NewUnschedulablePodsRecorder returns UnschedulablePods in a Prometheus metric fashion
func NewUnschedulablePodsRecorder() *PendingPodsRecorder {
return &PendingPodsRecorder{
recorder: UnschedulablePods,
}
}
// NewBackoffPodsRecorder returns BackoffPods in a Prometheus metric fashion
func NewBackoffPodsRecorder() *PendingPodsRecorder {
return &PendingPodsRecorder{
recorder: BackoffPods,
}
}
// Inc increases a metric counter by 1, in an atomic way
func (r *PendingPodsRecorder) Inc() {
r.recorder.Inc()
}
// Dec decreases a metric counter by 1, in an atomic way
func (r *PendingPodsRecorder) Dec() {
r.recorder.Dec()
}
// Clear sets a metric counter to 0, in an atomic way
func (r *PendingPodsRecorder) Clear() {
r.recorder.Set(float64(0))
}
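Since MetricRecorder is an interface, code that holds one (such as the heap later in this commit) can be unit-tested without Prometheus. A hedged sketch of such a test double follows, assuming the k8s.io/kubernetes/pkg/scheduler/metrics import path used elsewhere in this diff; fakeRecorder and the test are illustrative.

```go
package metrics_test

import (
	"testing"

	"k8s.io/kubernetes/pkg/scheduler/metrics"
)

// fakeRecorder counts in memory instead of updating a Prometheus gauge.
type fakeRecorder struct{ n int }

func (f *fakeRecorder) Inc()   { f.n++ }
func (f *fakeRecorder) Dec()   { f.n-- }
func (f *fakeRecorder) Clear() { f.n = 0 }

// Compile-time check that fakeRecorder satisfies the interface added above.
var _ metrics.MetricRecorder = &fakeRecorder{}

func TestFakeRecorder(t *testing.T) {
	r := &fakeRecorder{}
	r.Inc()
	r.Inc()
	r.Dec()
	if r.n != 1 {
		t.Fatalf("expected counter 1, got %d", r.n)
	}
}
```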

View File

@ -21,7 +21,7 @@ import (
"time"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling"
)
const (
@ -192,6 +192,16 @@ var (
Help: "Total preemption attempts in the cluster till now",
})
pendingPods = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: SchedulerSubsystem,
Name: "pending_pods",
Help: "Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.",
}, []string{"queue"})
ActivePods = pendingPods.With(prometheus.Labels{"queue": "active"})
BackoffPods = pendingPods.With(prometheus.Labels{"queue": "backoff"})
UnschedulablePods = pendingPods.With(prometheus.Labels{"queue": "unschedulable"})
metricsList = []prometheus.Collector{
scheduleAttempts,
SchedulingLatency,
@ -210,6 +220,7 @@ var (
DeprecatedSchedulingAlgorithmPremptionEvaluationDuration,
PreemptionVictims,
PreemptionAttempts,
pendingPods,
}
)
@ -223,7 +234,7 @@ func Register() {
prometheus.MustRegister(metric)
}
persistentvolume.RegisterVolumeSchedulingMetrics()
volumescheduling.RegisterVolumeSchedulingMetrics()
})
}
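For reference, a standalone sketch of the labelled-gauge pattern used by pendingPods above (the metric name here is illustrative): a single GaugeVec with a "queue" label yields one child gauge per label value, which is exactly what ActivePods, BackoffPods and UnschedulablePods are.

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	pending := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "example_pending_pods",
			Help: "Number of pending pods, by the queue type.",
		},
		[]string{"queue"},
	)
	prometheus.MustRegister(pending)

	// Each With() call returns the child gauge for that label value.
	active := pending.With(prometheus.Labels{"queue": "active"})
	backoff := pending.With(prometheus.Labels{"queue": "backoff"})

	active.Inc()  // a pod entered activeQ
	backoff.Inc() // a pod moved to backoffQ
	active.Dec()  // the pod left activeQ

	fmt.Println("example_pending_pods registered; one child gauge per queue label")
}
```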

View File

@ -1,220 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"fmt"
"sync"
"sync/atomic"
"time"
ktypes "k8s.io/apimachinery/pkg/types"
"k8s.io/klog"
)
type clock interface {
Now() time.Time
}
type realClock struct{}
func (realClock) Now() time.Time {
return time.Now()
}
// backoffEntry is single threaded. In particular, it only allows a single action to be waiting on backoff at a time.
// It is also not safe to copy this object.
type backoffEntry struct {
initialized bool
podName ktypes.NamespacedName
backoff time.Duration
lastUpdate time.Time
reqInFlight int32
}
// tryLock attempts to acquire a lock via atomic compare and swap.
// returns true if the lock was acquired, false otherwise
func (b *backoffEntry) tryLock() bool {
return atomic.CompareAndSwapInt32(&b.reqInFlight, 0, 1)
}
// unlock releases the lock. It panics if the lock isn't held.
func (b *backoffEntry) unlock() {
if !atomic.CompareAndSwapInt32(&b.reqInFlight, 1, 0) {
panic(fmt.Sprintf("unexpected state on unlocking: %+v", b))
}
}
// backoffTime returns the Time when a backoffEntry completes backoff
func (b *backoffEntry) backoffTime() time.Time {
return b.lastUpdate.Add(b.backoff)
}
// getBackoff returns the duration until this entry completes backoff
func (b *backoffEntry) getBackoff(maxDuration time.Duration) time.Duration {
if !b.initialized {
b.initialized = true
return b.backoff
}
newDuration := b.backoff * 2
if newDuration > maxDuration {
newDuration = maxDuration
}
b.backoff = newDuration
klog.V(4).Infof("Backing off %s", newDuration.String())
return newDuration
}
// PodBackoff is used to restart a pod with back-off delay.
type PodBackoff struct {
// expiryQ stores backoffEntry items ordered by lastUpdate until they reach maxDuration and are GC'd
expiryQ *Heap
lock sync.Mutex
clock clock
defaultDuration time.Duration
maxDuration time.Duration
}
// MaxDuration returns the max time duration of the back-off.
func (p *PodBackoff) MaxDuration() time.Duration {
return p.maxDuration
}
// CreateDefaultPodBackoff creates a default pod back-off object.
func CreateDefaultPodBackoff() *PodBackoff {
return CreatePodBackoff(1*time.Second, 60*time.Second)
}
// CreatePodBackoff creates a pod back-off object by default duration and max duration.
func CreatePodBackoff(defaultDuration, maxDuration time.Duration) *PodBackoff {
return CreatePodBackoffWithClock(defaultDuration, maxDuration, realClock{})
}
// CreatePodBackoffWithClock creates a pod back-off object by default duration, max duration and clock.
func CreatePodBackoffWithClock(defaultDuration, maxDuration time.Duration, clock clock) *PodBackoff {
return &PodBackoff{
expiryQ: NewHeap(backoffEntryKeyFunc, backoffEntryCompareUpdate),
clock: clock,
defaultDuration: defaultDuration,
maxDuration: maxDuration,
}
}
// getEntry returns the backoffEntry for a given podID
func (p *PodBackoff) getEntry(podID ktypes.NamespacedName) *backoffEntry {
entry, exists, _ := p.expiryQ.GetByKey(podID.String())
var be *backoffEntry
if !exists {
be = &backoffEntry{
initialized: false,
podName: podID,
backoff: p.defaultDuration,
}
p.expiryQ.Update(be)
} else {
be = entry.(*backoffEntry)
}
return be
}
// BackoffPod updates the backoff for a podId and returns the duration until backoff completion
func (p *PodBackoff) BackoffPod(podID ktypes.NamespacedName) time.Duration {
p.lock.Lock()
defer p.lock.Unlock()
entry := p.getEntry(podID)
entry.lastUpdate = p.clock.Now()
p.expiryQ.Update(entry)
return entry.getBackoff(p.maxDuration)
}
// TryBackoffAndWait tries to acquire the backoff lock
func (p *PodBackoff) TryBackoffAndWait(podID ktypes.NamespacedName, stop <-chan struct{}) bool {
p.lock.Lock()
entry := p.getEntry(podID)
if !entry.tryLock() {
p.lock.Unlock()
return false
}
defer entry.unlock()
duration := entry.getBackoff(p.maxDuration)
p.lock.Unlock()
select {
case <-time.After(duration):
return true
case <-stop:
return false
}
}
// Gc executes garbage collection on the pod back-off entries.
func (p *PodBackoff) Gc() {
p.lock.Lock()
defer p.lock.Unlock()
now := p.clock.Now()
var be *backoffEntry
for {
entry := p.expiryQ.Peek()
if entry == nil {
break
}
be = entry.(*backoffEntry)
if now.Sub(be.lastUpdate) > p.maxDuration {
p.expiryQ.Pop()
} else {
break
}
}
}
// GetBackoffTime returns the time that podID completes backoff
func (p *PodBackoff) GetBackoffTime(podID ktypes.NamespacedName) (time.Time, bool) {
p.lock.Lock()
defer p.lock.Unlock()
rawBe, exists, _ := p.expiryQ.GetByKey(podID.String())
if !exists {
return time.Time{}, false
}
be := rawBe.(*backoffEntry)
return be.lastUpdate.Add(be.backoff), true
}
// ClearPodBackoff removes all tracking information for podID (clears expiry)
func (p *PodBackoff) ClearPodBackoff(podID ktypes.NamespacedName) bool {
p.lock.Lock()
defer p.lock.Unlock()
entry, exists, _ := p.expiryQ.GetByKey(podID.String())
if exists {
err := p.expiryQ.Delete(entry)
return err == nil
}
return false
}
// backoffEntryKeyFunc is the keying function used for mapping a backoffEntry to string for heap
func backoffEntryKeyFunc(b interface{}) (string, error) {
be := b.(*backoffEntry)
return be.podName.String(), nil
}
// backoffEntryCompareUpdate returns true when b1's backoff time is before b2's
func backoffEntryCompareUpdate(b1, b2 interface{}) bool {
be1 := b1.(*backoffEntry)
be2 := b2.(*backoffEntry)
return be1.lastUpdate.Before(be2.lastUpdate)
}

View File

@ -25,6 +25,7 @@ import (
"fmt"
"k8s.io/client-go/tools/cache"
"k8s.io/kubernetes/pkg/scheduler/metrics"
)
// KeyFunc is a function type to get the key from an object.
@ -127,6 +128,9 @@ type Heap struct {
// data stores objects and has a queue that keeps their ordering according
// to the heap invariant.
data *heapData
// metricRecorder updates the counter when elements of a heap get added or
// removed, and it does nothing if it's nil
metricRecorder metrics.MetricRecorder
}
// Add inserts an item, and puts it in the queue. The item is updated if it
@ -141,6 +145,9 @@ func (h *Heap) Add(obj interface{}) error {
heap.Fix(h.data, h.data.items[key].index)
} else {
heap.Push(h.data, &itemKeyValue{key, obj})
if h.metricRecorder != nil {
h.metricRecorder.Inc()
}
}
return nil
}
@ -154,6 +161,9 @@ func (h *Heap) AddIfNotPresent(obj interface{}) error {
}
if _, exists := h.data.items[key]; !exists {
heap.Push(h.data, &itemKeyValue{key, obj})
if h.metricRecorder != nil {
h.metricRecorder.Inc()
}
}
return nil
}
@ -172,6 +182,9 @@ func (h *Heap) Delete(obj interface{}) error {
}
if item, ok := h.data.items[key]; ok {
heap.Remove(h.data, item.index)
if h.metricRecorder != nil {
h.metricRecorder.Dec()
}
return nil
}
return fmt.Errorf("object not found")
@ -186,6 +199,9 @@ func (h *Heap) Peek() interface{} {
func (h *Heap) Pop() (interface{}, error) {
obj := heap.Pop(h.data)
if obj != nil {
if h.metricRecorder != nil {
h.metricRecorder.Dec()
}
return obj, nil
}
return nil, fmt.Errorf("object was removed from heap data")
@ -225,6 +241,11 @@ func (h *Heap) Len() int {
// NewHeap returns a Heap which can be used to queue up items to process.
func NewHeap(keyFn KeyFunc, lessFn LessFunc) *Heap {
return NewHeapWithRecorder(keyFn, lessFn, nil)
}
// NewHeapWithRecorder wraps an optional metricRecorder to compose a Heap object.
func NewHeapWithRecorder(keyFn KeyFunc, lessFn LessFunc, metricRecorder metrics.MetricRecorder) *Heap {
return &Heap{
data: &heapData{
items: map[string]*heapItem{},
@ -232,5 +253,6 @@ func NewHeap(keyFn KeyFunc, lessFn LessFunc) *Heap {
keyFunc: keyFn,
lessFunc: lessFn,
},
metricRecorder: metricRecorder,
}
}
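A hedged usage sketch for the recorder-aware constructor (written as a Go example in the heap's own package, whose name is not shown in this hunk; the package name, key function and less function are assumptions): passing a recorder makes every Add/Delete/Pop move the corresponding gauge, while NewHeap keeps the old metric-free behaviour by passing nil.

```go
package util // assumption: the Heap above lives in a package named util

import (
	"fmt"

	"k8s.io/kubernetes/pkg/scheduler/metrics"
)

// ExampleNewHeapWithRecorder is illustrative, not code from the repo. The keyFn/lessFn
// signatures (obj -> key, a < b) are assumed to match KeyFunc/LessFunc.
func ExampleNewHeapWithRecorder() {
	keyFn := func(obj interface{}) (string, error) { return obj.(string), nil }
	lessFn := func(a, b interface{}) bool { return a.(string) < b.(string) }

	// The backoff-queue gauge is incremented on Add and decremented on Delete/Pop.
	h := NewHeapWithRecorder(keyFn, lessFn, metrics.NewBackoffPodsRecorder())
	_ = h.Add("default/pod-a")
	fmt.Println(h.Len())
	// Output: 1
}
```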

View File

@ -20,9 +20,13 @@ import (
"sort"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/apis/scheduling"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/api"
"time"
)
// GetContainerPorts returns the used host ports of Pods: if 'port' was used, a 'port:true' pair
@ -52,7 +56,7 @@ func GetPodFullName(pod *v1.Pod) string {
return pod.Name + "_" + pod.Namespace
}
// GetPodPriority return priority of the given pod.
// GetPodPriority returns priority of the given pod.
func GetPodPriority(pod *v1.Pod) int32 {
if pod.Spec.Priority != nil {
return *pod.Spec.Priority
@ -63,6 +67,45 @@ func GetPodPriority(pod *v1.Pod) int32 {
return scheduling.DefaultPriorityWhenNoDefaultClassExists
}
// GetPodStartTime returns start time of the given pod.
func GetPodStartTime(pod *v1.Pod) *metav1.Time {
if pod.Status.StartTime != nil {
return pod.Status.StartTime
}
// Should not reach here, as the start time of a running pod should not be nil.
// Return current timestamp as the default value.
// This will not affect the calculation of earliest timestamp of all the pods on one node,
// because current timestamp is always after the StartTime of any pod in good state.
klog.Errorf("pod.Status.StartTime is nil for pod %s. Should not reach here.", pod.Name)
return &metav1.Time{Time: time.Now()}
}
// GetEarliestPodStartTime returns the earliest start time of all pods that
// have the highest priority among all victims.
func GetEarliestPodStartTime(victims *api.Victims) *metav1.Time {
if len(victims.Pods) == 0 {
// should not reach here.
klog.Errorf("victims.Pods is empty. Should not reach here.")
return nil
}
earliestPodStartTime := GetPodStartTime(victims.Pods[0])
highestPriority := GetPodPriority(victims.Pods[0])
for _, pod := range victims.Pods {
if GetPodPriority(pod) == highestPriority {
if GetPodStartTime(pod).Before(earliestPodStartTime) {
earliestPodStartTime = GetPodStartTime(pod)
}
} else if GetPodPriority(pod) > highestPriority {
highestPriority = GetPodPriority(pod)
earliestPodStartTime = GetPodStartTime(pod)
}
}
return earliestPodStartTime
}
// SortableList is a list that implements sort.Interface.
type SortableList struct {
Items []interface{}
@ -91,9 +134,15 @@ func (l *SortableList) Sort() {
sort.Sort(l)
}
// HigherPriorityPod return true when priority of the first pod is higher than
// the second one. It takes arguments of the type "interface{}" to be used with
// SortableList, but expects those arguments to be *v1.Pod.
func HigherPriorityPod(pod1, pod2 interface{}) bool {
return GetPodPriority(pod1.(*v1.Pod)) > GetPodPriority(pod2.(*v1.Pod))
// MoreImportantPod returns true when priority of the first pod is higher than
// the second one. If two pods' priorities are equal, compare their StartTime.
// It takes arguments of the type "interface{}" to be used with SortableList,
// but expects those arguments to be *v1.Pod.
func MoreImportantPod(pod1, pod2 interface{}) bool {
p1 := GetPodPriority(pod1.(*v1.Pod))
p2 := GetPodPriority(pod2.(*v1.Pod))
if p1 != p2 {
return p1 > p2
}
return GetPodStartTime(pod1.(*v1.Pod)).Before(GetPodStartTime(pod2.(*v1.Pod)))
}
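A small, hedged sketch of the new tie-break in action (the pods, priority value and import alias are illustrative; it assumes MoreImportantPod is exported from the scheduler util package shown in this file): with equal priorities, the pod that started earlier wins.

```go
package main

import (
	"fmt"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)

func main() {
	prio := int32(100)
	older := metav1.NewTime(time.Now().Add(-time.Hour))
	newer := metav1.NewTime(time.Now())

	p1 := &v1.Pod{Spec: v1.PodSpec{Priority: &prio}, Status: v1.PodStatus{StartTime: &older}}
	p2 := &v1.Pod{Spec: v1.PodSpec{Priority: &prio}, Status: v1.PodStatus{StartTime: &newer}}

	// Equal priority, so the earlier StartTime makes p1 the more important pod.
	fmt.Println(schedutil.MoreImportantPod(p1, p2)) // true
}
```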

View File

@ -23,12 +23,12 @@ import (
coreinformers "k8s.io/client-go/informers/core/v1"
storageinformers "k8s.io/client-go/informers/storage/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
volumescheduling "k8s.io/kubernetes/pkg/controller/volume/scheduling"
)
// VolumeBinder sets up the volume binding library
type VolumeBinder struct {
Binder persistentvolume.SchedulerVolumeBinder
Binder volumescheduling.SchedulerVolumeBinder
}
// NewVolumeBinder sets up the volume binding library and binding queue
@ -41,14 +41,14 @@ func NewVolumeBinder(
bindTimeout time.Duration) *VolumeBinder {
return &VolumeBinder{
Binder: persistentvolume.NewVolumeBinder(client, nodeInformer, pvcInformer, pvInformer, storageClassInformer, bindTimeout),
Binder: volumescheduling.NewVolumeBinder(client, nodeInformer, pvcInformer, pvInformer, storageClassInformer, bindTimeout),
}
}
// NewFakeVolumeBinder sets up a fake volume binder and binding queue
func NewFakeVolumeBinder(config *persistentvolume.FakeVolumeBinderConfig) *VolumeBinder {
func NewFakeVolumeBinder(config *volumescheduling.FakeVolumeBinderConfig) *VolumeBinder {
return &VolumeBinder{
Binder: persistentvolume.NewFakeVolumeBinder(config),
Binder: volumescheduling.NewFakeVolumeBinder(config),
}
}