Update to kube v1.17

Signed-off-by: Humble Chirammal <hchiramm@redhat.com>
2025-06-13 10:33:35 +00:00 · 2020-01-14 16:08:55 +05:30
parent 327fcd1b1b
commit 3af1e26d7c
1710 changed files with 289562 additions and 168638 deletions
--- a/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/csi_volume_predicate.go
+++ b/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/csi_volume_predicate.go
@ -19,83 +19,122 @@ package predicates
 import (
 	"fmt"

-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
+	storagev1 "k8s.io/api/storage/v1"
 	"k8s.io/apimachinery/pkg/util/rand"
-	utilfeature "k8s.io/apiserver/pkg/util/feature"
+	corelisters "k8s.io/client-go/listers/core/v1"
+	storagelisters "k8s.io/client-go/listers/storage/v1"
+	csitrans "k8s.io/csi-translation-lib"
 	"k8s.io/klog"
-	"k8s.io/kubernetes/pkg/features"
+	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
 	schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
 	volumeutil "k8s.io/kubernetes/pkg/volume/util"
 )

+// InTreeToCSITranslator contains methods required to check migratable status
+// and perform translations from in-tree PVs to CSI
+type InTreeToCSITranslator interface {
+	IsPVMigratable(pv *v1.PersistentVolume) bool
+	IsMigratableIntreePluginByName(inTreePluginName string) bool
+	GetInTreePluginNameFromSpec(pv *v1.PersistentVolume, vol *v1.Volume) (string, error)
+	GetCSINameFromInTreeName(pluginName string) (string, error)
+	TranslateInTreePVToCSI(pv *v1.PersistentVolume) (*v1.PersistentVolume, error)
+}
+
 // CSIMaxVolumeLimitChecker defines predicate needed for counting CSI volumes
 type CSIMaxVolumeLimitChecker struct {
-	pvInfo               PersistentVolumeInfo
-	pvcInfo              PersistentVolumeClaimInfo
-	scInfo               StorageClassInfo
+	csiNodeLister storagelisters.CSINodeLister
+	pvLister      corelisters.PersistentVolumeLister
+	pvcLister     corelisters.PersistentVolumeClaimLister
+	scLister      storagelisters.StorageClassLister
+
 	randomVolumeIDPrefix string
+
+	translator InTreeToCSITranslator
 }

 // NewCSIMaxVolumeLimitPredicate returns a predicate for counting CSI volumes
 func NewCSIMaxVolumeLimitPredicate(
-	pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo, scInfo StorageClassInfo) FitPredicate {
+	csiNodeLister storagelisters.CSINodeLister, pvLister corelisters.PersistentVolumeLister, pvcLister corelisters.PersistentVolumeClaimLister, scLister storagelisters.StorageClassLister) FitPredicate {
 	c := &CSIMaxVolumeLimitChecker{
-		pvInfo:               pvInfo,
-		pvcInfo:              pvcInfo,
-		scInfo:               scInfo,
+		csiNodeLister:        csiNodeLister,
+		pvLister:             pvLister,
+		pvcLister:            pvcLister,
+		scLister:             scLister,
 		randomVolumeIDPrefix: rand.String(32),
+		translator:           csitrans.New(),
 	}
 	return c.attachableLimitPredicate
 }

-func (c *CSIMaxVolumeLimitChecker) attachableLimitPredicate(
-	pod *v1.Pod, meta PredicateMetadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
-
-	// if feature gate is disable we return
-	if !utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
-		return true, nil, nil
+func getVolumeLimits(nodeInfo *schedulernodeinfo.NodeInfo, csiNode *storagev1.CSINode) map[v1.ResourceName]int64 {
+	// TODO: stop getting values from Node object in v1.18
+	nodeVolumeLimits := nodeInfo.VolumeLimits()
+	if csiNode != nil {
+		for i := range csiNode.Spec.Drivers {
+			d := csiNode.Spec.Drivers[i]
+			if d.Allocatable != nil && d.Allocatable.Count != nil {
+				// TODO: drop GetCSIAttachLimitKey once we don't get values from Node object (v1.18)
+				k := v1.ResourceName(volumeutil.GetCSIAttachLimitKey(d.Name))
+				nodeVolumeLimits[k] = int64(*d.Allocatable.Count)
+			}
+		}
 	}
-	// If a pod doesn't have any volume attached to it, the predicate will always be true.
-	// Thus we make a fast path for it, to avoid unnecessary computations in this case.
+	return nodeVolumeLimits
+}
+
+func (c *CSIMaxVolumeLimitChecker) attachableLimitPredicate(
+	pod *v1.Pod, meta Metadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
+	// If the new pod doesn't have any volume attached to it, the predicate will always be true
 	if len(pod.Spec.Volumes) == 0 {
 		return true, nil, nil
 	}

-	nodeVolumeLimits := nodeInfo.VolumeLimits()
-
-	// if node does not have volume limits this predicate should exit
-	if len(nodeVolumeLimits) == 0 {
-		return true, nil, nil
+	node := nodeInfo.Node()
+	if node == nil {
+		return false, nil, fmt.Errorf("node not found")
+	}
+
+	// If CSINode doesn't exist, the predicate may read the limits from Node object
+	csiNode, err := c.csiNodeLister.Get(node.Name)
+	if err != nil {
+		// TODO: return the error once CSINode is created by default (2 releases)
+		klog.V(5).Infof("Could not get a CSINode object for the node: %v", err)
 	}

-	// a map of unique volume name/csi volume handle and volume limit key
 	newVolumes := make(map[string]string)
-	if err := c.filterAttachableVolumes(pod.Spec.Volumes, pod.Namespace, newVolumes); err != nil {
+	if err := c.filterAttachableVolumes(csiNode, pod.Spec.Volumes, pod.Namespace, newVolumes); err != nil {
 		return false, nil, err
 	}

+	// If the pod doesn't have any new CSI volumes, the predicate will always be true
 	if len(newVolumes) == 0 {
 		return true, nil, nil
 	}

-	// a map of unique volume name/csi volume handle and volume limit key
+	// If the node doesn't have volume limits, the predicate will always be true
+	nodeVolumeLimits := getVolumeLimits(nodeInfo, csiNode)
+	if len(nodeVolumeLimits) == 0 {
+		return true, nil, nil
+	}
+
 	attachedVolumes := make(map[string]string)
 	for _, existingPod := range nodeInfo.Pods() {
-		if err := c.filterAttachableVolumes(existingPod.Spec.Volumes, existingPod.Namespace, attachedVolumes); err != nil {
+		if err := c.filterAttachableVolumes(csiNode, existingPod.Spec.Volumes, existingPod.Namespace, attachedVolumes); err != nil {
 			return false, nil, err
 		}
 	}

-	newVolumeCount := map[string]int{}
 	attachedVolumeCount := map[string]int{}
-
-	for volumeName, volumeLimitKey := range attachedVolumes {
-		if _, ok := newVolumes[volumeName]; ok {
-			delete(newVolumes, volumeName)
+	for volumeUniqueName, volumeLimitKey := range attachedVolumes {
+		if _, ok := newVolumes[volumeUniqueName]; ok {
+			// Don't count single volume used in multiple pods more than once
+			delete(newVolumes, volumeUniqueName)
 		}
 		attachedVolumeCount[volumeLimitKey]++
 	}

+	newVolumeCount := map[string]int{}
 	for _, volumeLimitKey := range newVolumes {
 		newVolumeCount[volumeLimitKey]++
 	}
@ -114,8 +153,7 @@ func (c *CSIMaxVolumeLimitChecker) attachableLimitPredicate(
 }

 func (c *CSIMaxVolumeLimitChecker) filterAttachableVolumes(
-	volumes []v1.Volume, namespace string, result map[string]string) error {
-
+	csiNode *storagev1.CSINode, volumes []v1.Volume, namespace string, result map[string]string) error {
 	for _, vol := range volumes {
 		// CSI volumes can only be used as persistent volumes
 		if vol.PersistentVolumeClaim == nil {
@ -127,77 +165,121 @@ func (c *CSIMaxVolumeLimitChecker) filterAttachableVolumes(
 			return fmt.Errorf("PersistentVolumeClaim had no name")
 		}

-		pvc, err := c.pvcInfo.GetPersistentVolumeClaimInfo(namespace, pvcName)
+		pvc, err := c.pvcLister.PersistentVolumeClaims(namespace).Get(pvcName)

 		if err != nil {
-			klog.V(4).Infof("Unable to look up PVC info for %s/%s", namespace, pvcName)
+			klog.V(5).Infof("Unable to look up PVC info for %s/%s", namespace, pvcName)
 			continue
 		}

-		driverName, volumeHandle := c.getCSIDriver(pvc)
-		// if we can't find driver name or volume handle - we don't count this volume.
+		driverName, volumeHandle := c.getCSIDriverInfo(csiNode, pvc)
 		if driverName == "" || volumeHandle == "" {
+			klog.V(5).Infof("Could not find a CSI driver name or volume handle, not counting volume")
 			continue
 		}
-		volumeLimitKey := volumeutil.GetCSIAttachLimitKey(driverName)
-		result[volumeHandle] = volumeLimitKey

+		volumeUniqueName := fmt.Sprintf("%s/%s", driverName, volumeHandle)
+		volumeLimitKey := volumeutil.GetCSIAttachLimitKey(driverName)
+		result[volumeUniqueName] = volumeLimitKey
 	}
 	return nil
 }

-func (c *CSIMaxVolumeLimitChecker) getCSIDriver(pvc *v1.PersistentVolumeClaim) (string, string) {
+// getCSIDriverInfo returns the CSI driver name and volume ID of a given PVC.
+// If the PVC is from a migrated in-tree plugin, this function will return
+// the information of the CSI driver that the plugin has been migrated to.
+func (c *CSIMaxVolumeLimitChecker) getCSIDriverInfo(csiNode *storagev1.CSINode, pvc *v1.PersistentVolumeClaim) (string, string) {
 	pvName := pvc.Spec.VolumeName
 	namespace := pvc.Namespace
 	pvcName := pvc.Name

-	placeHolderCSIDriver := ""
-	placeHolderHandle := ""
 	if pvName == "" {
 		klog.V(5).Infof("Persistent volume had no name for claim %s/%s", namespace, pvcName)
-		return c.getDriverNameFromSC(pvc)
+		return c.getCSIDriverInfoFromSC(csiNode, pvc)
 	}
-	pv, err := c.pvInfo.GetPersistentVolumeInfo(pvName)

+	pv, err := c.pvLister.Get(pvName)
 	if err != nil {
-		klog.V(4).Infof("Unable to look up PV info for PVC %s/%s and PV %s", namespace, pvcName, pvName)
+		klog.V(5).Infof("Unable to look up PV info for PVC %s/%s and PV %s", namespace, pvcName, pvName)
 		// If we can't fetch the PV associated with the PVC, maybe it got deleted
 		// or the PVC was prebound to a PV that hasn't been created yet.
 		// fallback to using StorageClass for volume counting
-		return c.getDriverNameFromSC(pvc)
+		return c.getCSIDriverInfoFromSC(csiNode, pvc)
 	}

 	csiSource := pv.Spec.PersistentVolumeSource.CSI
 	if csiSource == nil {
-		klog.V(5).Infof("Not considering non-CSI volume %s/%s", namespace, pvcName)
-		return placeHolderCSIDriver, placeHolderHandle
+		// We make a fast path for non-CSI volumes that aren't migratable
+		if !c.translator.IsPVMigratable(pv) {
+			return "", ""
+		}
+
+		pluginName, err := c.translator.GetInTreePluginNameFromSpec(pv, nil)
+		if err != nil {
+			klog.V(5).Infof("Unable to look up plugin name from PV spec: %v", err)
+			return "", ""
+		}
+
+		if !isCSIMigrationOn(csiNode, pluginName) {
+			klog.V(5).Infof("CSI Migration of plugin %s is not enabled", pluginName)
+			return "", ""
+		}
+
+		csiPV, err := c.translator.TranslateInTreePVToCSI(pv)
+		if err != nil {
+			klog.V(5).Infof("Unable to translate in-tree volume to CSI: %v", err)
+			return "", ""
+		}
+
+		if csiPV.Spec.PersistentVolumeSource.CSI == nil {
+			klog.V(5).Infof("Unable to get a valid volume source for translated PV %s", pvName)
+			return "", ""
+		}
+
+		csiSource = csiPV.Spec.PersistentVolumeSource.CSI
 	}
+
 	return csiSource.Driver, csiSource.VolumeHandle
 }

-func (c *CSIMaxVolumeLimitChecker) getDriverNameFromSC(pvc *v1.PersistentVolumeClaim) (string, string) {
+// getCSIDriverInfoFromSC returns the CSI driver name and a random volume ID of a given PVC's StorageClass.
+func (c *CSIMaxVolumeLimitChecker) getCSIDriverInfoFromSC(csiNode *storagev1.CSINode, pvc *v1.PersistentVolumeClaim) (string, string) {
 	namespace := pvc.Namespace
 	pvcName := pvc.Name
-	scName := pvc.Spec.StorageClassName
+	scName := v1helper.GetPersistentVolumeClaimClass(pvc)

-	placeHolderCSIDriver := ""
-	placeHolderHandle := ""
-	if scName == nil {
-		// if StorageClass is not set or found, then PVC must be using immediate binding mode
-		// and hence it must be bound before scheduling. So it is safe to not count it.
-		klog.V(5).Infof("pvc %s/%s has no storageClass", namespace, pvcName)
-		return placeHolderCSIDriver, placeHolderHandle
+	// If StorageClass is not set or not found, then PVC must be using immediate binding mode
+	// and hence it must be bound before scheduling. So it is safe to not count it.
+	if scName == "" {
+		klog.V(5).Infof("PVC %s/%s has no StorageClass", namespace, pvcName)
+		return "", ""
 	}

-	storageClass, err := c.scInfo.GetStorageClassInfo(*scName)
+	storageClass, err := c.scLister.Get(scName)
 	if err != nil {
-		klog.V(5).Infof("no storage %s found for pvc %s/%s", *scName, namespace, pvcName)
-		return placeHolderCSIDriver, placeHolderHandle
+		klog.V(5).Infof("Could not get StorageClass for PVC %s/%s: %v", namespace, pvcName, err)
+		return "", ""
 	}

-	// We use random prefix to avoid conflict with volume-ids. If PVC is bound in the middle
-	// predicate and there is another pod(on same node) that uses same volume then we will overcount
+	// We use random prefix to avoid conflict with volume IDs. If PVC is bound during the execution of the
+	// predicate and there is another pod on the same node that uses same volume, then we will overcount
 	// the volume and consider both volumes as different.
 	volumeHandle := fmt.Sprintf("%s-%s/%s", c.randomVolumeIDPrefix, namespace, pvcName)
-	return storageClass.Provisioner, volumeHandle
+
+	provisioner := storageClass.Provisioner
+	if c.translator.IsMigratableIntreePluginByName(provisioner) {
+		if !isCSIMigrationOn(csiNode, provisioner) {
+			klog.V(5).Infof("CSI Migration of plugin %s is not enabled", provisioner)
+			return "", ""
+		}
+
+		driverName, err := c.translator.GetCSINameFromInTreeName(provisioner)
+		if err != nil {
+			klog.V(5).Infof("Unable to look up driver name from plugin name: %v", err)
+			return "", ""
+		}
+		return driverName, volumeHandle
+	}
+
+	return provisioner, volumeHandle
 }
--- a/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/error.go
+++ b/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/error.go
@ -75,11 +75,45 @@ var (
 	ErrVolumeNodeConflict = newPredicateFailureError("VolumeNodeAffinityConflict", "node(s) had volume node affinity conflict")
 	// ErrVolumeBindConflict is used for VolumeBindingNoMatch predicate error.
 	ErrVolumeBindConflict = newPredicateFailureError("VolumeBindingNoMatch", "node(s) didn't find available persistent volumes to bind")
+	// ErrTopologySpreadConstraintsNotMatch is used for EvenPodsSpread predicate error.
+	ErrTopologySpreadConstraintsNotMatch = newPredicateFailureError("EvenPodsSpreadNotMatch", "node(s) didn't match pod topology spread constraints")
 	// ErrFakePredicate is used for test only. The fake predicates returning false also returns error
 	// as ErrFakePredicate.
 	ErrFakePredicate = newPredicateFailureError("FakePredicateError", "Nodes failed the fake predicate")
 )

+var unresolvablePredicateFailureErrors = map[PredicateFailureReason]struct{}{
+	ErrNodeSelectorNotMatch:      {},
+	ErrPodAffinityRulesNotMatch:  {},
+	ErrPodNotMatchHostName:       {},
+	ErrTaintsTolerationsNotMatch: {},
+	ErrNodeLabelPresenceViolated: {},
+	// Node conditions won't change when scheduler simulates removal of preemption victims.
+	// So, it is pointless to try nodes that have not been able to host the pod due to node
+	// conditions. These include ErrNodeNotReady, ErrNodeUnderPIDPressure, ErrNodeUnderMemoryPressure, ....
+	ErrNodeNotReady:            {},
+	ErrNodeNetworkUnavailable:  {},
+	ErrNodeUnderDiskPressure:   {},
+	ErrNodeUnderPIDPressure:    {},
+	ErrNodeUnderMemoryPressure: {},
+	ErrNodeUnschedulable:       {},
+	ErrNodeUnknownCondition:    {},
+	ErrVolumeZoneConflict:      {},
+	ErrVolumeNodeConflict:      {},
+	ErrVolumeBindConflict:      {},
+}
+
+// UnresolvablePredicateExists returns the first unresolvable predicate failure reason
+// found in reasons, or nil if none of the given reasons is unresolvable.
+func UnresolvablePredicateExists(reasons []PredicateFailureReason) PredicateFailureReason {
+	for _, r := range reasons {
+		if _, ok := unresolvablePredicateFailureErrors[r]; ok {
+			return r
+		}
+	}
+	return nil
+}
+
 // InsufficientResourceError is an error type that indicates what kind of resource limit is
 // hit and caused the unfitting failure.
 type InsufficientResourceError struct {
--- a/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/metadata.go
+++ b/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/metadata.go
@ -19,35 +19,31 @@ package predicates
 import (
 	"context"
 	"fmt"
+	"math"
 	"sync"

 	"k8s.io/klog"

-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/client-go/util/workqueue"
-	"k8s.io/kubernetes/pkg/scheduler/algorithm"
 	priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
+	schedulerlisters "k8s.io/kubernetes/pkg/scheduler/listers"
 	schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
 	schedutil "k8s.io/kubernetes/pkg/scheduler/util"
 )

-// PredicateMetadata interface represents anything that can access a predicate metadata.
-type PredicateMetadata interface {
-	ShallowCopy() PredicateMetadata
-	AddPod(addedPod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) error
-	RemovePod(deletedPod *v1.Pod) error
+// Metadata interface represents anything that can access a predicate metadata.
+type Metadata interface {
+	ShallowCopy() Metadata
+	AddPod(addedPod *v1.Pod, node *v1.Node) error
+	RemovePod(deletedPod *v1.Pod, node *v1.Node) error
 }

-// PredicateMetadataProducer is a function that computes predicate metadata for a given pod.
-type PredicateMetadataProducer func(pod *v1.Pod, nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo) PredicateMetadata
-
-// PredicateMetadataFactory defines a factory of predicate metadata.
-type PredicateMetadataFactory struct {
-	podLister algorithm.PodLister
-}
+// MetadataProducer is a function that computes predicate metadata for a given pod.
+type MetadataProducer func(pod *v1.Pod, sharedLister schedulerlisters.SharedLister) Metadata

 // AntiAffinityTerm's topology key value used in predicate metadata
 type topologyPair struct {
@ -66,14 +62,130 @@ type topologyPairsMaps struct {
 	podToTopologyPairs map[string]topologyPairSet
 }

-// NOTE: When new fields are added/removed or logic is changed, please make sure that
-// RemovePod, AddPod, and ShallowCopy functions are updated to work with the new changes.
-type predicateMetadata struct {
-	pod           *v1.Pod
-	podBestEffort bool
-	podRequest    *schedulernodeinfo.Resource
-	podPorts      []*v1.ContainerPort
+type criticalPath struct {
+	// topologyValue denotes the topology value mapping to topology key.
+	topologyValue string
+	// matchNum denotes the number of matching pods.
+	matchNum int32
+}

+// CAVEAT: the reason that `[2]criticalPath` can work is based on the implementation of current
+// preemption algorithm, in particular the following 2 facts:
+// Fact 1: we only preempt pods on the same node, instead of pods on multiple nodes.
+// Fact 2: each node is evaluated on a separate copy of the metadata during its preemption cycle.
+// If we plan to turn to a more complex algorithm like "arbitrary pods on multiple nodes", this
+// structure needs to be revisited.
+type criticalPaths [2]criticalPath
+
+func newCriticalPaths() *criticalPaths {
+	return &criticalPaths{{matchNum: math.MaxInt32}, {matchNum: math.MaxInt32}}
+}
+
+func (paths *criticalPaths) update(tpVal string, num int32) {
+	// first verify if `tpVal` exists or not
+	i := -1
+	if tpVal == paths[0].topologyValue {
+		i = 0
+	} else if tpVal == paths[1].topologyValue {
+		i = 1
+	}
+
+	if i >= 0 {
+		// `tpVal` exists
+		paths[i].matchNum = num
+		if paths[0].matchNum > paths[1].matchNum {
+			// swap paths[0] and paths[1]
+			paths[0], paths[1] = paths[1], paths[0]
+		}
+	} else {
+		// `tpVal` doesn't exist
+		if num < paths[0].matchNum {
+			// update paths[1] with paths[0]
+			paths[1] = paths[0]
+			// update paths[0]
+			paths[0].topologyValue, paths[0].matchNum = tpVal, num
+		} else if num < paths[1].matchNum {
+			// update paths[1]
+			paths[1].topologyValue, paths[1].matchNum = tpVal, num
+		}
+	}
+}
+
+// evenPodsSpreadMetadata combines tpKeyToCriticalPaths and tpPairToMatchNum
+// to represent:
+// (1) critical paths where the least pods are matched on each spread constraint.
+// (2) number of pods matched on each spread constraint.
+type evenPodsSpreadMetadata struct {
+	constraints []topologySpreadConstraint
+	// We record 2 critical paths instead of all critical paths here.
+	// criticalPaths[0].matchNum always holds the minimum matching number.
+	// criticalPaths[1].matchNum is always greater than or equal to criticalPaths[0].matchNum, but
+	// it's not guaranteed to be the 2nd minimum match number.
+	tpKeyToCriticalPaths map[string]*criticalPaths
+	// tpPairToMatchNum is keyed with topologyPair, and valued with the number of matching pods.
+	tpPairToMatchNum map[topologyPair]int32
+}
+
+// topologySpreadConstraint is the internal representation of a hard
+// v1.TopologySpreadConstraint (one whose unsatisfiable-constraint action is
+// DoNotSchedule), with its label selector already parsed.
+type topologySpreadConstraint struct {
+	maxSkew     int32
+	topologyKey string
+	selector    labels.Selector
+}
+
+type serviceAffinityMetadata struct {
+	matchingPodList     []*v1.Pod
+	matchingPodServices []*v1.Service
+}
+
+func (m *serviceAffinityMetadata) addPod(addedPod *v1.Pod, pod *v1.Pod, node *v1.Node) {
+	// If addedPod is in the same namespace as the pod, update the list
+	// of matching pods if applicable.
+	if m == nil || addedPod.Namespace != pod.Namespace {
+		return
+	}
+
+	selector := CreateSelectorFromLabels(pod.Labels)
+	if selector.Matches(labels.Set(addedPod.Labels)) {
+		m.matchingPodList = append(m.matchingPodList, addedPod)
+	}
+}
+
+func (m *serviceAffinityMetadata) removePod(deletedPod *v1.Pod, node *v1.Node) {
+	deletedPodFullName := schedutil.GetPodFullName(deletedPod)
+
+	if m == nil ||
+		len(m.matchingPodList) == 0 ||
+		deletedPod.Namespace != m.matchingPodList[0].Namespace {
+		return
+	}
+
+	for i, pod := range m.matchingPodList {
+		if schedutil.GetPodFullName(pod) == deletedPodFullName {
+			m.matchingPodList = append(m.matchingPodList[:i], m.matchingPodList[i+1:]...)
+			break
+		}
+	}
+}
+
+func (m *serviceAffinityMetadata) clone() *serviceAffinityMetadata {
+	if m == nil {
+		return nil
+	}
+
+	copy := serviceAffinityMetadata{}
+
+	copy.matchingPodServices = append([]*v1.Service(nil),
+		m.matchingPodServices...)
+	copy.matchingPodList = append([]*v1.Pod(nil),
+		m.matchingPodList...)
+
+	return &copy
+}
+
+type podAffinityMetadata struct {
 	topologyPairsAntiAffinityPodsMap *topologyPairsMaps
 	// A map of topology pairs to a list of Pods that can potentially match
 	// the affinity terms of the "pod" and its inverse.
@ -81,9 +193,70 @@ type predicateMetadata struct {
 	// A map of topology pairs to a list of Pods that can potentially match
 	// the anti-affinity terms of the "pod" and its inverse.
 	topologyPairsPotentialAntiAffinityPods *topologyPairsMaps
-	serviceAffinityInUse                   bool
-	serviceAffinityMatchingPodList         []*v1.Pod
-	serviceAffinityMatchingPodServices     []*v1.Service
+}
+
+func (m *podAffinityMetadata) addPod(addedPod *v1.Pod, pod *v1.Pod, node *v1.Node) error {
+	// Add matching anti-affinity terms of the addedPod to the map.
+	topologyPairsMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(pod, addedPod, node)
+	if err != nil {
+		return err
+	}
+	m.topologyPairsAntiAffinityPodsMap.appendMaps(topologyPairsMaps)
+	// Add the pod to topologyPairsPotentialAffinityPods and topologyPairsPotentialAntiAffinityPods if needed.
+	affinity := pod.Spec.Affinity
+	podNodeName := addedPod.Spec.NodeName
+	if affinity != nil && len(podNodeName) > 0 {
+		// It is assumed that when the added pod matches affinity of the pod, all the terms must match,
+		// this should be changed when the implementation of targetPodMatchesAffinityOfPod/podMatchesAffinityTermProperties
+		// is changed
+		if targetPodMatchesAffinityOfPod(pod, addedPod) {
+			affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
+			for _, term := range affinityTerms {
+				if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
+					pair := topologyPair{key: term.TopologyKey, value: topologyValue}
+					m.topologyPairsPotentialAffinityPods.addTopologyPair(pair, addedPod)
+				}
+			}
+		}
+		if targetPodMatchesAntiAffinityOfPod(pod, addedPod) {
+			antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
+			for _, term := range antiAffinityTerms {
+				if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
+					pair := topologyPair{key: term.TopologyKey, value: topologyValue}
+					m.topologyPairsPotentialAntiAffinityPods.addTopologyPair(pair, addedPod)
+				}
+			}
+		}
+	}
+
+	return nil
+}
+
+func (m *podAffinityMetadata) removePod(deletedPod *v1.Pod) {
+	if m == nil {
+		return
+	}
+
+	m.topologyPairsAntiAffinityPodsMap.removePod(deletedPod)
+	// Delete pod from the matching affinity or anti-affinity topology pairs maps.
+	m.topologyPairsPotentialAffinityPods.removePod(deletedPod)
+	m.topologyPairsPotentialAntiAffinityPods.removePod(deletedPod)
+}
+
+func (m *podAffinityMetadata) clone() *podAffinityMetadata {
+	if m == nil {
+		return nil
+	}
+
+	copy := podAffinityMetadata{}
+	copy.topologyPairsPotentialAffinityPods = m.topologyPairsPotentialAffinityPods.clone()
+	copy.topologyPairsPotentialAntiAffinityPods = m.topologyPairsPotentialAntiAffinityPods.clone()
+	copy.topologyPairsAntiAffinityPodsMap = m.topologyPairsAntiAffinityPodsMap.clone()
+
+	return &copy
+}
+
+type podFitsResourcesMetadata struct {
 	// ignoredExtendedResources is a set of extended resource names that will
 	// be ignored in the PodFitsResources predicate.
 	//
@ -91,72 +264,129 @@ type predicateMetadata struct {
 	// which should be accounted only by the extenders. This set is synthesized
 	// from scheduler extender configuration and does not change per pod.
 	ignoredExtendedResources sets.String
+	podRequest               *schedulernodeinfo.Resource
 }

-// Ensure that predicateMetadata implements algorithm.PredicateMetadata.
-var _ PredicateMetadata = &predicateMetadata{}
+func (m *podFitsResourcesMetadata) clone() *podFitsResourcesMetadata {
+	if m == nil {
+		return nil
+	}
+
+	copy := podFitsResourcesMetadata{}
+	copy.ignoredExtendedResources = m.ignoredExtendedResources
+	copy.podRequest = m.podRequest
+
+	return &copy
+}
+
+type podFitsHostPortsMetadata struct {
+	podPorts []*v1.ContainerPort
+}
+
+func (m *podFitsHostPortsMetadata) clone() *podFitsHostPortsMetadata {
+	if m == nil {
+		return nil
+	}
+
+	copy := podFitsHostPortsMetadata{}
+	copy.podPorts = append([]*v1.ContainerPort(nil), m.podPorts...)
+
+	return &copy
+}
+
+// NOTE: When new fields are added/removed or logic is changed, please make sure that
+// RemovePod, AddPod, and ShallowCopy functions are updated to work with the new changes.
+type predicateMetadata struct {
+	pod           *v1.Pod
+	podBestEffort bool
+
+	// evenPodsSpreadMetadata holds info of the minimum match number on each topology spread constraint,
+	// and the match number of all valid topology pairs.
+	evenPodsSpreadMetadata *evenPodsSpreadMetadata
+
+	serviceAffinityMetadata  *serviceAffinityMetadata
+	podAffinityMetadata      *podAffinityMetadata
+	podFitsResourcesMetadata *podFitsResourcesMetadata
+	podFitsHostPortsMetadata *podFitsHostPortsMetadata
+}
+
+// Ensure that predicateMetadata implements the Metadata interface.
+var _ Metadata = &predicateMetadata{}

 // predicateMetadataProducer function produces predicate metadata. It is stored in a global variable below
-// and used to modify the return values of PredicateMetadataProducer
+// and used to modify the return values of MetadataProducer
 type predicateMetadataProducer func(pm *predicateMetadata)

 var predicateMetadataProducers = make(map[string]predicateMetadataProducer)

-// RegisterPredicateMetadataProducer registers a PredicateMetadataProducer.
+// RegisterPredicateMetadataProducer registers a predicateMetadataProducer under predicateName.
 func RegisterPredicateMetadataProducer(predicateName string, precomp predicateMetadataProducer) {
 	predicateMetadataProducers[predicateName] = precomp
 }

-// EmptyPredicateMetadataProducer returns a no-op MetadataProducer type.
-func EmptyPredicateMetadataProducer(pod *v1.Pod, nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo) PredicateMetadata {
+// EmptyMetadataProducer returns a no-op MetadataProducer type.
+func EmptyMetadataProducer(pod *v1.Pod, sharedLister schedulerlisters.SharedLister) Metadata {
 	return nil
 }

 // RegisterPredicateMetadataProducerWithExtendedResourceOptions registers a
-// PredicateMetadataProducer that creates predicate metadata with the provided
+// MetadataProducer that creates predicate metadata with the provided
 // options for extended resources.
 //
 // See the comments in "predicateMetadata" for the explanation of the options.
 func RegisterPredicateMetadataProducerWithExtendedResourceOptions(ignoredExtendedResources sets.String) {
 	RegisterPredicateMetadataProducer("PredicateWithExtendedResourceOptions", func(pm *predicateMetadata) {
-		pm.ignoredExtendedResources = ignoredExtendedResources
+		pm.podFitsResourcesMetadata.ignoredExtendedResources = ignoredExtendedResources
 	})
 }

-// NewPredicateMetadataFactory creates a PredicateMetadataFactory.
-func NewPredicateMetadataFactory(podLister algorithm.PodLister) PredicateMetadataProducer {
-	factory := &PredicateMetadataFactory{
-		podLister,
-	}
-	return factory.GetMetadata
-}
+// MetadataProducerFactory is a factory to produce Metadata.
+type MetadataProducerFactory struct{}

-// GetMetadata returns the predicateMetadata used which will be used by various predicates.
-func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInfoMap map[string]*schedulernodeinfo.NodeInfo) PredicateMetadata {
+// GetPredicateMetadata returns the predicateMetadata which will be used by various predicates.
+func (f *MetadataProducerFactory) GetPredicateMetadata(pod *v1.Pod, sharedLister schedulerlisters.SharedLister) Metadata {
 	// If we cannot compute metadata, just return nil
 	if pod == nil {
 		return nil
 	}
-	// existingPodAntiAffinityMap will be used later for efficient check on existing pods' anti-affinity
-	existingPodAntiAffinityMap, err := getTPMapMatchingExistingAntiAffinity(pod, nodeNameToInfoMap)
+
+	var allNodes []*schedulernodeinfo.NodeInfo
+	var havePodsWithAffinityNodes []*schedulernodeinfo.NodeInfo
+	if sharedLister != nil {
+		var err error
+		allNodes, err = sharedLister.NodeInfos().List()
+		if err != nil {
+			klog.Errorf("failed to list NodeInfos: %v", err)
+			return nil
+		}
+		havePodsWithAffinityNodes, err = sharedLister.NodeInfos().HavePodsWithAffinityList()
+		if err != nil {
+			klog.Errorf("failed to list NodeInfos: %v", err)
+			return nil
+		}
+
+	}
+
+	// evenPodsSpreadMetadata represents how existing pods match "pod"
+	// on its spread constraints
+	evenPodsSpreadMetadata, err := getEvenPodsSpreadMetadata(pod, allNodes)
 	if err != nil {
+		klog.Errorf("Error calculating spreadConstraintsMap: %v", err)
 		return nil
 	}
-	// incomingPodAffinityMap will be used later for efficient check on incoming pod's affinity
-	// incomingPodAntiAffinityMap will be used later for efficient check on incoming pod's anti-affinity
-	incomingPodAffinityMap, incomingPodAntiAffinityMap, err := getTPMapMatchingIncomingAffinityAntiAffinity(pod, nodeNameToInfoMap)
+
+	podAffinityMetadata, err := getPodAffinityMetadata(pod, allNodes, havePodsWithAffinityNodes)
 	if err != nil {
-		klog.Errorf("[predicate meta data generation] error finding pods that match affinity terms: %v", err)
+		klog.Errorf("Error calculating podAffinityMetadata: %v", err)
 		return nil
 	}
+
 	predicateMetadata := &predicateMetadata{
-		pod:                                    pod,
-		podBestEffort:                          isPodBestEffort(pod),
-		podRequest:                             GetResourceRequest(pod),
-		podPorts:                               schedutil.GetContainerPorts(pod),
-		topologyPairsPotentialAffinityPods:     incomingPodAffinityMap,
-		topologyPairsPotentialAntiAffinityPods: incomingPodAntiAffinityMap,
-		topologyPairsAntiAffinityPodsMap:       existingPodAntiAffinityMap,
+		pod:                      pod,
+		evenPodsSpreadMetadata:   evenPodsSpreadMetadata,
+		podAffinityMetadata:      podAffinityMetadata,
+		podFitsResourcesMetadata: getPodFitsResourcesMetedata(pod),
+		podFitsHostPortsMetadata: getPodFitsHostPortsMetadata(pod),
 	}
 	for predicateName, precomputeFunc := range predicateMetadataProducers {
 		klog.V(10).Infof("Precompute: %v", predicateName)
@ -165,152 +395,287 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf
 	return predicateMetadata
 }

-// returns a pointer to a new topologyPairsMaps
-func newTopologyPairsMaps() *topologyPairsMaps {
-	return &topologyPairsMaps{topologyPairToPods: make(map[topologyPair]podSet),
-		podToTopologyPairs: make(map[string]topologyPairSet)}
+// getPodFitsHostPortsMetadata wraps the container ports of pod, as returned by
+// schedutil.GetContainerPorts, in a freshly allocated podFitsHostPortsMetadata.
+func getPodFitsHostPortsMetadata(pod *v1.Pod) *podFitsHostPortsMetadata {
+	return &podFitsHostPortsMetadata{
+		podPorts: schedutil.GetContainerPorts(pod),
+	}
 }

-func (topologyPairsMaps *topologyPairsMaps) addTopologyPair(pair topologyPair, pod *v1.Pod) {
-	podFullName := schedutil.GetPodFullName(pod)
-	if topologyPairsMaps.topologyPairToPods[pair] == nil {
-		topologyPairsMaps.topologyPairToPods[pair] = make(map[*v1.Pod]struct{})
+// getPodFitsResourcesMetedata wraps the result of GetResourceRequest(pod) in a
+// freshly allocated podFitsResourcesMetadata.
+//
+// NOTE: the function name is spelled "Metedata" (sic); the metadata factory
+// calls it under exactly this name, so the spelling must not be "fixed" here
+// alone.
+func getPodFitsResourcesMetedata(pod *v1.Pod) *podFitsResourcesMetadata {
+	return &podFitsResourcesMetadata{
+		podRequest: GetResourceRequest(pod),
 	}
-	topologyPairsMaps.topologyPairToPods[pair][pod] = struct{}{}
-	if topologyPairsMaps.podToTopologyPairs[podFullName] == nil {
-		topologyPairsMaps.podToTopologyPairs[podFullName] = make(map[topologyPair]struct{})
-	}
-	topologyPairsMaps.podToTopologyPairs[podFullName][pair] = struct{}{}
 }

-func (topologyPairsMaps *topologyPairsMaps) removePod(deletedPod *v1.Pod) {
-	deletedPodFullName := schedutil.GetPodFullName(deletedPod)
-	for pair := range topologyPairsMaps.podToTopologyPairs[deletedPodFullName] {
-		delete(topologyPairsMaps.topologyPairToPods[pair], deletedPod)
-		if len(topologyPairsMaps.topologyPairToPods[pair]) == 0 {
-			delete(topologyPairsMaps.topologyPairToPods, pair)
+// getPodAffinityMetadata assembles the three topology-pair maps used for the
+// inter-pod affinity checks of pod.
+//
+// havePodsWithAffinityNodes is only passed to the existing-pods anti-affinity
+// computation; allNodes is passed to the computation of the incoming pod's own
+// affinity and anti-affinity matches. The first error returned by either
+// helper is returned unchanged, together with a nil metadata pointer.
+func getPodAffinityMetadata(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo, havePodsWithAffinityNodes []*schedulernodeinfo.NodeInfo) (*podAffinityMetadata, error) {
+	// existingPodAntiAffinityMap will be used later for efficient check on existing pods' anti-affinity
+	existingPodAntiAffinityMap, err := getTPMapMatchingExistingAntiAffinity(pod, havePodsWithAffinityNodes)
+	if err != nil {
+		return nil, err
+	}
+	// incomingPodAffinityMap will be used later for efficient check on incoming pod's affinity
+	// incomingPodAntiAffinityMap will be used later for efficient check on incoming pod's anti-affinity
+	incomingPodAffinityMap, incomingPodAntiAffinityMap, err := getTPMapMatchingIncomingAffinityAntiAffinity(pod, allNodes)
+	if err != nil {
+		return nil, err
+	}
+
+	return &podAffinityMetadata{
+		topologyPairsPotentialAffinityPods:     incomingPodAffinityMap,
+		topologyPairsPotentialAntiAffinityPods: incomingPodAntiAffinityMap,
+		topologyPairsAntiAffinityPodsMap:       existingPodAntiAffinityMap,
+	}, nil
+}
+
+// getEvenPodsSpreadMetadata precomputes, for the hard (DoNotSchedule) topology
+// spread constraints of pod, how many existing pods match each constraint per
+// topology pair, and derives the per-topology-key critical paths from those
+// counts.
+//
+// It returns (nil, nil) when pod has no hard constraints, and a non-nil error
+// when a constraint's label selector cannot be converted. Nodes whose Node()
+// is nil are logged and skipped rather than reported as an error.
+func getEvenPodsSpreadMetadata(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo) (*evenPodsSpreadMetadata, error) {
+	// We have feature gating in APIServer to strip the spec
+	// so don't need to re-check feature gate, just check length of constraints.
+	constraints, err := filterHardTopologySpreadConstraints(pod.Spec.TopologySpreadConstraints)
+	if err != nil {
+		return nil, err
+	}
+	if len(constraints) == 0 {
+		return nil, nil
+	}
+
+	// lock guards m.tpPairToMatchNum, which processNode updates from the
+	// parallel workers started below.
+	var lock sync.Mutex
+
+	// TODO(Huang-Wei): It might be possible to use "make(map[topologyPair]*int32)".
+	// In that case, need to consider how to init each tpPairToMatchNum[pair] in an atomic fashion.
+	m := evenPodsSpreadMetadata{
+		constraints:          constraints,
+		tpKeyToCriticalPaths: make(map[string]*criticalPaths, len(constraints)),
+		tpPairToMatchNum:     make(map[topologyPair]int32),
+	}
+	addTopologyPairMatchNum := func(pair topologyPair, num int32) {
+		lock.Lock()
+		m.tpPairToMatchNum[pair] += num
+		lock.Unlock()
+	}
+
+	processNode := func(i int) {
+		nodeInfo := allNodes[i]
+		node := nodeInfo.Node()
+		if node == nil {
+			klog.Error("node not found")
+			return
+		}
+		// In accordance to design, if NodeAffinity or NodeSelector is defined,
+		// spreading is applied to nodes that pass those filters.
+		if !PodMatchesNodeSelectorAndAffinityTerms(pod, node) {
+			return
+		}
+
+		// Ensure current node's labels contains all topologyKeys in 'constraints'.
+		if !NodeLabelsMatchSpreadConstraints(node.Labels, constraints) {
+			return
+		}
+		for _, constraint := range constraints {
+			matchTotal := int32(0)
+			// nodeInfo.Pods() can be empty; or all pods don't fit
+			for _, existingPod := range nodeInfo.Pods() {
+				// Only pods in the incoming pod's namespace are counted.
+				if existingPod.Namespace != pod.Namespace {
+					continue
+				}
+				if constraint.selector.Matches(labels.Set(existingPod.Labels)) {
+					matchTotal++
+				}
+			}
+			// The pair is recorded even when matchTotal is zero, so every
+			// eligible topology value gets an entry in tpPairToMatchNum.
+			pair := topologyPair{key: constraint.topologyKey, value: node.Labels[constraint.topologyKey]}
+			addTopologyPairMatchNum(pair, matchTotal)
 		}
 	}
-	delete(topologyPairsMaps.podToTopologyPairs, deletedPodFullName)
+	// Visit every node with 16 parallel workers; the background context is
+	// never cancelled, so all nodes are processed.
+	workqueue.ParallelizeUntil(context.Background(), 16, len(allNodes), processNode)
+
+	// calculate min match for each topology pair
+	// Every constraint key first gets a fresh criticalPaths; each counted pair
+	// is then fed to that key's update method.
+	for i := 0; i < len(constraints); i++ {
+		key := constraints[i].topologyKey
+		m.tpKeyToCriticalPaths[key] = newCriticalPaths()
+	}
+	for pair, num := range m.tpPairToMatchNum {
+		m.tpKeyToCriticalPaths[pair.key].update(pair.value, num)
+	}
+
+	return &m, nil
 }

-func (topologyPairsMaps *topologyPairsMaps) appendMaps(toAppend *topologyPairsMaps) {
+// filterHardTopologySpreadConstraints converts the API-level constraints whose
+// WhenUnsatisfiable is v1.DoNotSchedule into the internal
+// topologySpreadConstraint form; all other constraints are dropped.
+//
+// The result is a nil slice when no constraint qualifies. If any qualifying
+// constraint has a LabelSelector that metav1.LabelSelectorAsSelector rejects,
+// the conversion stops and that error is returned with a nil slice.
+func filterHardTopologySpreadConstraints(constraints []v1.TopologySpreadConstraint) ([]topologySpreadConstraint, error) {
+	var result []topologySpreadConstraint
+	for _, c := range constraints {
+		if c.WhenUnsatisfiable == v1.DoNotSchedule {
+			selector, err := metav1.LabelSelectorAsSelector(c.LabelSelector)
+			if err != nil {
+				return nil, err
+			}
+			result = append(result, topologySpreadConstraint{
+				maxSkew:     c.MaxSkew,
+				topologyKey: c.TopologyKey,
+				selector:    selector,
+			})
+		}
+	}
+	return result, nil
+}
+
+// NodeLabelsMatchSpreadConstraints checks if ALL topology keys in spread constraints are present in node labels.
+// Only the presence of each key is tested, not its value; with no constraints
+// the function returns true.
+func NodeLabelsMatchSpreadConstraints(nodeLabels map[string]string, constraints []topologySpreadConstraint) bool {
+	for _, c := range constraints {
+		if _, ok := nodeLabels[c.topologyKey]; !ok {
+			return false
+		}
+	}
+	return true
+}
+
+// newTopologyPairsMaps returns a pointer to a new topologyPairsMaps whose two
+// maps are allocated and empty, so entries can be added without nil checks.
+func newTopologyPairsMaps() *topologyPairsMaps {
+	return &topologyPairsMaps{
+		topologyPairToPods: make(map[topologyPair]podSet),
+		podToTopologyPairs: make(map[string]topologyPairSet),
+	}
+}
+
+// addTopologyPair records that pod belongs to pair in both directions:
+// pair -> pod (keyed by the *v1.Pod pointer) and pod full name -> pair.
+// The inner sets are created on first use; adding the same association twice
+// has no further effect.
+func (m *topologyPairsMaps) addTopologyPair(pair topologyPair, pod *v1.Pod) {
+	podFullName := schedutil.GetPodFullName(pod)
+	if m.topologyPairToPods[pair] == nil {
+		m.topologyPairToPods[pair] = make(map[*v1.Pod]struct{})
+	}
+	m.topologyPairToPods[pair][pod] = struct{}{}
+	if m.podToTopologyPairs[podFullName] == nil {
+		m.podToTopologyPairs[podFullName] = make(map[topologyPair]struct{})
+	}
+	m.podToTopologyPairs[podFullName][pair] = struct{}{}
+}
+
+// removePod drops every association of deletedPod from both maps. Topology
+// pairs left without any pod are removed from topologyPairToPods entirely.
+//
+// The pairs to visit are looked up by the pod's full name, but the entry in
+// topologyPairToPods is deleted by pointer, so it is only removed when
+// deletedPod is the same *v1.Pod that was added.
+func (m *topologyPairsMaps) removePod(deletedPod *v1.Pod) {
+	deletedPodFullName := schedutil.GetPodFullName(deletedPod)
+	for pair := range m.podToTopologyPairs[deletedPodFullName] {
+		delete(m.topologyPairToPods[pair], deletedPod)
+		if len(m.topologyPairToPods[pair]) == 0 {
+			delete(m.topologyPairToPods, pair)
+		}
+	}
+	delete(m.podToTopologyPairs, deletedPodFullName)
+}
+
+// appendMaps merges every (pair, pod) association of toAppend into m via
+// addTopologyPair. A nil toAppend is a no-op; toAppend itself is not modified.
+func (m *topologyPairsMaps) appendMaps(toAppend *topologyPairsMaps) {
 	if toAppend == nil {
 		return
 	}
 	for pair := range toAppend.topologyPairToPods {
 		for pod := range toAppend.topologyPairToPods[pair] {
-			topologyPairsMaps.addTopologyPair(pair, pod)
+			m.addTopologyPair(pair, pod)
 		}
 	}
 }

+// clone returns a new topologyPairsMaps holding the same associations as m.
+// The maps are rebuilt, but the *v1.Pod pointers stored in them are shared
+// with m. Because appendMaps tolerates nil, a nil receiver yields an empty,
+// non-nil result.
+//
+// NOTE(review): the local variable named copy shadows the builtin copy inside
+// this function.
+func (m *topologyPairsMaps) clone() *topologyPairsMaps {
+	copy := newTopologyPairsMaps()
+	copy.appendMaps(m)
+	return copy
+}
+
+// addPod accounts for addedPod being placed on node by delegating to updatePod
+// with a delta of +1.
+func (m *evenPodsSpreadMetadata) addPod(addedPod, preemptorPod *v1.Pod, node *v1.Node) {
+	m.updatePod(addedPod, preemptorPod, node, 1)
+}
+
+// removePod accounts for deletedPod being removed from node by delegating to
+// updatePod with a delta of -1.
+func (m *evenPodsSpreadMetadata) removePod(deletedPod, preemptorPod *v1.Pod, node *v1.Node) {
+	m.updatePod(deletedPod, preemptorPod, node, -1)
+}
+
+// updatePod adjusts the match counts by delta for every constraint whose
+// selector matches updatedPod's labels, using node's label value for the
+// constraint's topology key, and then passes the new count to the critical
+// paths of that key.
+//
+// It is a no-op when m is nil, when updatedPod and preemptorPod are in
+// different namespaces, when node is nil, or when node lacks any of the
+// constraints' topology keys.
+func (m *evenPodsSpreadMetadata) updatePod(updatedPod, preemptorPod *v1.Pod, node *v1.Node, delta int32) {
+	if m == nil || updatedPod.Namespace != preemptorPod.Namespace || node == nil {
+		return
+	}
+	if !NodeLabelsMatchSpreadConstraints(node.Labels, m.constraints) {
+		return
+	}
+
+	podLabelSet := labels.Set(updatedPod.Labels)
+	for _, constraint := range m.constraints {
+		if !constraint.selector.Matches(podLabelSet) {
+			continue
+		}
+
+		k, v := constraint.topologyKey, node.Labels[constraint.topologyKey]
+		pair := topologyPair{key: k, value: v}
+		m.tpPairToMatchNum[pair] = m.tpPairToMatchNum[pair] + delta
+
+		m.tpKeyToCriticalPaths[k].update(v, m.tpPairToMatchNum[pair])
+	}
+}
+
+// clone returns a copy of m whose two maps are independent of the original;
+// the constraints slice is shared. A nil receiver yields nil.
+func (m *evenPodsSpreadMetadata) clone() *evenPodsSpreadMetadata {
+	// m could be nil when EvenPodsSpread feature is disabled
+	if m == nil {
+		return nil
+	}
+	cp := evenPodsSpreadMetadata{
+		// constraints are shared because they don't change.
+		constraints:          m.constraints,
+		tpKeyToCriticalPaths: make(map[string]*criticalPaths, len(m.tpKeyToCriticalPaths)),
+		tpPairToMatchNum:     make(map[topologyPair]int32, len(m.tpPairToMatchNum)),
+	}
+	for tpKey, paths := range m.tpKeyToCriticalPaths {
+		// Copy the two entries into a new criticalPaths so later updates to
+		// the clone do not write through the original's pointer.
+		cp.tpKeyToCriticalPaths[tpKey] = &criticalPaths{paths[0], paths[1]}
+	}
+	for tpPair, matchNum := range m.tpPairToMatchNum {
+		copyPair := topologyPair{key: tpPair.key, value: tpPair.value}
+		cp.tpPairToMatchNum[copyPair] = matchNum
+	}
+	return &cp
+}
+
 // RemovePod changes predicateMetadata assuming that the given `deletedPod` is
 // deleted from the system.
+// It returns an error, without changing anything, when deletedPod has the same
+// full name as meta.pod. node is forwarded to the spread and service-affinity
+// metadata updates.
-func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error {
+func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod, node *v1.Node) error {
 	deletedPodFullName := schedutil.GetPodFullName(deletedPod)
 	if deletedPodFullName == schedutil.GetPodFullName(meta.pod) {
 		return fmt.Errorf("deletedPod and meta.pod must not be the same")
 	}
-	meta.topologyPairsAntiAffinityPodsMap.removePod(deletedPod)
-	// Delete pod from the matching affinity or anti-affinity topology pairs maps.
-	meta.topologyPairsPotentialAffinityPods.removePod(deletedPod)
-	meta.topologyPairsPotentialAntiAffinityPods.removePod(deletedPod)
-	// All pods in the serviceAffinityMatchingPodList are in the same namespace.
-	// So, if the namespace of the first one is not the same as the namespace of the
-	// deletedPod, we don't need to check the list, as deletedPod isn't in the list.
-	if meta.serviceAffinityInUse &&
-		len(meta.serviceAffinityMatchingPodList) > 0 &&
-		deletedPod.Namespace == meta.serviceAffinityMatchingPodList[0].Namespace {
-		for i, pod := range meta.serviceAffinityMatchingPodList {
-			if schedutil.GetPodFullName(pod) == deletedPodFullName {
-				meta.serviceAffinityMatchingPodList = append(
-					meta.serviceAffinityMatchingPodList[:i],
-					meta.serviceAffinityMatchingPodList[i+1:]...)
-				break
-			}
-		}
-	}
+	// Each per-predicate metadata component removes the pod from its own state.
+	meta.podAffinityMetadata.removePod(deletedPod)
+	meta.evenPodsSpreadMetadata.removePod(deletedPod, meta.pod, node)
+	meta.serviceAffinityMetadata.removePod(deletedPod, node)
+
 	return nil
 }

-// AddPod changes predicateMetadata assuming that `newPod` is added to the
+// AddPod changes predicateMetadata assuming that the given `addedPod` is added to the
 // system.
+// It returns an error when addedPod has the same full name as meta.pod, when
+// node is nil, or when updating the pod-affinity metadata fails; in the last
+// case the spread and service-affinity metadata are left untouched.
-func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) error {
+func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, node *v1.Node) error {
 	addedPodFullName := schedutil.GetPodFullName(addedPod)
 	if addedPodFullName == schedutil.GetPodFullName(meta.pod) {
 		return fmt.Errorf("addedPod and meta.pod must not be the same")
 	}
-	if nodeInfo.Node() == nil {
-		return fmt.Errorf("invalid node in nodeInfo")
+	if node == nil {
+		return fmt.Errorf("node not found")
 	}
-	// Add matching anti-affinity terms of the addedPod to the map.
-	topologyPairsMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(meta.pod, addedPod, nodeInfo.Node())
-	if err != nil {
+
+	if err := meta.podAffinityMetadata.addPod(addedPod, meta.pod, node); err != nil {
 		return err
 	}
-	meta.topologyPairsAntiAffinityPodsMap.appendMaps(topologyPairsMaps)
-	// Add the pod to nodeNameToMatchingAffinityPods and nodeNameToMatchingAntiAffinityPods if needed.
-	affinity := meta.pod.Spec.Affinity
-	podNodeName := addedPod.Spec.NodeName
-	if affinity != nil && len(podNodeName) > 0 {
-		podNode := nodeInfo.Node()
-		// It is assumed that when the added pod matches affinity of the meta.pod, all the terms must match,
-		// this should be changed when the implementation of targetPodMatchesAffinityOfPod/podMatchesAffinityTermProperties
-		// is changed
-		if targetPodMatchesAffinityOfPod(meta.pod, addedPod) {
-			affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
-			for _, term := range affinityTerms {
-				if topologyValue, ok := podNode.Labels[term.TopologyKey]; ok {
-					pair := topologyPair{key: term.TopologyKey, value: topologyValue}
-					meta.topologyPairsPotentialAffinityPods.addTopologyPair(pair, addedPod)
-				}
-			}
-		}
-		if targetPodMatchesAntiAffinityOfPod(meta.pod, addedPod) {
-			antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
-			for _, term := range antiAffinityTerms {
-				if topologyValue, ok := podNode.Labels[term.TopologyKey]; ok {
-					pair := topologyPair{key: term.TopologyKey, value: topologyValue}
-					meta.topologyPairsPotentialAntiAffinityPods.addTopologyPair(pair, addedPod)
-				}
-			}
-		}
-	}
-	// If addedPod is in the same namespace as the meta.pod, update the list
-	// of matching pods if applicable.
-	if meta.serviceAffinityInUse && addedPod.Namespace == meta.pod.Namespace {
-		selector := CreateSelectorFromLabels(meta.pod.Labels)
-		if selector.Matches(labels.Set(addedPod.Labels)) {
-			meta.serviceAffinityMatchingPodList = append(meta.serviceAffinityMatchingPodList,
-				addedPod)
-		}
-	}
+	// Update meta.evenPodsSpreadMetadata if meta.pod has hard spread constraints
+	// and addedPod matches that
+	meta.evenPodsSpreadMetadata.addPod(addedPod, meta.pod, node)
+
+	meta.serviceAffinityMetadata.addPod(addedPod, meta.pod, node)
+
 	return nil
 }

 // ShallowCopy copies a metadata struct into a new struct and creates a copy of
 // its maps and slices, but it does not copy the contents of pointer values.
+// pod and podBestEffort are copied by assignment; every per-predicate metadata
+// component is duplicated through its own clone method.
-func (meta *predicateMetadata) ShallowCopy() PredicateMetadata {
+func (meta *predicateMetadata) ShallowCopy() Metadata {
 	newPredMeta := &predicateMetadata{
-		pod:                      meta.pod,
-		podBestEffort:            meta.podBestEffort,
-		podRequest:               meta.podRequest,
-		serviceAffinityInUse:     meta.serviceAffinityInUse,
-		ignoredExtendedResources: meta.ignoredExtendedResources,
+		pod:           meta.pod,
+		podBestEffort: meta.podBestEffort,
 	}
-	newPredMeta.podPorts = append([]*v1.ContainerPort(nil), meta.podPorts...)
-	newPredMeta.topologyPairsPotentialAffinityPods = newTopologyPairsMaps()
-	newPredMeta.topologyPairsPotentialAffinityPods.appendMaps(meta.topologyPairsPotentialAffinityPods)
-	newPredMeta.topologyPairsPotentialAntiAffinityPods = newTopologyPairsMaps()
-	newPredMeta.topologyPairsPotentialAntiAffinityPods.appendMaps(meta.topologyPairsPotentialAntiAffinityPods)
-	newPredMeta.topologyPairsAntiAffinityPodsMap = newTopologyPairsMaps()
-	newPredMeta.topologyPairsAntiAffinityPodsMap.appendMaps(meta.topologyPairsAntiAffinityPodsMap)
-	newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil),
-		meta.serviceAffinityMatchingPodServices...)
-	newPredMeta.serviceAffinityMatchingPodList = append([]*v1.Pod(nil),
-		meta.serviceAffinityMatchingPodList...)
-	return (PredicateMetadata)(newPredMeta)
+	newPredMeta.podFitsHostPortsMetadata = meta.podFitsHostPortsMetadata.clone()
+	newPredMeta.podAffinityMetadata = meta.podAffinityMetadata.clone()
+	newPredMeta.evenPodsSpreadMetadata = meta.evenPodsSpreadMetadata.clone()
+	newPredMeta.serviceAffinityMetadata = meta.serviceAffinityMetadata.clone()
+	newPredMeta.podFitsResourcesMetadata = meta.podFitsResourcesMetadata.clone()
+	return (Metadata)(newPredMeta)
 }

 type affinityTermProperties struct {
@ -365,15 +730,9 @@ func podMatchesAnyAffinityTermProperties(pod *v1.Pod, properties []*affinityTerm
 // getTPMapMatchingExistingAntiAffinity calculates the following for each existing pod on each node:
 // (1) Whether it has PodAntiAffinity
 // (2) Whether any AffinityTerm matches the incoming pod
-func getTPMapMatchingExistingAntiAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulernodeinfo.NodeInfo) (*topologyPairsMaps, error) {
-	allNodeNames := make([]string, 0, len(nodeInfoMap))
-	for name := range nodeInfoMap {
-		allNodeNames = append(allNodeNames, name)
-	}
-
+func getTPMapMatchingExistingAntiAffinity(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo) (*topologyPairsMaps, error) {
+	errCh := schedutil.NewErrorChannel()
 	var lock sync.Mutex
-	var firstError error
-
 	topologyMaps := newTopologyPairsMaps()

 	appendTopologyPairsMaps := func(toAppend *topologyPairsMaps) {
@ -381,54 +740,48 @@ func getTPMapMatchingExistingAntiAffinity(pod *v1.Pod, nodeInfoMap map[string]*s
 		defer lock.Unlock()
 		topologyMaps.appendMaps(toAppend)
 	}
-	catchError := func(err error) {
-		lock.Lock()
-		defer lock.Unlock()
-		if firstError == nil {
-			firstError = err
-		}
-	}

 	ctx, cancel := context.WithCancel(context.Background())

 	processNode := func(i int) {
-		nodeInfo := nodeInfoMap[allNodeNames[i]]
+		nodeInfo := allNodes[i]
 		node := nodeInfo.Node()
 		if node == nil {
-			catchError(fmt.Errorf("node not found"))
+			klog.Error("node not found")
 			return
 		}
 		for _, existingPod := range nodeInfo.PodsWithAffinity() {
 			existingPodTopologyMaps, err := getMatchingAntiAffinityTopologyPairsOfPod(pod, existingPod, node)
 			if err != nil {
-				catchError(err)
-				cancel()
+				errCh.SendErrorWithCancel(err, cancel)
 				return
 			}
-			appendTopologyPairsMaps(existingPodTopologyMaps)
+			if existingPodTopologyMaps != nil {
+				appendTopologyPairsMaps(existingPodTopologyMaps)
+			}
 		}
 	}
-	workqueue.ParallelizeUntil(ctx, 16, len(allNodeNames), processNode)
-	return topologyMaps, firstError
+	workqueue.ParallelizeUntil(ctx, 16, len(allNodes), processNode)
+
+	if err := errCh.ReceiveError(); err != nil {
+		return nil, err
+	}
+
+	return topologyMaps, nil
 }

 // getTPMapMatchingIncomingAffinityAntiAffinity finds existing Pods that match affinity terms of the given "pod".
 // It returns a topologyPairsMaps that are checked later by the affinity
 // predicate. With this topologyPairsMaps available, the affinity predicate does not
 // need to check all the pods in the cluster.
-func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulernodeinfo.NodeInfo) (topologyPairsAffinityPodsMaps *topologyPairsMaps, topologyPairsAntiAffinityPodsMaps *topologyPairsMaps, err error) {
+func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo) (topologyPairsAffinityPodsMaps *topologyPairsMaps, topologyPairsAntiAffinityPodsMaps *topologyPairsMaps, err error) {
 	affinity := pod.Spec.Affinity
 	if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
 		return newTopologyPairsMaps(), newTopologyPairsMaps(), nil
 	}

-	allNodeNames := make([]string, 0, len(nodeInfoMap))
-	for name := range nodeInfoMap {
-		allNodeNames = append(allNodeNames, name)
-	}
-
+	errCh := schedutil.NewErrorChannel()
 	var lock sync.Mutex
-	var firstError error
 	topologyPairsAffinityPodsMaps = newTopologyPairsMaps()
 	topologyPairsAntiAffinityPodsMaps = newTopologyPairsMaps()
 	appendResult := func(nodeName string, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps *topologyPairsMaps) {
@ -442,28 +795,21 @@ func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, nodeInfoMap map[s
 		}
 	}

-	catchError := func(err error) {
-		lock.Lock()
-		defer lock.Unlock()
-		if firstError == nil {
-			firstError = err
-		}
-	}
-
 	affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
 	affinityProperties, err := getAffinityTermProperties(pod, affinityTerms)
 	if err != nil {
 		return nil, nil, err
 	}
+
 	antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)

 	ctx, cancel := context.WithCancel(context.Background())

 	processNode := func(i int) {
-		nodeInfo := nodeInfoMap[allNodeNames[i]]
+		nodeInfo := allNodes[i]
 		node := nodeInfo.Node()
 		if node == nil {
-			catchError(fmt.Errorf("nodeInfo.Node is nil"))
+			klog.Error("node not found")
 			return
 		}
 		nodeTopologyPairsAffinityPodsMaps := newTopologyPairsMaps()
@ -483,8 +829,7 @@ func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, nodeInfoMap map[s
 				namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
 				selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
 				if err != nil {
-					catchError(err)
-					cancel()
+					errCh.SendErrorWithCancel(err, cancel)
 					return
 				}
 				if priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
@ -495,12 +840,18 @@ func getTPMapMatchingIncomingAffinityAntiAffinity(pod *v1.Pod, nodeInfoMap map[s
 				}
 			}
 		}
+
 		if len(nodeTopologyPairsAffinityPodsMaps.topologyPairToPods) > 0 || len(nodeTopologyPairsAntiAffinityPodsMaps.topologyPairToPods) > 0 {
 			appendResult(node.Name, nodeTopologyPairsAffinityPodsMaps, nodeTopologyPairsAntiAffinityPodsMaps)
 		}
 	}
-	workqueue.ParallelizeUntil(ctx, 16, len(allNodeNames), processNode)
-	return topologyPairsAffinityPodsMaps, topologyPairsAntiAffinityPodsMaps, firstError
+	workqueue.ParallelizeUntil(ctx, 16, len(allNodes), processNode)
+
+	if err := errCh.ReceiveError(); err != nil {
+		return nil, nil, err
+	}
+
+	return topologyPairsAffinityPodsMaps, topologyPairsAntiAffinityPodsMaps, nil
 }

 // targetPodMatchesAffinityOfPod returns true if "targetPod" matches ALL affinity terms of
--- a/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/predicates.go
+++ b/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/predicates.go
--- a/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/testing_helper.go
+++ b/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/testing_helper.go
@ -1,85 +0,0 @@
-/*
-Copyright 2017 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package predicates
-
-import (
-	"fmt"
-
-	"k8s.io/api/core/v1"
-	storagev1 "k8s.io/api/storage/v1"
-)
-
-// FakePersistentVolumeClaimInfo declares a []v1.PersistentVolumeClaim type for testing.
-type FakePersistentVolumeClaimInfo []v1.PersistentVolumeClaim
-
-// GetPersistentVolumeClaimInfo gets PVC matching the namespace and PVC ID.
-func (pvcs FakePersistentVolumeClaimInfo) GetPersistentVolumeClaimInfo(namespace string, pvcID string) (*v1.PersistentVolumeClaim, error) {
-	for _, pvc := range pvcs {
-		if pvc.Name == pvcID && pvc.Namespace == namespace {
-			return &pvc, nil
-		}
-	}
-	return nil, fmt.Errorf("Unable to find persistent volume claim: %s/%s", namespace, pvcID)
-}
-
-// FakeNodeInfo declares a v1.Node type for testing.
-type FakeNodeInfo v1.Node
-
-// GetNodeInfo return a fake node info object.
-func (n FakeNodeInfo) GetNodeInfo(nodeName string) (*v1.Node, error) {
-	node := v1.Node(n)
-	return &node, nil
-}
-
-// FakeNodeListInfo declares a []v1.Node type for testing.
-type FakeNodeListInfo []v1.Node
-
-// GetNodeInfo returns a fake node object in the fake nodes.
-func (nodes FakeNodeListInfo) GetNodeInfo(nodeName string) (*v1.Node, error) {
-	for _, node := range nodes {
-		if node.Name == nodeName {
-			return &node, nil
-		}
-	}
-	return nil, fmt.Errorf("Unable to find node: %s", nodeName)
-}
-
-// FakePersistentVolumeInfo declares a []v1.PersistentVolume type for testing.
-type FakePersistentVolumeInfo []v1.PersistentVolume
-
-// GetPersistentVolumeInfo returns a fake PV object in the fake PVs by PV ID.
-func (pvs FakePersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*v1.PersistentVolume, error) {
-	for _, pv := range pvs {
-		if pv.Name == pvID {
-			return &pv, nil
-		}
-	}
-	return nil, fmt.Errorf("Unable to find persistent volume: %s", pvID)
-}
-
-// FakeStorageClassInfo declares a []storagev1.StorageClass type for testing.
-type FakeStorageClassInfo []storagev1.StorageClass
-
-// GetStorageClassInfo returns a fake storage class object in the fake storage classes by name.
-func (classes FakeStorageClassInfo) GetStorageClassInfo(name string) (*storagev1.StorageClass, error) {
-	for _, sc := range classes {
-		if sc.Name == name {
-			return &sc, nil
-		}
-	}
-	return nil, fmt.Errorf("Unable to find storage class: %s", name)
-}
--- a/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/utils.go
+++ b/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/predicates/utils.go
@ -17,8 +17,15 @@ limitations under the License.
 package predicates

 import (
-	"k8s.io/api/core/v1"
+	"strings"
+
+	v1 "k8s.io/api/core/v1"
+	storagev1 "k8s.io/api/storage/v1"
 	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/util/sets"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
+	csilibplugins "k8s.io/csi-translation-lib/plugins"
+	"k8s.io/kubernetes/pkg/features"
 	schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
 )

@ -60,7 +67,7 @@ func FilterPodsByNamespace(pods []*v1.Pod, ns string) []*v1.Pod {

 // CreateSelectorFromLabels is used to define a selector that corresponds to the keys in a map.
 func CreateSelectorFromLabels(aL map[string]string) labels.Selector {
-	if aL == nil || len(aL) == 0 {
+	if len(aL) == 0 {
 		return labels.Everything()
 	}
 	return labels.Set(aL).AsSelector()
@ -87,3 +94,56 @@ func SetPredicatesOrderingDuringTest(value []string) func() {
 		predicatesOrdering = origVal
 	}
 }
+
+// isCSIMigrationOn returns a boolean value indicating whether
+// the CSI migration has been enabled for a particular storage plugin.
+//
+// It returns true only when all of the following hold: csiNode is non-nil and
+// pluginName is non-empty; the CSIMigration feature gate is enabled; pluginName
+// is one of the in-tree plugins handled by the switch below and its
+// plugin-specific gate is enabled; and pluginName appears in the
+// comma-separated v1.MigratedPluginsAnnotationKey annotation of csiNode.
+func isCSIMigrationOn(csiNode *storagev1.CSINode, pluginName string) bool {
+	if csiNode == nil || len(pluginName) == 0 {
+		return false
+	}
+
+	// In-tree storage to CSI driver migration feature should be enabled,
+	// along with the plugin-specific one
+	if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigration) {
+		return false
+	}
+
+	// Plugins not listed here are never reported as migrated.
+	switch pluginName {
+	case csilibplugins.AWSEBSInTreePluginName:
+		if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationAWS) {
+			return false
+		}
+	case csilibplugins.GCEPDInTreePluginName:
+		if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationGCE) {
+			return false
+		}
+	case csilibplugins.AzureDiskInTreePluginName:
+		if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationAzureDisk) {
+			return false
+		}
+	case csilibplugins.CinderInTreePluginName:
+		if !utilfeature.DefaultFeatureGate.Enabled(features.CSIMigrationOpenStack) {
+			return false
+		}
+	default:
+		return false
+	}
+
+	// The plugin name should be listed in the CSINode object annotation.
+	// This indicates that the plugin has been migrated to a CSI driver in the node.
+	csiNodeAnn := csiNode.GetAnnotations()
+	if csiNodeAnn == nil {
+		return false
+	}
+
+	var mpaSet sets.String
+	mpa := csiNodeAnn[v1.MigratedPluginsAnnotationKey]
+	if len(mpa) == 0 {
+		mpaSet = sets.NewString()
+	} else {
+		// Entries are split on "," and compared verbatim: surrounding
+		// whitespace is not trimmed.
+		tok := strings.Split(mpa, ",")
+		mpaSet = sets.NewString(tok...)
+	}
+
+	return mpaSet.Has(pluginName)
+}
--- a/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util/non_zero.go
+++ b/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util/non_zero.go
@ -16,7 +16,10 @@ limitations under the License.

 package util

-import "k8s.io/api/core/v1"
+import (
+	v1 "k8s.io/api/core/v1"
+	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
+)

 // For each of these resources, a pod that doesn't request the resource explicitly
 // will be treated as having requested the amount indicated below, for the purpose
@ -26,27 +29,50 @@ import "k8s.io/api/core/v1"
 // consuming no resources whatsoever. We chose these values to be similar to the
 // resources that we give to cluster addon pods (#10653). But they are pretty arbitrary.
 // As described in #11713, we use request instead of limit to deal with resource requirements.
+const (
+	// DefaultMilliCPURequest defines default milli cpu request number.
+	DefaultMilliCPURequest int64 = 100 // 0.1 core
+	// DefaultMemoryRequest defines default memory request size.
+	DefaultMemoryRequest int64 = 200 * 1024 * 1024 // 200 MB
+)

-// DefaultMilliCPURequest defines default milli cpu request number.
-const DefaultMilliCPURequest int64 = 100 // 0.1 core
-// DefaultMemoryRequest defines default memory request size.
-const DefaultMemoryRequest int64 = 200 * 1024 * 1024 // 200 MB
-
-// GetNonzeroRequests returns the default resource request if none is found or
+// GetNonzeroRequests returns the default cpu and memory resource request if none is found or
 // what is provided on the request.
+// The first result is the CPU request in milli-units, the second the memory
+// request; both are obtained from GetNonzeroRequestForResource.
 func GetNonzeroRequests(requests *v1.ResourceList) (int64, int64) {
-	var outMilliCPU, outMemory int64
-	// Override if un-set, but not if explicitly set to zero
-	if _, found := (*requests)[v1.ResourceCPU]; !found {
-		outMilliCPU = DefaultMilliCPURequest
-	} else {
-		outMilliCPU = requests.Cpu().MilliValue()
-	}
-	// Override if un-set, but not if explicitly set to zero
-	if _, found := (*requests)[v1.ResourceMemory]; !found {
-		outMemory = DefaultMemoryRequest
-	} else {
-		outMemory = requests.Memory().Value()
-	}
-	return outMilliCPU, outMemory
+	return GetNonzeroRequestForResource(v1.ResourceCPU, requests),
+		GetNonzeroRequestForResource(v1.ResourceMemory, requests)
+}
+
+// GetNonzeroRequestForResource returns the default resource request if none is found or
+// what is provided on the request.
+//
+// Defaults apply only to CPU (DefaultMilliCPURequest, returned in milli-units)
+// and memory (DefaultMemoryRequest). Ephemeral storage and scalar resources
+// return 0 when absent, and any other resource name always returns 0.
+// requests is dereferenced without a nil check, so callers must pass a non-nil
+// pointer.
+func GetNonzeroRequestForResource(resource v1.ResourceName, requests *v1.ResourceList) int64 {
+	switch resource {
+	case v1.ResourceCPU:
+		// Override if un-set, but not if explicitly set to zero
+		if _, found := (*requests)[v1.ResourceCPU]; !found {
+			return DefaultMilliCPURequest
+		}
+		return requests.Cpu().MilliValue()
+	case v1.ResourceMemory:
+		// Override if un-set, but not if explicitly set to zero
+		if _, found := (*requests)[v1.ResourceMemory]; !found {
+			return DefaultMemoryRequest
+		}
+		return requests.Memory().Value()
+	case v1.ResourceEphemeralStorage:
+		quantity, found := (*requests)[v1.ResourceEphemeralStorage]
+		if !found {
+			return 0
+		}
+		return quantity.Value()
+	default:
+		if v1helper.IsScalarResourceName(resource) {
+			quantity, found := (*requests)[resource]
+			if !found {
+				return 0
+			}
+			return quantity.Value()
+		}
+	}
+	// Reached only for resource names that are none of the above and not scalar.
+	return 0
 }
--- a/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/scheduler_interface.go
+++ b/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/scheduler_interface.go
@ -18,7 +18,7 @@ package algorithm

 import (
 	"k8s.io/api/core/v1"
-	schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
+	extenderv1 "k8s.io/kubernetes/pkg/scheduler/apis/extender/v1"
 	schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
 )

@ -34,12 +34,12 @@ type SchedulerExtender interface {
 	// the list of failed nodes and failure reasons.
 	Filter(pod *v1.Pod,
 		nodes []*v1.Node, nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo,
-	) (filteredNodes []*v1.Node, failedNodesMap schedulerapi.FailedNodesMap, err error)
+	) (filteredNodes []*v1.Node, failedNodesMap extenderv1.FailedNodesMap, err error)

 	// Prioritize based on extender-implemented priority functions. The returned scores & weight
 	// are used to compute the weighted score for an extender. The weighted scores are added to
-	// the scores computed  by Kubernetes scheduler. The total scores are used to do the host selection.
-	Prioritize(pod *v1.Pod, nodes []*v1.Node) (hostPriorities *schedulerapi.HostPriorityList, weight int, err error)
+	// the scores computed by Kubernetes scheduler. The total scores are used to do the host selection.
+	Prioritize(pod *v1.Pod, nodes []*v1.Node) (hostPriorities *extenderv1.HostPriorityList, weight int64, err error)

 	// Bind delegates the action of binding a pod to a node to the extender.
 	Bind(binding *v1.Binding) error
@ -61,9 +61,9 @@ type SchedulerExtender interface {
 	//   2. A different set of victim pod for every given candidate node after preemption phase of extender.
 	ProcessPreemption(
 		pod *v1.Pod,
-		nodeToVictims map[*v1.Node]*schedulerapi.Victims,
+		nodeToVictims map[*v1.Node]*extenderv1.Victims,
 		nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo,
-	) (map[*v1.Node]*schedulerapi.Victims, error)
+	) (map[*v1.Node]*extenderv1.Victims, error)

 	// SupportsPreemption returns if the scheduler extender support preemption or not.
 	SupportsPreemption() bool
--- a/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/types.go
+++ b/vendor/k8s.io/kubernetes/pkg/scheduler/algorithm/types.go
@ -17,68 +17,22 @@ limitations under the License.
 package algorithm

 import (
-	apps "k8s.io/api/apps/v1"
-	"k8s.io/api/core/v1"
-	policyv1beta1 "k8s.io/api/policy/v1beta1"
+	appsv1 "k8s.io/api/apps/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/labels"
-	schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
+	appslisters "k8s.io/client-go/listers/apps/v1"
+	corelisters "k8s.io/client-go/listers/core/v1"
+	"k8s.io/kubernetes/pkg/apis/apps"
+	api "k8s.io/kubernetes/pkg/apis/core"
 )

-// NodeFieldSelectorKeys is a map that: the key are node field selector keys; the values are
+// NodeFieldSelectorKeys is a map that: the keys are node field selector keys; the values are
 // the functions to get the value of the node field.
 var NodeFieldSelectorKeys = map[string]func(*v1.Node) string{
-	schedulerapi.NodeFieldSelectorKeyNodeName: func(n *v1.Node) string { return n.Name },
+	api.ObjectNameField: func(n *v1.Node) string { return n.Name },
 }

-// NodeLister interface represents anything that can list nodes for a scheduler.
-type NodeLister interface {
-	// We explicitly return []*v1.Node, instead of v1.NodeList, to avoid
-	// performing expensive copies that are unneeded.
-	List() ([]*v1.Node, error)
-}
-
-// PodFilter is a function to filter a pod. If pod passed return true else return false.
-type PodFilter func(*v1.Pod) bool
-
-// PodLister interface represents anything that can list pods for a scheduler.
-type PodLister interface {
-	// We explicitly return []*v1.Pod, instead of v1.PodList, to avoid
-	// performing expensive copies that are unneeded.
-	List(labels.Selector) ([]*v1.Pod, error)
-	// This is similar to "List()", but the returned slice does not
-	// contain pods that don't pass `podFilter`.
-	FilteredList(podFilter PodFilter, selector labels.Selector) ([]*v1.Pod, error)
-}
-
-// ServiceLister interface represents anything that can produce a list of services; the list is consumed by a scheduler.
-type ServiceLister interface {
-	// Lists all the services
-	List(labels.Selector) ([]*v1.Service, error)
-	// Gets the services for the given pod
-	GetPodServices(*v1.Pod) ([]*v1.Service, error)
-}
-
-// ControllerLister interface represents anything that can produce a list of ReplicationController; the list is consumed by a scheduler.
-type ControllerLister interface {
-	// Lists all the replication controllers
-	List(labels.Selector) ([]*v1.ReplicationController, error)
-	// Gets the services for the given pod
-	GetPodControllers(*v1.Pod) ([]*v1.ReplicationController, error)
-}
-
-// ReplicaSetLister interface represents anything that can produce a list of ReplicaSet; the list is consumed by a scheduler.
-type ReplicaSetLister interface {
-	// Gets the replicasets for the given pod
-	GetPodReplicaSets(*v1.Pod) ([]*apps.ReplicaSet, error)
-}
-
-// PDBLister interface represents anything that can list PodDisruptionBudget objects.
-type PDBLister interface {
-	// List() returns a list of PodDisruptionBudgets matching the selector.
-	List(labels.Selector) ([]*policyv1beta1.PodDisruptionBudget, error)
-}
-
-var _ ControllerLister = &EmptyControllerLister{}
+var _ corelisters.ReplicationControllerLister = &EmptyControllerLister{}

 // EmptyControllerLister implements ControllerLister on []v1.ReplicationController returning empty data
 type EmptyControllerLister struct{}
@ -93,28 +47,53 @@ func (f EmptyControllerLister) GetPodControllers(pod *v1.Pod) (controllers []*v1
 	return nil, nil
 }

-var _ ReplicaSetLister = &EmptyReplicaSetLister{}
+// ReplicationControllers returns nil
+func (f EmptyControllerLister) ReplicationControllers(namespace string) corelisters.ReplicationControllerNamespaceLister {
+	return nil
+}
+
+var _ appslisters.ReplicaSetLister = &EmptyReplicaSetLister{}

 // EmptyReplicaSetLister implements ReplicaSetLister on []extensions.ReplicaSet returning empty data
 type EmptyReplicaSetLister struct{}

-// GetPodReplicaSets returns nil
-func (f EmptyReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*apps.ReplicaSet, err error) {
+// List returns nil
+func (f EmptyReplicaSetLister) List(labels.Selector) ([]*appsv1.ReplicaSet, error) {
 	return nil, nil
 }

+// GetPodReplicaSets returns nil
+func (f EmptyReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*appsv1.ReplicaSet, err error) {
+	return nil, nil
+}
+
+// ReplicaSets returns nil
+func (f EmptyReplicaSetLister) ReplicaSets(namespace string) appslisters.ReplicaSetNamespaceLister {
+	return nil
+}
+
 // StatefulSetLister interface represents anything that can produce a list of StatefulSet; the list is consumed by a scheduler.
 type StatefulSetLister interface {
 	// Gets the StatefulSet for the given pod.
 	GetPodStatefulSets(*v1.Pod) ([]*apps.StatefulSet, error)
 }

-var _ StatefulSetLister = &EmptyStatefulSetLister{}
+var _ appslisters.StatefulSetLister = &EmptyStatefulSetLister{}

 // EmptyStatefulSetLister implements StatefulSetLister on []apps.StatefulSet returning empty data.
 type EmptyStatefulSetLister struct{}

-// GetPodStatefulSets of EmptyStatefulSetLister returns nil.
-func (f EmptyStatefulSetLister) GetPodStatefulSets(pod *v1.Pod) (sss []*apps.StatefulSet, err error) {
+// List returns nil
+func (f EmptyStatefulSetLister) List(labels.Selector) ([]*appsv1.StatefulSet, error) {
 	return nil, nil
 }
+
+// GetPodStatefulSets of EmptyStatefulSetLister returns nil.
+func (f EmptyStatefulSetLister) GetPodStatefulSets(pod *v1.Pod) (sss []*appsv1.StatefulSet, err error) {
+	return nil, nil
+}
+
+// StatefulSets returns nil
+func (f EmptyStatefulSetLister) StatefulSets(namespace string) appslisters.StatefulSetNamespaceLister {
+	return nil
+}