vendor update for CSI 0.3.0

This commit is contained in:
gman
2018-07-18 16:47:22 +02:00
parent 6f484f92fc
commit 8ea659f0d5
6810 changed files with 438061 additions and 193861 deletions

64
vendor/k8s.io/kubernetes/pkg/scheduler/cache/BUILD generated vendored Normal file

@@ -0,0 +1,64 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = [
"cache.go",
"interface.go",
"node_info.go",
"util.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/cache",
visibility = ["//visibility:public"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"cache_test.go",
"node_info_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/features:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/util/parsers:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

510
vendor/k8s.io/kubernetes/pkg/scheduler/cache/cache.go generated vendored Normal file

@@ -0,0 +1,510 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cache
import (
"fmt"
"sync"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
"github.com/golang/glog"
policy "k8s.io/api/policy/v1beta1"
)
var (
cleanAssumedPeriod = 1 * time.Second
)
// New returns a Cache implementation.
// It automatically starts a goroutine that manages expiration of assumed pods.
// "ttl" is how long an assumed pod is kept before it expires.
// "stop" is the channel whose closure terminates the background goroutine.
func New(ttl time.Duration, stop <-chan struct{}) Cache {
cache := newSchedulerCache(ttl, cleanAssumedPeriod, stop)
cache.run()
return cache
}
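// A minimal usage sketch (illustrative only; the 30-second ttl and the stop
// channel below are arbitrary caller-side choices): the caller owns the stop
// channel and closes it to terminate the expiration goroutine started by run().
//
//	stop := make(chan struct{})
//	schedCache := New(30*time.Second, stop)
//	// ... use schedCache through the Cache interface ...
//	close(stop)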
type schedulerCache struct {
stop <-chan struct{}
ttl time.Duration
period time.Duration
// This mutex guards all fields within this cache struct.
mu sync.Mutex
// a set of assumed pod keys.
// The key could further be used to get an entry in podStates.
assumedPods map[string]bool
// a map from pod key to podState.
podStates map[string]*podState
nodes map[string]*NodeInfo
pdbs map[string]*policy.PodDisruptionBudget
}
type podState struct {
pod *v1.Pod
// Used by assumedPod to determine expiration.
deadline *time.Time
// Used to block cache from expiring assumedPod if binding still runs
bindingFinished bool
}
func newSchedulerCache(ttl, period time.Duration, stop <-chan struct{}) *schedulerCache {
return &schedulerCache{
ttl: ttl,
period: period,
stop: stop,
nodes: make(map[string]*NodeInfo),
assumedPods: make(map[string]bool),
podStates: make(map[string]*podState),
pdbs: make(map[string]*policy.PodDisruptionBudget),
}
}
// Snapshot takes a snapshot of the current schedulerCache. The method has a performance impact
// and should only be used in non-critical paths.
func (cache *schedulerCache) Snapshot() *Snapshot {
cache.mu.Lock()
defer cache.mu.Unlock()
nodes := make(map[string]*NodeInfo)
for k, v := range cache.nodes {
nodes[k] = v.Clone()
}
assumedPods := make(map[string]bool)
for k, v := range cache.assumedPods {
assumedPods[k] = v
}
pdbs := make(map[string]*policy.PodDisruptionBudget)
for k, v := range cache.pdbs {
pdbs[k] = v.DeepCopy()
}
return &Snapshot{
Nodes: nodes,
AssumedPods: assumedPods,
Pdbs: pdbs,
}
}
func (cache *schedulerCache) UpdateNodeNameToInfoMap(nodeNameToInfo map[string]*NodeInfo) error {
cache.mu.Lock()
defer cache.mu.Unlock()
for name, info := range cache.nodes {
if utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && info.TransientInfo != nil {
// Transient scheduler info is reset here.
info.TransientInfo.resetTransientSchedulerInfo()
}
if current, ok := nodeNameToInfo[name]; !ok || current.generation != info.generation {
nodeNameToInfo[name] = info.Clone()
}
}
for name := range nodeNameToInfo {
if _, ok := cache.nodes[name]; !ok {
delete(nodeNameToInfo, name)
}
}
return nil
}
func (cache *schedulerCache) List(selector labels.Selector) ([]*v1.Pod, error) {
alwaysTrue := func(p *v1.Pod) bool { return true }
return cache.FilteredList(alwaysTrue, selector)
}
func (cache *schedulerCache) FilteredList(podFilter PodFilter, selector labels.Selector) ([]*v1.Pod, error) {
cache.mu.Lock()
defer cache.mu.Unlock()
// podFilter is expected to return true for most or all of the pods. We
// can avoid expensive array growth without wasting too much memory by
// pre-allocating capacity.
maxSize := 0
for _, info := range cache.nodes {
maxSize += len(info.pods)
}
pods := make([]*v1.Pod, 0, maxSize)
for _, info := range cache.nodes {
for _, pod := range info.pods {
if podFilter(pod) && selector.Matches(labels.Set(pod.Labels)) {
pods = append(pods, pod)
}
}
}
return pods, nil
}
func (cache *schedulerCache) AssumePod(pod *v1.Pod) error {
key, err := getPodKey(pod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
if _, ok := cache.podStates[key]; ok {
return fmt.Errorf("pod %v is in the cache, so can't be assumed", key)
}
cache.addPod(pod)
ps := &podState{
pod: pod,
}
cache.podStates[key] = ps
cache.assumedPods[key] = true
return nil
}
func (cache *schedulerCache) FinishBinding(pod *v1.Pod) error {
return cache.finishBinding(pod, time.Now())
}
// finishBinding exists to make tests deterministic by injecting the current time as an argument
func (cache *schedulerCache) finishBinding(pod *v1.Pod, now time.Time) error {
key, err := getPodKey(pod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
glog.V(5).Infof("Finished binding for pod %v. Can be expired.", key)
currState, ok := cache.podStates[key]
if ok && cache.assumedPods[key] {
dl := now.Add(cache.ttl)
currState.bindingFinished = true
currState.deadline = &dl
}
return nil
}
func (cache *schedulerCache) ForgetPod(pod *v1.Pod) error {
key, err := getPodKey(pod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
currState, ok := cache.podStates[key]
if ok && currState.pod.Spec.NodeName != pod.Spec.NodeName {
return fmt.Errorf("pod %v was assumed on %v but assigned to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName)
}
switch {
// Only assumed pod can be forgotten.
case ok && cache.assumedPods[key]:
err := cache.removePod(pod)
if err != nil {
return err
}
delete(cache.assumedPods, key)
delete(cache.podStates, key)
default:
return fmt.Errorf("pod %v wasn't assumed so cannot be forgotten", key)
}
return nil
}
// Assumes that lock is already acquired.
func (cache *schedulerCache) addPod(pod *v1.Pod) {
n, ok := cache.nodes[pod.Spec.NodeName]
if !ok {
n = NewNodeInfo()
cache.nodes[pod.Spec.NodeName] = n
}
n.AddPod(pod)
}
// Assumes that lock is already acquired.
func (cache *schedulerCache) updatePod(oldPod, newPod *v1.Pod) error {
if err := cache.removePod(oldPod); err != nil {
return err
}
cache.addPod(newPod)
return nil
}
// Assumes that lock is already acquired.
func (cache *schedulerCache) removePod(pod *v1.Pod) error {
n := cache.nodes[pod.Spec.NodeName]
if err := n.RemovePod(pod); err != nil {
return err
}
if len(n.pods) == 0 && n.node == nil {
delete(cache.nodes, pod.Spec.NodeName)
}
return nil
}
func (cache *schedulerCache) AddPod(pod *v1.Pod) error {
key, err := getPodKey(pod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
currState, ok := cache.podStates[key]
switch {
case ok && cache.assumedPods[key]:
if currState.pod.Spec.NodeName != pod.Spec.NodeName {
// The pod was added to a different node than it was assumed to.
glog.Warningf("Pod %v was assumed to be on %v but got added to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName)
// Clean this up.
cache.removePod(currState.pod)
cache.addPod(pod)
}
delete(cache.assumedPods, key)
cache.podStates[key].deadline = nil
cache.podStates[key].pod = pod
case !ok:
// Pod was expired. We should add it back.
cache.addPod(pod)
ps := &podState{
pod: pod,
}
cache.podStates[key] = ps
default:
return fmt.Errorf("pod %v was already in added state", key)
}
return nil
}
func (cache *schedulerCache) UpdatePod(oldPod, newPod *v1.Pod) error {
key, err := getPodKey(oldPod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
currState, ok := cache.podStates[key]
switch {
// An assumed pod won't have an Update/Remove event. It needs to have an Add event
// before an Update event, in which case the state changes from Assumed to Added.
case ok && !cache.assumedPods[key]:
if currState.pod.Spec.NodeName != newPod.Spec.NodeName {
glog.Errorf("Pod %v updated on a different node than previously added to.", key)
glog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions")
}
if err := cache.updatePod(oldPod, newPod); err != nil {
return err
}
default:
return fmt.Errorf("pod %v is not added to scheduler cache, so cannot be updated", key)
}
return nil
}
func (cache *schedulerCache) RemovePod(pod *v1.Pod) error {
key, err := getPodKey(pod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
currState, ok := cache.podStates[key]
switch {
// An assumed pod won't have a Delete/Remove event. It needs to have an Add event
// before a Remove event, in which case the state changes from Assumed to Added.
case ok && !cache.assumedPods[key]:
if currState.pod.Spec.NodeName != pod.Spec.NodeName {
glog.Errorf("Pod %v was assumed to be on %v but got added to %v", key, pod.Spec.NodeName, currState.pod.Spec.NodeName)
glog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions")
}
err := cache.removePod(currState.pod)
if err != nil {
return err
}
delete(cache.podStates, key)
default:
return fmt.Errorf("pod %v is not found in scheduler cache, so cannot be removed from it", key)
}
return nil
}
func (cache *schedulerCache) IsAssumedPod(pod *v1.Pod) (bool, error) {
key, err := getPodKey(pod)
if err != nil {
return false, err
}
cache.mu.Lock()
defer cache.mu.Unlock()
b, found := cache.assumedPods[key]
if !found {
return false, nil
}
return b, nil
}
func (cache *schedulerCache) GetPod(pod *v1.Pod) (*v1.Pod, error) {
key, err := getPodKey(pod)
if err != nil {
return nil, err
}
cache.mu.Lock()
defer cache.mu.Unlock()
podState, ok := cache.podStates[key]
if !ok {
return nil, fmt.Errorf("pod %v does not exist in scheduler cache", key)
}
return podState.pod, nil
}
func (cache *schedulerCache) AddNode(node *v1.Node) error {
cache.mu.Lock()
defer cache.mu.Unlock()
n, ok := cache.nodes[node.Name]
if !ok {
n = NewNodeInfo()
cache.nodes[node.Name] = n
}
return n.SetNode(node)
}
func (cache *schedulerCache) UpdateNode(oldNode, newNode *v1.Node) error {
cache.mu.Lock()
defer cache.mu.Unlock()
n, ok := cache.nodes[newNode.Name]
if !ok {
n = NewNodeInfo()
cache.nodes[newNode.Name] = n
}
return n.SetNode(newNode)
}
func (cache *schedulerCache) RemoveNode(node *v1.Node) error {
cache.mu.Lock()
defer cache.mu.Unlock()
n := cache.nodes[node.Name]
if err := n.RemoveNode(node); err != nil {
return err
}
// We remove NodeInfo for this node only if there aren't any pods on this node.
// We can't do it unconditionally, because notifications about pods are delivered
// in a different watch, and thus can potentially be observed later, even though
// they happened before node removal.
if len(n.pods) == 0 && n.node == nil {
delete(cache.nodes, node.Name)
}
return nil
}
func (cache *schedulerCache) AddPDB(pdb *policy.PodDisruptionBudget) error {
cache.mu.Lock()
defer cache.mu.Unlock()
// Unconditionally update cache.
cache.pdbs[string(pdb.UID)] = pdb
return nil
}
func (cache *schedulerCache) UpdatePDB(oldPDB, newPDB *policy.PodDisruptionBudget) error {
return cache.AddPDB(newPDB)
}
func (cache *schedulerCache) RemovePDB(pdb *policy.PodDisruptionBudget) error {
cache.mu.Lock()
defer cache.mu.Unlock()
delete(cache.pdbs, string(pdb.UID))
return nil
}
func (cache *schedulerCache) ListPDBs(selector labels.Selector) ([]*policy.PodDisruptionBudget, error) {
cache.mu.Lock()
defer cache.mu.Unlock()
var pdbs []*policy.PodDisruptionBudget
for _, pdb := range cache.pdbs {
if selector.Matches(labels.Set(pdb.Labels)) {
pdbs = append(pdbs, pdb)
}
}
return pdbs, nil
}
func (cache *schedulerCache) IsUpToDate(n *NodeInfo) bool {
cache.mu.Lock()
defer cache.mu.Unlock()
node, ok := cache.nodes[n.Node().Name]
return ok && n.generation == node.generation
}
func (cache *schedulerCache) run() {
go wait.Until(cache.cleanupExpiredAssumedPods, cache.period, cache.stop)
}
func (cache *schedulerCache) cleanupExpiredAssumedPods() {
cache.cleanupAssumedPods(time.Now())
}
// cleanupAssumedPods exists to make tests deterministic by taking the current time as an argument.
func (cache *schedulerCache) cleanupAssumedPods(now time.Time) {
cache.mu.Lock()
defer cache.mu.Unlock()
// The size of assumedPods should be small
for key := range cache.assumedPods {
ps, ok := cache.podStates[key]
if !ok {
panic("Key found in assumed set but not in podStates. Potentially a logical error.")
}
if !ps.bindingFinished {
glog.V(3).Infof("Couldn't expire cache for pod %v/%v. Binding is still in progress.",
ps.pod.Namespace, ps.pod.Name)
continue
}
if now.After(*ps.deadline) {
glog.Warningf("Pod %s/%s expired", ps.pod.Namespace, ps.pod.Name)
if err := cache.expirePod(key, ps); err != nil {
glog.Errorf("ExpirePod failed for %s: %v", key, err)
}
}
}
}
func (cache *schedulerCache) expirePod(key string, ps *podState) error {
if err := cache.removePod(ps.pod); err != nil {
return err
}
delete(cache.assumedPods, key)
delete(cache.podStates, key)
return nil
}

vendor/k8s.io/kubernetes/pkg/scheduler/cache/cache_test.go generated vendored Normal file
File diff suppressed because it is too large

135
vendor/k8s.io/kubernetes/pkg/scheduler/cache/interface.go generated vendored Normal file

@@ -0,0 +1,135 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cache
import (
"k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/labels"
)
// PodFilter is a function that filters a pod; it returns true if the pod passes the filter and false otherwise.
type PodFilter func(*v1.Pod) bool
// Cache collects pods' information and provides node-level aggregated information.
// It's intended for the generic scheduler to do efficient lookups.
// Cache's operations are pod centric. It does incremental updates based on pod events.
// Pod events are sent over the network, so we don't have guaranteed delivery of all events:
// we use a Reflector to list and watch from the remote apiserver, and the
// Reflector might be slow and do a relist, which would lead to missing events.
//
// State Machine of a pod's events in scheduler's cache:
//
//
//   +-------------------------------------------+  +----+
//   |                            Add            |  |    |
//   |                                           |  |    | Update
//   +      Assume                Add            v  v    |
//Initial +--------> Assumed +------------+---> Added <--+
//   ^                +   +               |       +
//   |                |   |               |       |
//   |                |   |           Add |       | Remove
//   |                |   |               |       |
//   |                |   |               +       |
//   +----------------+   +-----------> Expired   +----> Deleted
//         Forget             Expire
//
//
// Note that an assumed pod can expire, because if we haven't received Add event notifying us
// for a while, there might be some problems and we shouldn't keep the pod in cache anymore.
//
// Note that "Initial", "Expired", and "Deleted" pods do not actually exist in cache.
// Based on existing use cases, we are making the following assumptions:
// - No pod would be assumed twice
// - A pod could be added without going through scheduler. In this case, we will see Add but not Assume event.
// - If a pod wasn't added, it wouldn't be removed or updated.
// - Both "Expired" and "Deleted" are valid end states. In case of some problems, e.g. network issue,
// a pod might have changed its state (e.g. added and deleted) without delivering notification to the cache.
type Cache interface {
// AssumePod assumes a pod scheduled and aggregates the pod's information into its node.
// The implementation also decides the policy for expiring an assumed pod before it is confirmed (i.e. before the Add event is received).
// After expiration, its information would be subtracted.
AssumePod(pod *v1.Pod) error
// FinishBinding signals that the cache entry for the assumed pod can be expired.
FinishBinding(pod *v1.Pod) error
// ForgetPod removes an assumed pod from cache.
ForgetPod(pod *v1.Pod) error
// AddPod either confirms a pod if it's assumed, or adds it back if it's expired.
// If added back, the pod's information would be added again.
AddPod(pod *v1.Pod) error
// UpdatePod removes oldPod's information and adds newPod's information.
UpdatePod(oldPod, newPod *v1.Pod) error
// RemovePod removes a pod. The pod's information would be subtracted from assigned node.
RemovePod(pod *v1.Pod) error
// GetPod returns the pod from the cache with the same namespace and the
// same name of the specified pod.
GetPod(pod *v1.Pod) (*v1.Pod, error)
// IsAssumedPod returns true if the pod is assumed and not expired.
IsAssumedPod(pod *v1.Pod) (bool, error)
// AddNode adds overall information about node.
AddNode(node *v1.Node) error
// UpdateNode updates overall information about node.
UpdateNode(oldNode, newNode *v1.Node) error
// RemoveNode removes overall information about node.
RemoveNode(node *v1.Node) error
// AddPDB adds a PodDisruptionBudget object to the cache.
AddPDB(pdb *policy.PodDisruptionBudget) error
// UpdatePDB updates a PodDisruptionBudget object in the cache.
UpdatePDB(oldPDB, newPDB *policy.PodDisruptionBudget) error
// RemovePDB removes a PodDisruptionBudget object from the cache.
RemovePDB(pdb *policy.PodDisruptionBudget) error
// ListPDBs lists all cached PDBs matching the selector.
ListPDBs(selector labels.Selector) ([]*policy.PodDisruptionBudget, error)
// UpdateNodeNameToInfoMap updates the passed infoMap to the current contents of Cache.
// The node info contains aggregated information of pods scheduled (including assumed to be)
// on this node.
UpdateNodeNameToInfoMap(infoMap map[string]*NodeInfo) error
// List lists all cached pods (including assumed ones).
List(labels.Selector) ([]*v1.Pod, error)
// FilteredList returns all cached pods that pass the filter.
FilteredList(filter PodFilter, selector labels.Selector) ([]*v1.Pod, error)
// Snapshot takes a snapshot of the current cache.
Snapshot() *Snapshot
// IsUpToDate returns true if the given NodeInfo matches the current data in the cache.
IsUpToDate(n *NodeInfo) bool
}
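// An illustrative sketch of the event flow described above (the pod variable,
// the stop channel, and the ignored errors are hypothetical caller-side details,
// not part of this package): the scheduler assumes a pod, signals that binding
// finished, and then either confirms the pod on the informer's Add event or
// forgets it if binding failed.
//
//	stop := make(chan struct{})
//	c := New(30*time.Second, stop)  // implementation from cache.go
//	_ = c.AssumePod(pod)            // pod is the *v1.Pod the scheduler just placed
//	_ = c.FinishBinding(pod)        // the assumed entry may now expire after the ttl
//	_ = c.AddPod(pod)               // confirm when the Add event arrives
//	// _ = c.ForgetPod(pod)         // instead, if binding failed before Add arrived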
// Snapshot is a snapshot of cache state
type Snapshot struct {
AssumedPods map[string]bool
Nodes map[string]*NodeInfo
Pdbs map[string]*policy.PodDisruptionBudget
}

656
vendor/k8s.io/kubernetes/pkg/scheduler/cache/node_info.go generated vendored Normal file

@@ -0,0 +1,656 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cache
import (
"errors"
"fmt"
"sync"
"sync/atomic"
"github.com/golang/glog"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/pkg/scheduler/util"
)
var (
emptyResource = Resource{}
generation int64
)
// NodeInfo is node level aggregated information.
type NodeInfo struct {
// Overall node information.
node *v1.Node
pods []*v1.Pod
podsWithAffinity []*v1.Pod
usedPorts util.HostPortInfo
// Total requested resource of all pods on this node.
// It includes assumed pods which scheduler sends binding to apiserver but
// didn't get it as scheduled yet.
requestedResource *Resource
nonzeroRequest *Resource
// We store allocatableResource (which is Node.Status.Allocatable.*) explicitly
// as int64, to avoid conversions and map accesses.
allocatableResource *Resource
// Cached taints of the node for faster lookup.
taints []v1.Taint
taintsErr error
// This is a map from image name to image size, also for checking image existence on the node
// Cache it here to avoid rebuilding the map during scheduling, e.g., in image_locality.go
imageSizes map[string]int64
// TransientInfo holds the information pertaining to a scheduling cycle. This will be destructed at the end of
// scheduling cycle.
// TODO: @ravig. Remove this once we have a clear approach for message passing across predicates and priorities.
TransientInfo *transientSchedulerInfo
// Cached conditions of node for faster lookup.
memoryPressureCondition v1.ConditionStatus
diskPressureCondition v1.ConditionStatus
pidPressureCondition v1.ConditionStatus
// Whenever NodeInfo changes, generation is bumped.
// This is used to avoid cloning it if the object didn't change.
generation int64
}
// initializeNodeTransientInfo initializes transient information pertaining to a node.
func initializeNodeTransientInfo() nodeTransientInfo {
return nodeTransientInfo{AllocatableVolumesCount: 0, RequestedVolumes: 0}
}
// nextGeneration: Let's make sure history never forgets the name...
// Increments the generation number monotonically ensuring that generation numbers never collide.
// Collision of the generation numbers would be particularly problematic if a node was deleted and
// added back with the same name. See issue#63262.
func nextGeneration() int64 {
return atomic.AddInt64(&generation, 1)
}
// nodeTransientInfo contains transient node information while scheduling.
type nodeTransientInfo struct {
// AllocatableVolumesCount contains number of volumes that could be attached to node.
AllocatableVolumesCount int
// Requested number of volumes on a particular node.
RequestedVolumes int
}
// transientSchedulerInfo is a transient structure which is destructed at the end of each scheduling cycle.
// It consists of items that are valid for a scheduling cycle and is used for message passing across predicates and
// priorities. Some examples which could be used as fields are number of volumes being used on node, current utilization
// on node etc.
// IMPORTANT NOTE: Make sure that each field in this structure is documented along with usage. Expand this structure
// only when absolutely needed as this data structure will be created and destroyed during every scheduling cycle.
type transientSchedulerInfo struct {
TransientLock sync.Mutex
// TransNodeInfo holds the transient node information. NodeName is the key here.
TransNodeInfo nodeTransientInfo
}
// newTransientSchedulerInfo returns a new scheduler transient structure with initialized values.
func newTransientSchedulerInfo() *transientSchedulerInfo {
tsi := &transientSchedulerInfo{
TransNodeInfo: initializeNodeTransientInfo(),
}
return tsi
}
// resetTransientSchedulerInfo resets the transientSchedulerInfo.
func (transientSchedInfo *transientSchedulerInfo) resetTransientSchedulerInfo() {
transientSchedInfo.TransientLock.Lock()
defer transientSchedInfo.TransientLock.Unlock()
// Reset TransientNodeInfo.
transientSchedInfo.TransNodeInfo.AllocatableVolumesCount = 0
transientSchedInfo.TransNodeInfo.RequestedVolumes = 0
}
// Resource is a collection of compute resources.
type Resource struct {
MilliCPU int64
Memory int64
EphemeralStorage int64
// We store allowedPodNumber (which is Node.Status.Allocatable.Pods().Value())
// explicitly as int, to avoid conversions and improve performance.
AllowedPodNumber int
// ScalarResources
ScalarResources map[v1.ResourceName]int64
}
// NewResource creates a Resource from ResourceList
func NewResource(rl v1.ResourceList) *Resource {
r := &Resource{}
r.Add(rl)
return r
}
// Add adds ResourceList into Resource.
func (r *Resource) Add(rl v1.ResourceList) {
if r == nil {
return
}
for rName, rQuant := range rl {
switch rName {
case v1.ResourceCPU:
r.MilliCPU += rQuant.MilliValue()
case v1.ResourceMemory:
r.Memory += rQuant.Value()
case v1.ResourcePods:
r.AllowedPodNumber += int(rQuant.Value())
case v1.ResourceEphemeralStorage:
r.EphemeralStorage += rQuant.Value()
default:
if v1helper.IsScalarResourceName(rName) {
r.AddScalar(rName, rQuant.Value())
}
}
}
}
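// A short sketch of how Add aggregates a pod's requests (the quantities are
// made-up example values):
//
//	r := &Resource{}
//	r.Add(v1.ResourceList{
//		v1.ResourceCPU:    resource.MustParse("250m"),
//		v1.ResourceMemory: resource.MustParse("64Mi"),
//	})
//	// now r.MilliCPU == 250 and r.Memory == 64*1024*1024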
// ResourceList converts this Resource back into a v1.ResourceList.
func (r *Resource) ResourceList() v1.ResourceList {
result := v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(r.MilliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(r.Memory, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(int64(r.AllowedPodNumber), resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(r.EphemeralStorage, resource.BinarySI),
}
for rName, rQuant := range r.ScalarResources {
if v1helper.IsHugePageResourceName(rName) {
result[rName] = *resource.NewQuantity(rQuant, resource.BinarySI)
} else {
result[rName] = *resource.NewQuantity(rQuant, resource.DecimalSI)
}
}
return result
}
// Clone returns a copy of this resource.
func (r *Resource) Clone() *Resource {
res := &Resource{
MilliCPU: r.MilliCPU,
Memory: r.Memory,
AllowedPodNumber: r.AllowedPodNumber,
EphemeralStorage: r.EphemeralStorage,
}
if r.ScalarResources != nil {
res.ScalarResources = make(map[v1.ResourceName]int64)
for k, v := range r.ScalarResources {
res.ScalarResources[k] = v
}
}
return res
}
// AddScalar adds quantity to the named scalar resource.
func (r *Resource) AddScalar(name v1.ResourceName, quantity int64) {
r.SetScalar(name, r.ScalarResources[name]+quantity)
}
// SetScalar sets the named scalar resource to quantity.
func (r *Resource) SetScalar(name v1.ResourceName, quantity int64) {
// Lazily allocate scalar resource map.
if r.ScalarResources == nil {
r.ScalarResources = map[v1.ResourceName]int64{}
}
r.ScalarResources[name] = quantity
}
// SetMaxResource compares against the given ResourceList and keeps the maximum value for each resource.
func (r *Resource) SetMaxResource(rl v1.ResourceList) {
if r == nil {
return
}
for rName, rQuantity := range rl {
switch rName {
case v1.ResourceMemory:
if mem := rQuantity.Value(); mem > r.Memory {
r.Memory = mem
}
case v1.ResourceCPU:
if cpu := rQuantity.MilliValue(); cpu > r.MilliCPU {
r.MilliCPU = cpu
}
case v1.ResourceEphemeralStorage:
if ephemeralStorage := rQuantity.Value(); ephemeralStorage > r.EphemeralStorage {
r.EphemeralStorage = ephemeralStorage
}
default:
if v1helper.IsScalarResourceName(rName) {
value := rQuantity.Value()
if value > r.ScalarResources[rName] {
r.SetScalar(rName, value)
}
}
}
}
}
// NewNodeInfo returns a ready to use empty NodeInfo object.
// If any pods are given in arguments, their information will be aggregated in
// the returned object.
func NewNodeInfo(pods ...*v1.Pod) *NodeInfo {
ni := &NodeInfo{
requestedResource: &Resource{},
nonzeroRequest: &Resource{},
allocatableResource: &Resource{},
TransientInfo: newTransientSchedulerInfo(),
generation: nextGeneration(),
usedPorts: make(util.HostPortInfo),
imageSizes: make(map[string]int64),
}
for _, pod := range pods {
ni.AddPod(pod)
}
return ni
}
// Node returns overall information about this node.
func (n *NodeInfo) Node() *v1.Node {
if n == nil {
return nil
}
return n.node
}
// Pods returns all pods scheduled on this node (including assumed pods).
func (n *NodeInfo) Pods() []*v1.Pod {
if n == nil {
return nil
}
return n.pods
}
// UsedPorts returns used ports on this node.
func (n *NodeInfo) UsedPorts() util.HostPortInfo {
if n == nil {
return nil
}
return n.usedPorts
}
// ImageSizes returns the image size information on this node.
func (n *NodeInfo) ImageSizes() map[string]int64 {
if n == nil {
return nil
}
return n.imageSizes
}
// PodsWithAffinity returns all pods with (anti-)affinity constraints on this node.
func (n *NodeInfo) PodsWithAffinity() []*v1.Pod {
if n == nil {
return nil
}
return n.podsWithAffinity
}
// AllowedPodNumber returns the number of pods allowed on this node.
func (n *NodeInfo) AllowedPodNumber() int {
if n == nil || n.allocatableResource == nil {
return 0
}
return n.allocatableResource.AllowedPodNumber
}
// Taints returns the taints list on this node.
func (n *NodeInfo) Taints() ([]v1.Taint, error) {
if n == nil {
return nil, nil
}
return n.taints, n.taintsErr
}
// MemoryPressureCondition returns the memory pressure condition status on this node.
func (n *NodeInfo) MemoryPressureCondition() v1.ConditionStatus {
if n == nil {
return v1.ConditionUnknown
}
return n.memoryPressureCondition
}
// DiskPressureCondition returns the disk pressure condition status on this node.
func (n *NodeInfo) DiskPressureCondition() v1.ConditionStatus {
if n == nil {
return v1.ConditionUnknown
}
return n.diskPressureCondition
}
// PIDPressureCondition returns the pid pressure condition status on this node.
func (n *NodeInfo) PIDPressureCondition() v1.ConditionStatus {
if n == nil {
return v1.ConditionUnknown
}
return n.pidPressureCondition
}
// RequestedResource returns aggregated resource request of pods on this node.
func (n *NodeInfo) RequestedResource() Resource {
if n == nil {
return emptyResource
}
return *n.requestedResource
}
// NonZeroRequest returns aggregated nonzero resource request of pods on this node.
func (n *NodeInfo) NonZeroRequest() Resource {
if n == nil {
return emptyResource
}
return *n.nonzeroRequest
}
// AllocatableResource returns allocatable resources on a given node.
func (n *NodeInfo) AllocatableResource() Resource {
if n == nil {
return emptyResource
}
return *n.allocatableResource
}
// SetAllocatableResource sets the allocatableResource information of given node.
func (n *NodeInfo) SetAllocatableResource(allocatableResource *Resource) {
n.allocatableResource = allocatableResource
n.generation = nextGeneration()
}
// Clone returns a copy of this NodeInfo.
func (n *NodeInfo) Clone() *NodeInfo {
clone := &NodeInfo{
node: n.node,
requestedResource: n.requestedResource.Clone(),
nonzeroRequest: n.nonzeroRequest.Clone(),
allocatableResource: n.allocatableResource.Clone(),
taintsErr: n.taintsErr,
TransientInfo: n.TransientInfo,
memoryPressureCondition: n.memoryPressureCondition,
diskPressureCondition: n.diskPressureCondition,
pidPressureCondition: n.pidPressureCondition,
usedPorts: make(util.HostPortInfo),
imageSizes: n.imageSizes,
generation: n.generation,
}
if len(n.pods) > 0 {
clone.pods = append([]*v1.Pod(nil), n.pods...)
}
if len(n.usedPorts) > 0 {
for k, v := range n.usedPorts {
clone.usedPorts[k] = v
}
}
if len(n.podsWithAffinity) > 0 {
clone.podsWithAffinity = append([]*v1.Pod(nil), n.podsWithAffinity...)
}
if len(n.taints) > 0 {
clone.taints = append([]v1.Taint(nil), n.taints...)
}
return clone
}
// VolumeLimits returns volume limits associated with the node
func (n *NodeInfo) VolumeLimits() map[v1.ResourceName]int64 {
volumeLimits := map[v1.ResourceName]int64{}
for k, v := range n.AllocatableResource().ScalarResources {
if v1helper.IsAttachableVolumeResourceName(k) {
volumeLimits[k] = v
}
}
return volumeLimits
}
// String returns a human-readable representation of this NodeInfo.
func (n *NodeInfo) String() string {
podKeys := make([]string, len(n.pods))
for i, pod := range n.pods {
podKeys[i] = pod.Name
}
return fmt.Sprintf("&NodeInfo{Pods:%v, RequestedResource:%#v, NonZeroRequest: %#v, UsedPort: %#v, AllocatableResource:%#v}",
podKeys, n.requestedResource, n.nonzeroRequest, n.usedPorts, n.allocatableResource)
}
func hasPodAffinityConstraints(pod *v1.Pod) bool {
affinity := pod.Spec.Affinity
return affinity != nil && (affinity.PodAffinity != nil || affinity.PodAntiAffinity != nil)
}
// AddPod adds pod information to this NodeInfo.
func (n *NodeInfo) AddPod(pod *v1.Pod) {
res, non0CPU, non0Mem := calculateResource(pod)
n.requestedResource.MilliCPU += res.MilliCPU
n.requestedResource.Memory += res.Memory
n.requestedResource.EphemeralStorage += res.EphemeralStorage
if n.requestedResource.ScalarResources == nil && len(res.ScalarResources) > 0 {
n.requestedResource.ScalarResources = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.ScalarResources {
n.requestedResource.ScalarResources[rName] += rQuant
}
n.nonzeroRequest.MilliCPU += non0CPU
n.nonzeroRequest.Memory += non0Mem
n.pods = append(n.pods, pod)
if hasPodAffinityConstraints(pod) {
n.podsWithAffinity = append(n.podsWithAffinity, pod)
}
// Consume ports when pods added.
n.updateUsedPorts(pod, true)
n.generation = nextGeneration()
}
// RemovePod subtracts pod information from this NodeInfo.
func (n *NodeInfo) RemovePod(pod *v1.Pod) error {
k1, err := getPodKey(pod)
if err != nil {
return err
}
for i := range n.podsWithAffinity {
k2, err := getPodKey(n.podsWithAffinity[i])
if err != nil {
glog.Errorf("Cannot get pod key, err: %v", err)
continue
}
if k1 == k2 {
// delete the element
n.podsWithAffinity[i] = n.podsWithAffinity[len(n.podsWithAffinity)-1]
n.podsWithAffinity = n.podsWithAffinity[:len(n.podsWithAffinity)-1]
break
}
}
for i := range n.pods {
k2, err := getPodKey(n.pods[i])
if err != nil {
glog.Errorf("Cannot get pod key, err: %v", err)
continue
}
if k1 == k2 {
// delete the element
n.pods[i] = n.pods[len(n.pods)-1]
n.pods = n.pods[:len(n.pods)-1]
// reduce the resource data
res, non0CPU, non0Mem := calculateResource(pod)
n.requestedResource.MilliCPU -= res.MilliCPU
n.requestedResource.Memory -= res.Memory
n.requestedResource.EphemeralStorage -= res.EphemeralStorage
if len(res.ScalarResources) > 0 && n.requestedResource.ScalarResources == nil {
n.requestedResource.ScalarResources = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.ScalarResources {
n.requestedResource.ScalarResources[rName] -= rQuant
}
n.nonzeroRequest.MilliCPU -= non0CPU
n.nonzeroRequest.Memory -= non0Mem
// Release ports when a pod is removed.
n.updateUsedPorts(pod, false)
n.generation = nextGeneration()
return nil
}
}
return fmt.Errorf("no corresponding pod %s in pods of node %s", pod.Name, n.node.Name)
}
func calculateResource(pod *v1.Pod) (res Resource, non0CPU int64, non0Mem int64) {
resPtr := &res
for _, c := range pod.Spec.Containers {
resPtr.Add(c.Resources.Requests)
non0CPUReq, non0MemReq := priorityutil.GetNonzeroRequests(&c.Resources.Requests)
non0CPU += non0CPUReq
non0Mem += non0MemReq
// No non-zero resources for GPUs or opaque resources.
}
return
}
func (n *NodeInfo) updateUsedPorts(pod *v1.Pod, add bool) {
for j := range pod.Spec.Containers {
container := &pod.Spec.Containers[j]
for k := range container.Ports {
podPort := &container.Ports[k]
if add {
n.usedPorts.Add(podPort.HostIP, string(podPort.Protocol), podPort.HostPort)
} else {
n.usedPorts.Remove(podPort.HostIP, string(podPort.Protocol), podPort.HostPort)
}
}
}
}
func (n *NodeInfo) updateImageSizes() {
node := n.Node()
imageSizes := make(map[string]int64)
for _, image := range node.Status.Images {
for _, name := range image.Names {
imageSizes[name] = image.SizeBytes
}
}
n.imageSizes = imageSizes
}
// SetNode sets the overall node information.
func (n *NodeInfo) SetNode(node *v1.Node) error {
n.node = node
n.allocatableResource = NewResource(node.Status.Allocatable)
n.taints = node.Spec.Taints
for i := range node.Status.Conditions {
cond := &node.Status.Conditions[i]
switch cond.Type {
case v1.NodeMemoryPressure:
n.memoryPressureCondition = cond.Status
case v1.NodeDiskPressure:
n.diskPressureCondition = cond.Status
case v1.NodePIDPressure:
n.pidPressureCondition = cond.Status
default:
// We ignore other conditions.
}
}
n.TransientInfo = newTransientSchedulerInfo()
n.updateImageSizes()
n.generation = nextGeneration()
return nil
}
// RemoveNode removes the overall information about the node.
func (n *NodeInfo) RemoveNode(node *v1.Node) error {
// We keep the NodeInfo object itself (only clearing node-level information), because
// there can still be some pods on this node - notifications about pods are delivered
// in a different watch, and thus can potentially be observed later, even though they
// happened before node removal. This is handled correctly in cache.go.
n.node = nil
n.allocatableResource = &Resource{}
n.taints, n.taintsErr = nil, nil
n.memoryPressureCondition = v1.ConditionUnknown
n.diskPressureCondition = v1.ConditionUnknown
n.pidPressureCondition = v1.ConditionUnknown
n.generation = nextGeneration()
return nil
}
// FilterOutPods receives a list of pods and filters out those whose node names
// are equal to the node of this NodeInfo, but are not found in the pods of this NodeInfo.
//
// Preemption logic simulates removal of pods on a node by removing them from the
// corresponding NodeInfo. In order for the simulation to work, we call this method
// on the pods returned from SchedulerCache, so that predicate functions see
// only the pods that are not removed from the NodeInfo.
func (n *NodeInfo) FilterOutPods(pods []*v1.Pod) []*v1.Pod {
node := n.Node()
if node == nil {
return pods
}
filtered := make([]*v1.Pod, 0, len(pods))
for _, p := range pods {
if p.Spec.NodeName != node.Name {
filtered = append(filtered, p)
continue
}
// If pod is on the given node, add it to 'filtered' only if it is present in nodeInfo.
podKey, _ := getPodKey(p)
for _, np := range n.Pods() {
npodkey, _ := getPodKey(np)
if npodkey == podKey {
filtered = append(filtered, p)
break
}
}
}
return filtered
}
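// A usage sketch in a preemption-style simulation (victim and cachedPods are
// hypothetical variables): after a victim is removed from a cloned NodeInfo,
// FilterOutPods hides that victim from the pod list handed to the predicates.
//
//	clone := nodeInfo.Clone()
//	_ = clone.RemovePod(victim)                  // simulate preempting one pod
//	visible := clone.FilterOutPods(cachedPods)   // cachedPods comes from the scheduler cache
//	_ = visible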
// getPodKey returns the string key of a pod.
func getPodKey(pod *v1.Pod) (string, error) {
uid := string(pod.UID)
if len(uid) == 0 {
return "", errors.New("Cannot get cache key for pod with empty UID")
}
return uid, nil
}
// Filter implements the PodFilter interface. It returns false only if the pod's node name
// matches NodeInfo.node and the pod is not found in the pods list; otherwise,
// it returns true.
func (n *NodeInfo) Filter(pod *v1.Pod) bool {
if pod.Spec.NodeName != n.node.Name {
return true
}
for _, p := range n.pods {
if p.Name == pod.Name && p.Namespace == pod.Namespace {
return true
}
}
return false
}

940
vendor/k8s.io/kubernetes/pkg/scheduler/cache/node_info_test.go generated vendored Normal file

@@ -0,0 +1,940 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cache
import (
"fmt"
"reflect"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/util/parsers"
)
func TestNewResource(t *testing.T) {
tests := []struct {
resourceList v1.ResourceList
expected *Resource
}{
{
resourceList: map[v1.ResourceName]resource.Quantity{},
expected: &Resource{},
},
{
resourceList: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: *resource.NewScaledQuantity(4, -3),
v1.ResourceMemory: *resource.NewQuantity(2000, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(80, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
"scalar.test/" + "scalar1": *resource.NewQuantity(1, resource.DecimalSI),
v1.ResourceHugePagesPrefix + "test": *resource.NewQuantity(2, resource.BinarySI),
},
expected: &Resource{
MilliCPU: 4,
Memory: 2000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
},
},
}
for _, test := range tests {
r := NewResource(test.resourceList)
if !reflect.DeepEqual(test.expected, r) {
t.Errorf("expected: %#v, got: %#v", test.expected, r)
}
}
}
func TestResourceList(t *testing.T) {
tests := []struct {
resource *Resource
expected v1.ResourceList
}{
{
resource: &Resource{},
expected: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: *resource.NewScaledQuantity(0, -3),
v1.ResourceMemory: *resource.NewQuantity(0, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(0, resource.BinarySI),
},
},
{
resource: &Resource{
MilliCPU: 4,
Memory: 2000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{
"scalar.test/scalar1": 1,
"hugepages-test": 2,
"attachable-volumes-aws-ebs": 39,
},
},
expected: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: *resource.NewScaledQuantity(4, -3),
v1.ResourceMemory: *resource.NewQuantity(2000, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(80, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
"scalar.test/" + "scalar1": *resource.NewQuantity(1, resource.DecimalSI),
"attachable-volumes-aws-ebs": *resource.NewQuantity(39, resource.DecimalSI),
v1.ResourceHugePagesPrefix + "test": *resource.NewQuantity(2, resource.BinarySI),
},
},
}
for _, test := range tests {
rl := test.resource.ResourceList()
if !reflect.DeepEqual(test.expected, rl) {
t.Errorf("expected: %#v, got: %#v", test.expected, rl)
}
}
}
func TestResourceClone(t *testing.T) {
tests := []struct {
resource *Resource
expected *Resource
}{
{
resource: &Resource{},
expected: &Resource{},
},
{
resource: &Resource{
MilliCPU: 4,
Memory: 2000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
},
expected: &Resource{
MilliCPU: 4,
Memory: 2000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
},
},
}
for _, test := range tests {
r := test.resource.Clone()
// Modify the field to check if the result is a clone of the origin one.
test.resource.MilliCPU += 1000
if !reflect.DeepEqual(test.expected, r) {
t.Errorf("expected: %#v, got: %#v", test.expected, r)
}
}
}
func TestResourceAddScalar(t *testing.T) {
tests := []struct {
resource *Resource
scalarName v1.ResourceName
scalarQuantity int64
expected *Resource
}{
{
resource: &Resource{},
scalarName: "scalar1",
scalarQuantity: 100,
expected: &Resource{
ScalarResources: map[v1.ResourceName]int64{"scalar1": 100},
},
},
{
resource: &Resource{
MilliCPU: 4,
Memory: 2000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"hugepages-test": 2},
},
scalarName: "scalar2",
scalarQuantity: 200,
expected: &Resource{
MilliCPU: 4,
Memory: 2000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"hugepages-test": 2, "scalar2": 200},
},
},
}
for _, test := range tests {
test.resource.AddScalar(test.scalarName, test.scalarQuantity)
if !reflect.DeepEqual(test.expected, test.resource) {
t.Errorf("expected: %#v, got: %#v", test.expected, test.resource)
}
}
}
func TestSetMaxResource(t *testing.T) {
tests := []struct {
resource *Resource
resourceList v1.ResourceList
expected *Resource
}{
{
resource: &Resource{},
resourceList: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: *resource.NewScaledQuantity(4, -3),
v1.ResourceMemory: *resource.NewQuantity(2000, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
},
expected: &Resource{
MilliCPU: 4,
Memory: 2000,
EphemeralStorage: 5000,
},
},
{
resource: &Resource{
MilliCPU: 4,
Memory: 4000,
EphemeralStorage: 5000,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
},
resourceList: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: *resource.NewScaledQuantity(4, -3),
v1.ResourceMemory: *resource.NewQuantity(2000, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(7000, resource.BinarySI),
"scalar.test/scalar1": *resource.NewQuantity(4, resource.DecimalSI),
v1.ResourceHugePagesPrefix + "test": *resource.NewQuantity(5, resource.BinarySI),
},
expected: &Resource{
MilliCPU: 4,
Memory: 4000,
EphemeralStorage: 7000,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 4, "hugepages-test": 5},
},
},
}
for _, test := range tests {
test.resource.SetMaxResource(test.resourceList)
if !reflect.DeepEqual(test.expected, test.resource) {
t.Errorf("expected: %#v, got: %#v", test.expected, test.resource)
}
}
}
func TestImageSizes(t *testing.T) {
ni := fakeNodeInfo()
ni.node = &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test-node",
},
Status: v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/10:" + parsers.DefaultImageTag,
"gcr.io/10:v1",
},
SizeBytes: int64(10 * 1024 * 1024),
},
{
Names: []string{
"gcr.io/50:" + parsers.DefaultImageTag,
"gcr.io/50:v1",
},
SizeBytes: int64(50 * 1024 * 1024),
},
},
},
}
ni.updateImageSizes()
expected := map[string]int64{
"gcr.io/10:" + parsers.DefaultImageTag: 10 * 1024 * 1024,
"gcr.io/10:v1": 10 * 1024 * 1024,
"gcr.io/50:" + parsers.DefaultImageTag: 50 * 1024 * 1024,
"gcr.io/50:v1": 50 * 1024 * 1024,
}
imageSizes := ni.ImageSizes()
if !reflect.DeepEqual(expected, imageSizes) {
t.Errorf("expected: %#v, got: %#v", expected, imageSizes)
}
}
func TestNewNodeInfo(t *testing.T) {
nodeName := "test-node"
pods := []*v1.Pod{
makeBasePod(t, nodeName, "test-1", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
makeBasePod(t, nodeName, "test-2", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}),
}
expected := &NodeInfo{
requestedResource: &Resource{
MilliCPU: 300,
Memory: 1524,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
nonzeroRequest: &Resource{
MilliCPU: 300,
Memory: 1524,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
"127.0.0.1": map[util.ProtocolPort]struct{}{
{Protocol: "TCP", Port: 80}: {},
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
UID: types.UID("test-1"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("500"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 80,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
UID: types.UID("test-2"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("1Ki"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 8080,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
},
}
gen := generation
ni := NewNodeInfo(pods...)
if ni.generation <= gen {
t.Errorf("generation is not incremented. previous: %v, current: %v", gen, ni.generation)
}
expected.generation = ni.generation
if !reflect.DeepEqual(expected, ni) {
t.Errorf("expected: %#v, got: %#v", expected, ni)
}
}
func TestNodeInfoClone(t *testing.T) {
nodeName := "test-node"
tests := []struct {
nodeInfo *NodeInfo
expected *NodeInfo
}{
{
nodeInfo: &NodeInfo{
requestedResource: &Resource{},
nonzeroRequest: &Resource{},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
"127.0.0.1": map[util.ProtocolPort]struct{}{
{Protocol: "TCP", Port: 80}: {},
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{
"gcr.io/10": 10 * 1024 * 1024,
},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
UID: types.UID("test-1"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("500"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 80,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
UID: types.UID("test-2"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("1Ki"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 8080,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
},
},
expected: &NodeInfo{
requestedResource: &Resource{},
nonzeroRequest: &Resource{},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
"127.0.0.1": map[util.ProtocolPort]struct{}{
{Protocol: "TCP", Port: 80}: {},
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{
"gcr.io/10": 10 * 1024 * 1024,
},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
UID: types.UID("test-1"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("500"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 80,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
UID: types.UID("test-2"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("1Ki"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 8080,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
},
},
},
}
for _, test := range tests {
ni := test.nodeInfo.Clone()
// Modify the field to check if the result is a clone of the origin one.
test.nodeInfo.generation += 10
if !reflect.DeepEqual(test.expected, ni) {
t.Errorf("expected: %#v, got: %#v", test.expected, ni)
}
}
}
func TestNodeInfoAddPod(t *testing.T) {
nodeName := "test-node"
pods := []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
UID: types.UID("test-1"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("500"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 80,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
UID: types.UID("test-2"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("1Ki"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 8080,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
}
expected := &NodeInfo{
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test-node",
},
},
requestedResource: &Resource{
MilliCPU: 300,
Memory: 1524,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
nonzeroRequest: &Resource{
MilliCPU: 300,
Memory: 1524,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
"127.0.0.1": map[util.ProtocolPort]struct{}{
{Protocol: "TCP", Port: 80}: {},
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
UID: types.UID("test-1"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("500"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 80,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
UID: types.UID("test-2"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("1Ki"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 8080,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
},
}
ni := fakeNodeInfo()
gen := ni.generation
for _, pod := range pods {
ni.AddPod(pod)
if ni.generation <= gen {
t.Errorf("generation is not incremented. Prev: %v, current: %v", gen, ni.generation)
}
gen = ni.generation
}
expected.generation = ni.generation
if !reflect.DeepEqual(expected, ni) {
t.Errorf("expected: %#v, got: %#v", expected, ni)
}
}
func TestNodeInfoRemovePod(t *testing.T) {
nodeName := "test-node"
pods := []*v1.Pod{
makeBasePod(t, nodeName, "test-1", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
makeBasePod(t, nodeName, "test-2", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}),
}
tests := []struct {
pod *v1.Pod
errExpected bool
expectedNodeInfo *NodeInfo
}{
{
pod: makeBasePod(t, nodeName, "non-exist", "0", "0", "", []v1.ContainerPort{{}}),
errExpected: true,
expectedNodeInfo: &NodeInfo{
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test-node",
},
},
requestedResource: &Resource{
MilliCPU: 300,
Memory: 1524,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
nonzeroRequest: &Resource{
MilliCPU: 300,
Memory: 1524,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
"127.0.0.1": map[util.ProtocolPort]struct{}{
{Protocol: "TCP", Port: 80}: {},
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
UID: types.UID("test-1"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("500"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 80,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
UID: types.UID("test-2"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("1Ki"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 8080,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
},
},
},
{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
UID: types.UID("test-1"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("500"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 80,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
errExpected: false,
expectedNodeInfo: &NodeInfo{
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test-node",
},
},
requestedResource: &Resource{
MilliCPU: 200,
Memory: 1024,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
nonzeroRequest: &Resource{
MilliCPU: 200,
Memory: 1024,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
generation: 3,
usedPorts: util.HostPortInfo{
"127.0.0.1": map[util.ProtocolPort]struct{}{
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
UID: types.UID("test-2"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("200m"),
v1.ResourceMemory: resource.MustParse("1Ki"),
},
},
Ports: []v1.ContainerPort{
{
HostIP: "127.0.0.1",
HostPort: 8080,
Protocol: "TCP",
},
},
},
},
NodeName: nodeName,
},
},
},
},
},
}
for _, test := range tests {
ni := fakeNodeInfo(pods...)
gen := ni.generation
err := ni.RemovePod(test.pod)
if err != nil {
if test.errExpected {
expectedErrorMsg := fmt.Errorf("no corresponding pod %s in pods of node %s", test.pod.Name, ni.node.Name)
// fmt.Errorf values are never equal under ==; compare the error messages instead.
if expectedErrorMsg.Error() != err.Error() {
t.Errorf("expected error: %v, got: %v", expectedErrorMsg, err)
}
} else {
t.Errorf("expected no error, got: %v", err)
}
} else {
if ni.generation <= gen {
t.Errorf("generation is not incremented. Prev: %v, current: %v", gen, ni.generation)
}
}
test.expectedNodeInfo.generation = ni.generation
if !reflect.DeepEqual(test.expectedNodeInfo, ni) {
t.Errorf("expected: %#v, got: %#v", test.expectedNodeInfo, ni)
}
}
}
func fakeNodeInfo(pods ...*v1.Pod) *NodeInfo {
ni := NewNodeInfo(pods...)
ni.node = &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test-node",
},
}
return ni
}

39
vendor/k8s.io/kubernetes/pkg/scheduler/cache/util.go generated vendored Normal file

@@ -0,0 +1,39 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cache
import "k8s.io/api/core/v1"
// CreateNodeNameToInfoMap obtains a list of pods and pivots that list into a map where the keys are node names
// and the values are the aggregated information for that node.
func CreateNodeNameToInfoMap(pods []*v1.Pod, nodes []*v1.Node) map[string]*NodeInfo {
nodeNameToInfo := make(map[string]*NodeInfo)
for _, pod := range pods {
nodeName := pod.Spec.NodeName
if _, ok := nodeNameToInfo[nodeName]; !ok {
nodeNameToInfo[nodeName] = NewNodeInfo()
}
nodeNameToInfo[nodeName].AddPod(pod)
}
for _, node := range nodes {
if _, ok := nodeNameToInfo[node.Name]; !ok {
nodeNameToInfo[node.Name] = NewNodeInfo()
}
nodeNameToInfo[node.Name].SetNode(node)
}
return nodeNameToInfo
}
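// A usage sketch (pods and nodes are hypothetical slices, e.g. obtained from listers):
//
//	nodeNameToInfo := CreateNodeNameToInfoMap(pods, nodes)
//	for name, info := range nodeNameToInfo {
//		fmt.Printf("%s: %d pods, %dm CPU requested\n",
//			name, len(info.Pods()), info.RequestedResource().MilliCPU)
//	}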