vendor update for CSI 0.3.0

gman
2018-07-18 16:47:22 +02:00
parent 6f484f92fc
commit 8ea659f0d5
6810 changed files with 438061 additions and 193861 deletions


@ -15,15 +15,15 @@ go_test(
"//pkg/controller/volume/persistentvolume:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/core:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/diff:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
@ -44,9 +44,9 @@ go_library(
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/core:go_default_library",
"//pkg/scheduler/metrics:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
@ -75,10 +75,10 @@ filegroup(
"//pkg/scheduler/algorithm:all-srcs",
"//pkg/scheduler/algorithmprovider:all-srcs",
"//pkg/scheduler/api:all-srcs",
"//pkg/scheduler/cache:all-srcs",
"//pkg/scheduler/core:all-srcs",
"//pkg/scheduler/factory:all-srcs",
"//pkg/scheduler/metrics:all-srcs",
"//pkg/scheduler/schedulercache:all-srcs",
"//pkg/scheduler/testing:all-srcs",
"//pkg/scheduler/util:all-srcs",
"//pkg/scheduler/volumebinder:all-srcs",


@ -2,3 +2,5 @@ approvers:
- sig-scheduling-maintainers
reviewers:
- sig-scheduling
labels:
- sig/scheduling


@ -16,8 +16,9 @@ go_library(
],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm",
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
@ -33,7 +34,7 @@ go_test(
],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
],


@ -23,7 +23,7 @@ go_library(
"//pkg/kubelet/apis:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//pkg/volume/util:go_default_library",
@ -32,6 +32,7 @@ go_library(
"//vendor/k8s.io/api/storage/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/fields:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/rand:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
@ -45,6 +46,7 @@ go_library(
go_test(
name = "go_default_test",
srcs = [
"max_attachable_volume_predicate_test.go",
"metadata_test.go",
"predicates_test.go",
"utils_test.go",
@ -52,10 +54,12 @@ go_test(
embed = [":go_default_library"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/volume/util:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/storage/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
@ -63,6 +67,7 @@ go_test(
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature/testing:go_default_library",
],
)


@ -61,6 +61,8 @@ var (
ErrNodeUnderMemoryPressure = newPredicateFailureError("NodeUnderMemoryPressure", "node(s) had memory pressure")
// ErrNodeUnderDiskPressure is used for NodeUnderDiskPressure predicate error.
ErrNodeUnderDiskPressure = newPredicateFailureError("NodeUnderDiskPressure", "node(s) had disk pressure")
// ErrNodeUnderPIDPressure is used for NodeUnderPIDPressure predicate error.
ErrNodeUnderPIDPressure = newPredicateFailureError("NodeUnderPIDPressure", "node(s) had pid pressure")
// ErrNodeOutOfDisk is used for NodeOutOfDisk predicate error.
ErrNodeOutOfDisk = newPredicateFailureError("NodeOutOfDisk", "node(s) were out of disk space")
// ErrNodeNotReady is used for NodeNotReady predicate error.
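For orientation, a node-condition predicate surfaces the new failure reason roughly as sketched below. This is a hedged illustration only: the helper name checkPIDPressureSketch is made up and the real CheckNodePIDPressure predicate may be wired differently; the signature just follows the FitPredicate shape used elsewhere in this package.

package predicates

import (
	"k8s.io/api/core/v1"
	"k8s.io/kubernetes/pkg/scheduler/algorithm"
	schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)

// checkPIDPressureSketch (hypothetical) rejects a node whose PIDPressure
// condition is true and reports the new ErrNodeUnderPIDPressure reason.
func checkPIDPressureSketch(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	for _, cond := range nodeInfo.Node().Status.Conditions {
		if cond.Type == v1.NodePIDPressure && cond.Status == v1.ConditionTrue {
			return false, []algorithm.PredicateFailureReason{ErrNodeUnderPIDPressure}, nil
		}
	}
	return true, nil, nil
}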


@ -0,0 +1,854 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"os"
"reflect"
"strconv"
"strings"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
)
func onePVCPod(filterName string) *v1.Pod {
return &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "some" + filterName + "Vol",
},
},
},
},
},
}
}
func splitPVCPod(filterName string) *v1.Pod {
return &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someNon" + filterName + "Vol",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "some" + filterName + "Vol",
},
},
},
},
},
}
}
func TestVolumeCountConflicts(t *testing.T) {
oneVolPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "ovp"},
},
},
},
},
}
twoVolPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "tvp1"},
},
},
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "tvp2"},
},
},
},
},
}
splitVolsPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{},
},
},
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "svp"},
},
},
},
},
}
nonApplicablePod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{},
},
},
},
},
}
deletedPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "deletedPVC",
},
},
},
},
},
}
twoDeletedPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "deletedPVC",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "anotherDeletedPVC",
},
},
},
},
},
}
deletedPVPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "pvcWithDeletedPV",
},
},
},
},
},
}
// deletedPVPod2 is a different pod than deletedPVPod but using the same PVC
deletedPVPod2 := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "pvcWithDeletedPV",
},
},
},
},
},
}
// anotherDeletedPVPod is a different pod than deletedPVPod and uses another PVC
anotherDeletedPVPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "anotherPVCWithDeletedPV",
},
},
},
},
},
}
emptyPod := &v1.Pod{
Spec: v1.PodSpec{},
}
unboundPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "unboundPVC",
},
},
},
},
},
}
// Different pod than unboundPVCPod, but using the same unbound PVC
unboundPVCPod2 := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "unboundPVC",
},
},
},
},
},
}
// pod with unbound PVC that's different to unboundPVC
anotherUnboundPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "anotherUnboundPVC",
},
},
},
},
},
}
tests := []struct {
newPod *v1.Pod
existingPods []*v1.Pod
filterName string
maxVols int
fits bool
test string
}{
// filterName:EBSVolumeFilterType
{
newPod: oneVolPod,
existingPods: []*v1.Pod{twoVolPod, oneVolPod},
filterName: EBSVolumeFilterType,
maxVols: 4,
fits: true,
test: "fits when node capacity >= new pod's EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: false,
test: "doesn't fit when node capacity < new pod's EBS volumes",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{twoVolPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count ignores non-EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "existing pods' counts ignore non-EBS volumes",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count considers PVCs backed by EBS volumes",
},
{
newPod: splitPVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{splitVolsPod, oneVolPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count ignores PVCs not backed by EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, onePVCPod(EBSVolumeFilterType)},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: false,
test: "existing pods' counts considers PVCs backed by EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, twoVolPod, onePVCPod(EBSVolumeFilterType)},
filterName: EBSVolumeFilterType,
maxVols: 4,
fits: true,
test: "already-mounted EBS volumes are always ok to allow",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{oneVolPod, oneVolPod, onePVCPod(EBSVolumeFilterType)},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "the same EBS volumes are not counted multiple times",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: false,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, twoDeletedPVCPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: false,
test: "pod with missing two PVCs is counted towards the PV limit twice",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: false,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: deletedPVPod2,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: true,
test: "two pods missing the same PV are counted towards the PV limit only once",
},
{
newPod: anotherDeletedPVPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: false,
test: "two pods missing different PVs are counted towards the PV limit twice",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: false,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: unboundPVCPod2,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: true,
test: "the same unbound PVC in multiple pods is counted towards the PV limit only once",
},
{
newPod: anotherUnboundPVCPod,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: false,
test: "two different unbound PVCs are counted towards the PV limit as two volumes",
},
// filterName:GCEPDVolumeFilterType
{
newPod: oneVolPod,
existingPods: []*v1.Pod{twoVolPod, oneVolPod},
filterName: GCEPDVolumeFilterType,
maxVols: 4,
fits: true,
test: "fits when node capacity >= new pod's GCE volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "fit when node capacity < new pod's GCE volumes",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{twoVolPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count ignores non-GCE volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "existing pods' counts ignore non-GCE volumes",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count considers PVCs backed by GCE volumes",
},
{
newPod: splitPVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{splitVolsPod, oneVolPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count ignores PVCs not backed by GCE volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, onePVCPod(GCEPDVolumeFilterType)},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "existing pods' counts considers PVCs backed by GCE volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, twoVolPod, onePVCPod(GCEPDVolumeFilterType)},
filterName: GCEPDVolumeFilterType,
maxVols: 4,
fits: true,
test: "already-mounted EBS volumes are always ok to allow",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{oneVolPod, oneVolPod, onePVCPod(GCEPDVolumeFilterType)},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "the same GCE volumes are not counted multiple times",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, twoDeletedPVCPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing two PVCs is counted towards the PV limit twice",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: deletedPVPod2,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "two pods missing the same PV are counted towards the PV limit only once",
},
{
newPod: anotherDeletedPVPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "two pods missing different PVs are counted towards the PV limit twice",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: unboundPVCPod2,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "the same unbound PVC in multiple pods is counted towards the PV limit only once",
},
{
newPod: anotherUnboundPVCPod,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "two different unbound PVCs are counted towards the PV limit as two volumes",
},
// filterName:AzureDiskVolumeFilterType
{
newPod: oneVolPod,
existingPods: []*v1.Pod{twoVolPod, oneVolPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 4,
fits: true,
test: "fits when node capacity >= new pod's AzureDisk volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "fit when node capacity < new pod's AzureDisk volumes",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{twoVolPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count ignores non-AzureDisk volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "existing pods' counts ignore non-AzureDisk volumes",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count considers PVCs backed by AzureDisk volumes",
},
{
newPod: splitPVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{splitVolsPod, oneVolPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count ignores PVCs not backed by AzureDisk volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, onePVCPod(AzureDiskVolumeFilterType)},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "existing pods' counts considers PVCs backed by AzureDisk volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, twoVolPod, onePVCPod(AzureDiskVolumeFilterType)},
filterName: AzureDiskVolumeFilterType,
maxVols: 4,
fits: true,
test: "already-mounted AzureDisk volumes are always ok to allow",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{oneVolPod, oneVolPod, onePVCPod(AzureDiskVolumeFilterType)},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "the same AzureDisk volumes are not counted multiple times",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, twoDeletedPVCPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing two PVCs is counted towards the PV limit twice",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: deletedPVPod2,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "two pods missing the same PV are counted towards the PV limit only once",
},
{
newPod: anotherDeletedPVPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "two pods missing different PVs are counted towards the PV limit twice",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: unboundPVCPod2,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "the same unbound PVC in multiple pods is counted towards the PV limit only once",
},
{
newPod: anotherUnboundPVCPod,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "two different unbound PVCs are counted towards the PV limit as two volumes",
},
}
pvInfo := func(filterName string) FakePersistentVolumeInfo {
return FakePersistentVolumeInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "some" + filterName + "Vol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: strings.ToLower(filterName) + "Vol"},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNon" + filterName + "Vol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{},
},
},
}
}
pvcInfo := func(filterName string) FakePersistentVolumeClaimInfo {
return FakePersistentVolumeClaimInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "some" + filterName + "Vol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "some" + filterName + "Vol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNon" + filterName + "Vol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someNon" + filterName + "Vol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pvcWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "pvcWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherPVCWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "anotherPVCWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "unboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherUnboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
}
}
expectedFailureReasons := []algorithm.PredicateFailureReason{ErrMaxVolumeCountExceeded}
// running attachable predicate tests with the feature gate disabled and no limit present on nodes
for _, test := range tests {
os.Setenv(KubeMaxPDVols, strconv.Itoa(test.maxVols))
pred := NewMaxPDVolumeCountPredicate(test.filterName, pvInfo(test.filterName), pvcInfo(test.filterName))
fits, reasons, err := pred(test.newPod, PredicateMetadata(test.newPod, nil), schedulercache.NewNodeInfo(test.existingPods...))
if err != nil {
t.Errorf("[%s]%s: unexpected error: %v", test.filterName, test.test, err)
}
if !fits && !reflect.DeepEqual(reasons, expectedFailureReasons) {
t.Errorf("[%s]%s: unexpected failure reasons: %v, want: %v", test.filterName, test.test, reasons, expectedFailureReasons)
}
if fits != test.fits {
t.Errorf("[%s]%s: expected %v, got %v", test.filterName, test.test, test.fits, fits)
}
}
defer utilfeaturetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.AttachVolumeLimit, true)()
// running attachable predicate tests with the feature gate enabled and limits present on nodes
for _, test := range tests {
node := getNodeWithPodAndVolumeLimits(test.existingPods, int64(test.maxVols), test.filterName)
pred := NewMaxPDVolumeCountPredicate(test.filterName, pvInfo(test.filterName), pvcInfo(test.filterName))
fits, reasons, err := pred(test.newPod, PredicateMetadata(test.newPod, nil), node)
if err != nil {
t.Errorf("Using allocatable [%s]%s: unexpected error: %v", test.filterName, test.test, err)
}
if !fits && !reflect.DeepEqual(reasons, expectedFailureReasons) {
t.Errorf("Using allocatable [%s]%s: unexpected failure reasons: %v, want: %v", test.filterName, test.test, reasons, expectedFailureReasons)
}
if fits != test.fits {
t.Errorf("Using allocatable [%s]%s: expected %v, got %v", test.filterName, test.test, test.fits, fits)
}
}
}
func getNodeWithPodAndVolumeLimits(pods []*v1.Pod, limit int64, filter string) *schedulercache.NodeInfo {
nodeInfo := schedulercache.NewNodeInfo(pods...)
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "node-for-max-pd-test-1"},
Status: v1.NodeStatus{
Allocatable: v1.ResourceList{
getVolumeLimitKey(filter): *resource.NewQuantity(limit, resource.DecimalSI),
},
},
}
nodeInfo.SetNode(node)
return nodeInfo
}
func getVolumeLimitKey(filterType string) v1.ResourceName {
switch filterType {
case EBSVolumeFilterType:
return v1.ResourceName(volumeutil.EBSVolumeLimitKey)
case GCEPDVolumeFilterType:
return v1.ResourceName(volumeutil.GCEVolumeLimitKey)
case AzureDiskVolumeFilterType:
return v1.ResourceName(volumeutil.AzureVolumeLimitKey)
default:
return ""
}
}
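The tests above raise or lower the per-host cap through the KubeMaxPDVols environment variable. The helper that consumes it (getMaxVols in predicates.go) is referenced but not shown in these hunks; below is a minimal standalone sketch of that override logic, assuming the variable name "KUBE_MAX_PD_VOLS" and that unparsable or non-positive values fall back to the default.

package main

import (
	"fmt"
	"os"
	"strconv"
)

// maxVolsFromEnv returns the value of KUBE_MAX_PD_VOLS when it parses to a
// positive integer, otherwise the per-cloud default.
func maxVolsFromEnv(defaultVal int) int {
	if raw := os.Getenv("KUBE_MAX_PD_VOLS"); raw != "" {
		if n, err := strconv.Atoi(raw); err == nil && n > 0 {
			return n
		}
	}
	return defaultVal
}

func main() {
	os.Setenv("KUBE_MAX_PD_VOLS", "25")
	fmt.Println(maxVolsFromEnv(39)) // 25
}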


@ -20,14 +20,17 @@ import (
"fmt"
"sync"
"github.com/golang/glog"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/util/workqueue"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
"github.com/golang/glog"
)
// PredicateMetadataFactory defines a factory of predicate metadata.
@ -50,7 +53,13 @@ type predicateMetadata struct {
podRequest *schedulercache.Resource
podPorts []*v1.ContainerPort
//key is a pod full name with the anti-affinity rules.
matchingAntiAffinityTerms map[string][]matchingPodAntiAffinityTerm
matchingAntiAffinityTerms map[string][]matchingPodAntiAffinityTerm
// A map of node name to a list of Pods on the node that can potentially match
// the affinity rules of the "pod".
nodeNameToMatchingAffinityPods map[string][]*v1.Pod
// A map of node name to a list of Pods on the node that can potentially match
// the anti-affinity rules of the "pod".
nodeNameToMatchingAntiAffinityPods map[string][]*v1.Pod
serviceAffinityInUse bool
serviceAffinityMatchingPodList []*v1.Pod
serviceAffinityMatchingPodServices []*v1.Service
@ -108,12 +117,19 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf
if err != nil {
return nil
}
affinityPods, antiAffinityPods, err := getPodsMatchingAffinity(pod, nodeNameToInfoMap)
if err != nil {
glog.Errorf("[predicate meta data generation] error finding pods that match affinity terms: %v", err)
return nil
}
predicateMetadata := &predicateMetadata{
pod: pod,
podBestEffort: isPodBestEffort(pod),
podRequest: GetResourceRequest(pod),
podPorts: schedutil.GetContainerPorts(pod),
matchingAntiAffinityTerms: matchingTerms,
pod: pod,
podBestEffort: isPodBestEffort(pod),
podRequest: GetResourceRequest(pod),
podPorts: schedutil.GetContainerPorts(pod),
matchingAntiAffinityTerms: matchingTerms,
nodeNameToMatchingAffinityPods: affinityPods,
nodeNameToMatchingAntiAffinityPods: antiAffinityPods,
}
for predicateName, precomputeFunc := range predicateMetadataProducers {
glog.V(10).Infof("Precompute: %v", predicateName)
@ -131,6 +147,33 @@ func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error {
}
// Delete any anti-affinity rule from the deletedPod.
delete(meta.matchingAntiAffinityTerms, deletedPodFullName)
// Delete pod from the matching affinity or anti-affinity pods if exists.
affinity := meta.pod.Spec.Affinity
podNodeName := deletedPod.Spec.NodeName
if affinity != nil && len(podNodeName) > 0 {
if affinity.PodAffinity != nil {
for i, p := range meta.nodeNameToMatchingAffinityPods[podNodeName] {
if p == deletedPod {
s := meta.nodeNameToMatchingAffinityPods[podNodeName]
s[i] = s[len(s)-1]
s = s[:len(s)-1]
meta.nodeNameToMatchingAffinityPods[podNodeName] = s
break
}
}
}
if affinity.PodAntiAffinity != nil {
for i, p := range meta.nodeNameToMatchingAntiAffinityPods[podNodeName] {
if p == deletedPod {
s := meta.nodeNameToMatchingAntiAffinityPods[podNodeName]
s[i] = s[len(s)-1]
s = s[:len(s)-1]
meta.nodeNameToMatchingAntiAffinityPods[podNodeName] = s
break
}
}
}
}
// All pods in the serviceAffinityMatchingPodList are in the same namespace.
// So, if the namespace of the first one is not the same as the namespace of the
// deletedPod, we don't need to check the list, as deletedPod isn't in the list.
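The RemovePod change above drops the deleted pod from the per-node matching lists with the swap-with-last idiom, which avoids shifting elements but does not preserve order. A self-contained illustration of the same trick:

package main

import "fmt"

// removeAt deletes index i from a slice without preserving order, as RemovePod
// does for entries of nodeNameToMatchingAffinityPods and its anti-affinity twin.
func removeAt(pods []string, i int) []string {
	pods[i] = pods[len(pods)-1]
	return pods[:len(pods)-1]
}

func main() {
	fmt.Println(removeAt([]string{"p1", "p2", "p3"}, 0)) // [p3 p2]
}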
@ -173,6 +216,35 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache
meta.matchingAntiAffinityTerms[addedPodFullName] = podMatchingTerms
}
}
// Add the pod to nodeNameToMatchingAffinityPods and nodeNameToMatchingAntiAffinityPods if needed.
affinity := meta.pod.Spec.Affinity
podNodeName := addedPod.Spec.NodeName
if affinity != nil && len(podNodeName) > 0 {
if targetPodMatchesAffinityOfPod(meta.pod, addedPod) {
found := false
for _, p := range meta.nodeNameToMatchingAffinityPods[podNodeName] {
if p == addedPod {
found = true
break
}
}
if !found {
meta.nodeNameToMatchingAffinityPods[podNodeName] = append(meta.nodeNameToMatchingAffinityPods[podNodeName], addedPod)
}
}
if targetPodMatchesAntiAffinityOfPod(meta.pod, addedPod) {
found := false
for _, p := range meta.nodeNameToMatchingAntiAffinityPods[podNodeName] {
if p == addedPod {
found = true
break
}
}
if !found {
meta.nodeNameToMatchingAntiAffinityPods[podNodeName] = append(meta.nodeNameToMatchingAntiAffinityPods[podNodeName], addedPod)
}
}
}
// If addedPod is in the same namespace as the meta.pod, update the list
// of matching pods if applicable.
if meta.serviceAffinityInUse && addedPod.Namespace == meta.pod.Namespace {
@ -200,9 +272,162 @@ func (meta *predicateMetadata) ShallowCopy() algorithm.PredicateMetadata {
for k, v := range meta.matchingAntiAffinityTerms {
newPredMeta.matchingAntiAffinityTerms[k] = append([]matchingPodAntiAffinityTerm(nil), v...)
}
newPredMeta.nodeNameToMatchingAffinityPods = make(map[string][]*v1.Pod)
for k, v := range meta.nodeNameToMatchingAffinityPods {
newPredMeta.nodeNameToMatchingAffinityPods[k] = append([]*v1.Pod(nil), v...)
}
newPredMeta.nodeNameToMatchingAntiAffinityPods = make(map[string][]*v1.Pod)
for k, v := range meta.nodeNameToMatchingAntiAffinityPods {
newPredMeta.nodeNameToMatchingAntiAffinityPods[k] = append([]*v1.Pod(nil), v...)
}
newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil),
meta.serviceAffinityMatchingPodServices...)
newPredMeta.serviceAffinityMatchingPodList = append([]*v1.Pod(nil),
meta.serviceAffinityMatchingPodList...)
return (algorithm.PredicateMetadata)(newPredMeta)
}
type affinityTermProperties struct {
namespaces sets.String
selector labels.Selector
}
// getAffinityTermProperties receives a Pod and affinity terms and returns the namespaces and
// selectors of the terms.
func getAffinityTermProperties(pod *v1.Pod, terms []v1.PodAffinityTerm) (properties []*affinityTermProperties, err error) {
if terms == nil {
return properties, nil
}
for _, term := range terms {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
return nil, err
}
properties = append(properties, &affinityTermProperties{namespaces: namespaces, selector: selector})
}
return properties, nil
}
// podMatchesAffinityTermProperties returns true IFF the given pod matches all the given properties.
func podMatchesAffinityTermProperties(pod *v1.Pod, properties []*affinityTermProperties) bool {
if len(properties) == 0 {
return false
}
for _, property := range properties {
if !priorityutil.PodMatchesTermsNamespaceAndSelector(pod, property.namespaces, property.selector) {
return false
}
}
return true
}
// getPodsMatchingAffinity finds existing Pods that match the affinity and anti-affinity terms of the given "pod".
// It ignores topology. It returns a set of Pods that are checked later by the affinity
// predicate. With this set of pods available, the affinity predicate does not
// need to check all the pods in the cluster.
func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (affinityPods map[string][]*v1.Pod, antiAffinityPods map[string][]*v1.Pod, err error) {
allNodeNames := make([]string, 0, len(nodeInfoMap))
affinity := pod.Spec.Affinity
if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
return nil, nil, nil
}
for name := range nodeInfoMap {
allNodeNames = append(allNodeNames, name)
}
var lock sync.Mutex
var firstError error
affinityPods = make(map[string][]*v1.Pod)
antiAffinityPods = make(map[string][]*v1.Pod)
appendResult := func(nodeName string, affPods, antiAffPods []*v1.Pod) {
lock.Lock()
defer lock.Unlock()
if len(affPods) > 0 {
affinityPods[nodeName] = affPods
}
if len(antiAffPods) > 0 {
antiAffinityPods[nodeName] = antiAffPods
}
}
catchError := func(err error) {
lock.Lock()
defer lock.Unlock()
if firstError == nil {
firstError = err
}
}
affinityProperties, err := getAffinityTermProperties(pod, GetPodAffinityTerms(affinity.PodAffinity))
if err != nil {
return nil, nil, err
}
antiAffinityProperties, err := getAffinityTermProperties(pod, GetPodAntiAffinityTerms(affinity.PodAntiAffinity))
if err != nil {
return nil, nil, err
}
processNode := func(i int) {
nodeInfo := nodeInfoMap[allNodeNames[i]]
node := nodeInfo.Node()
if node == nil {
catchError(fmt.Errorf("nodeInfo.Node is nil"))
return
}
affPods := make([]*v1.Pod, 0, len(nodeInfo.Pods()))
antiAffPods := make([]*v1.Pod, 0, len(nodeInfo.Pods()))
for _, existingPod := range nodeInfo.Pods() {
// Check affinity properties.
if podMatchesAffinityTermProperties(existingPod, affinityProperties) {
affPods = append(affPods, existingPod)
}
// Check anti-affinity properties.
if podMatchesAffinityTermProperties(existingPod, antiAffinityProperties) {
antiAffPods = append(antiAffPods, existingPod)
}
}
if len(antiAffPods) > 0 || len(affPods) > 0 {
appendResult(node.Name, affPods, antiAffPods)
}
}
workqueue.Parallelize(16, len(allNodeNames), processNode)
return affinityPods, antiAffinityPods, firstError
}
// targetPodMatchesAffinityOfPod returns true if "targetPod" matches any affinity rule of
// "pod". Similar to getPodsMatchingAffinity, this function does not check topology.
// So, whether the targetPod actually matches or not needs further checks for a specific
// node.
func targetPodMatchesAffinityOfPod(pod, targetPod *v1.Pod) bool {
affinity := pod.Spec.Affinity
if affinity == nil || affinity.PodAffinity == nil {
return false
}
affinityProperties, err := getAffinityTermProperties(pod, GetPodAffinityTerms(affinity.PodAffinity))
if err != nil {
glog.Errorf("error in getting affinity properties of Pod %v", pod.Name)
return false
}
return podMatchesAffinityTermProperties(targetPod, affinityProperties)
}
// targetPodMatchesAntiAffinityOfPod returns true if "targetPod" matches any anti-affinity
// rule of "pod". Similar to getPodsMatchingAffinity, this function does not check topology.
// So, whether the targetPod actually matches or not needs further checks for a specific
// node.
func targetPodMatchesAntiAffinityOfPod(pod, targetPod *v1.Pod) bool {
affinity := pod.Spec.Affinity
if affinity == nil || affinity.PodAntiAffinity == nil {
return false
}
properties, err := getAffinityTermProperties(pod, GetPodAntiAffinityTerms(affinity.PodAntiAffinity))
if err != nil {
glog.Errorf("error in getting anti-affinity properties of Pod %v", pod.Name)
return false
}
return podMatchesAffinityTermProperties(targetPod, properties)
}
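getPodsMatchingAffinity above scans every node concurrently via workqueue.Parallelize and serializes writes to the shared result maps with a mutex. A stdlib-only stand-in for that collect-under-lock pattern (the per-node work here is a placeholder):

package main

import (
	"fmt"
	"sync"
)

func main() {
	nodeNames := []string{"nodeA", "nodeB", "nodeC"}
	matching := make(map[string]int)

	var lock sync.Mutex
	var wg sync.WaitGroup
	for _, name := range nodeNames {
		wg.Add(1)
		go func(name string) {
			defer wg.Done()
			count := len(name) // placeholder for "matching pods found on this node"
			lock.Lock()
			matching[name] = count
			lock.Unlock()
		}(name)
	}
	wg.Wait()
	fmt.Println(matching)
}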


@ -24,7 +24,7 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
@ -88,6 +88,13 @@ func (s sortableServices) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
var _ = sort.Interface(&sortableServices{})
func sortNodePodMap(np map[string][]*v1.Pod) {
for _, pl := range np {
sortablePods := sortablePods(pl)
sort.Sort(sortablePods)
}
}
// predicateMetadataEquivalent returns nil if the two metadata are equivalent.
// Note: this function does not compare podRequest.
func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
@ -111,6 +118,16 @@ func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
if !reflect.DeepEqual(meta1.matchingAntiAffinityTerms, meta2.matchingAntiAffinityTerms) {
return fmt.Errorf("matchingAntiAffinityTerms are not euqal")
}
sortNodePodMap(meta1.nodeNameToMatchingAffinityPods)
sortNodePodMap(meta2.nodeNameToMatchingAffinityPods)
if !reflect.DeepEqual(meta1.nodeNameToMatchingAffinityPods, meta2.nodeNameToMatchingAffinityPods) {
return fmt.Errorf("nodeNameToMatchingAffinityPods are not euqal")
}
sortNodePodMap(meta1.nodeNameToMatchingAntiAffinityPods)
sortNodePodMap(meta2.nodeNameToMatchingAntiAffinityPods)
if !reflect.DeepEqual(meta1.nodeNameToMatchingAntiAffinityPods, meta2.nodeNameToMatchingAntiAffinityPods) {
return fmt.Errorf("nodeNameToMatchingAntiAffinityPods are not euqal")
}
if meta1.serviceAffinityInUse {
sortablePods1 := sortablePods(meta1.serviceAffinityMatchingPodList)
sort.Sort(sortablePods1)
@ -189,6 +206,34 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
},
},
}
affinityComplex := &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar", "buzz"},
},
},
},
TopologyKey: "region",
},
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"bar", "security", "test"},
},
},
},
TopologyKey: "zone",
},
},
}
tests := []struct {
description string
@ -312,6 +357,41 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
description: "metadata matching pod affinity and anti-affinity are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
PodAffinity: affinityComplex,
},
},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{
NodeName: "nodeA",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityComplex,
},
},
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: selector1}}},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
}
for _, test := range tests {
@ -360,6 +440,7 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
// on the idea that shallow-copy should produce an object that is deep-equal to the original
// object.
func TestPredicateMetadata_ShallowCopy(t *testing.T) {
selector1 := map[string]string{"foo": "bar"}
source := predicateMetadata{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
@ -392,6 +473,45 @@ func TestPredicateMetadata_ShallowCopy(t *testing.T) {
},
},
},
nodeNameToMatchingAffinityPods: map[string][]*v1.Pod{
"nodeA": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
},
"nodeC": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
},
},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeC"},
},
},
},
nodeNameToMatchingAntiAffinityPods: map[string][]*v1.Pod{
"nodeN": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeN"},
},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p3"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
},
},
"nodeM": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeM"},
},
},
},
serviceAffinityInUse: true,
serviceAffinityMatchingPodList: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}},


@ -23,10 +23,13 @@ import (
"strconv"
"sync"
"github.com/golang/glog"
"k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/rand"
"k8s.io/apimachinery/pkg/util/sets"
@ -40,12 +43,10 @@ import (
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/scheduler/volumebinder"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
"github.com/golang/glog"
)
const (
@ -69,6 +70,8 @@ const (
NoDiskConflictPred = "NoDiskConflict"
// PodToleratesNodeTaintsPred defines the name of predicate PodToleratesNodeTaints.
PodToleratesNodeTaintsPred = "PodToleratesNodeTaints"
// CheckNodeUnschedulablePred defines the name of predicate CheckNodeUnschedulablePredicate.
CheckNodeUnschedulablePred = "CheckNodeUnschedulable"
// PodToleratesNodeNoExecuteTaintsPred defines the name of predicate PodToleratesNodeNoExecuteTaints.
PodToleratesNodeNoExecuteTaintsPred = "PodToleratesNodeNoExecuteTaints"
// CheckNodeLabelPresencePred defines the name of predicate CheckNodeLabelPresence.
@ -87,6 +90,8 @@ const (
CheckNodeMemoryPressurePred = "CheckNodeMemoryPressure"
// CheckNodeDiskPressurePred defines the name of predicate CheckNodeDiskPressure.
CheckNodeDiskPressurePred = "CheckNodeDiskPressure"
// CheckNodePIDPressurePred defines the name of predicate CheckNodePIDPressure.
CheckNodePIDPressurePred = "CheckNodePIDPressure"
// DefaultMaxEBSVolumes is the limit for volumes attached to an instance.
// Amazon recommends no more than 40; the system root volume uses at least one.
@ -125,13 +130,13 @@ const (
// The order is based on the restrictiveness & complexity of predicates.
// Design doc: https://github.com/kubernetes/community/blob/master/contributors/design-proposals/scheduling/predicates-ordering.md
var (
predicatesOrdering = []string{CheckNodeConditionPred,
predicatesOrdering = []string{CheckNodeConditionPred, CheckNodeUnschedulablePred,
GeneralPred, HostNamePred, PodFitsHostPortsPred,
MatchNodeSelectorPred, PodFitsResourcesPred, NoDiskConflictPred,
PodToleratesNodeTaintsPred, PodToleratesNodeNoExecuteTaintsPred, CheckNodeLabelPresencePred,
CheckServiceAffinityPred, MaxEBSVolumeCountPred, MaxGCEPDVolumeCountPred,
MaxAzureDiskVolumeCountPred, CheckVolumeBindingPred, NoVolumeZoneConflictPred,
CheckNodeMemoryPressurePred, CheckNodeDiskPressurePred, MatchInterPodAffinityPred}
CheckNodeMemoryPressurePred, CheckNodePIDPressurePred, CheckNodeDiskPressurePred, MatchInterPodAffinityPred}
)
// NodeInfo interface represents anything that can get node object from node ID.
@ -284,10 +289,11 @@ func NoDiskConflict(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *sch
// MaxPDVolumeCountChecker contains information to check the max number of volumes for a predicate.
type MaxPDVolumeCountChecker struct {
filter VolumeFilter
maxVolumes int
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo
filter VolumeFilter
volumeLimitKey v1.ResourceName
maxVolumes int
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo
// The string below is generated randomly during the struct's initialization.
// It is used to prefix volumeID generated inside the predicate() method to
@ -308,21 +314,25 @@ type VolumeFilter struct {
// The predicate looks for both volumes used directly, as well as PVC volumes that are backed by relevant volume
// types, counts the number of unique volumes, and rejects the new pod if it would place the total count over
// the maximum.
func NewMaxPDVolumeCountPredicate(filterName string, pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate {
func NewMaxPDVolumeCountPredicate(
filterName string, pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate {
var filter VolumeFilter
var maxVolumes int
var volumeLimitKey v1.ResourceName
switch filterName {
case EBSVolumeFilterType:
filter = EBSVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.EBSVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxEBSVolumes)
case GCEPDVolumeFilterType:
filter = GCEPDVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.GCEVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxGCEPDVolumes)
case AzureDiskVolumeFilterType:
filter = AzureDiskVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.AzureVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxAzureDiskVolumes)
default:
glog.Fatalf("Wrong filterName, Only Support %v %v %v ", EBSVolumeFilterType,
@ -332,6 +342,7 @@ func NewMaxPDVolumeCountPredicate(filterName string, pvInfo PersistentVolumeInfo
}
c := &MaxPDVolumeCountChecker{
filter: filter,
volumeLimitKey: volumeLimitKey,
maxVolumes: maxVolumes,
pvInfo: pvInfo,
pvcInfo: pvcInfo,
@ -357,7 +368,6 @@ func getMaxVols(defaultVal int) int {
}
func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace string, filteredVolumes map[string]bool) error {
for i := range volumes {
vol := &volumes[i]
if id, ok := c.filter.FilterVolume(vol); ok {
@ -444,12 +454,25 @@ func (c *MaxPDVolumeCountChecker) predicate(pod *v1.Pod, meta algorithm.Predicat
}
numNewVolumes := len(newVolumes)
maxAttachLimit := c.maxVolumes
if numExistingVolumes+numNewVolumes > c.maxVolumes {
if utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
volumeLimits := nodeInfo.VolumeLimits()
if maxAttachLimitFromAllocatable, ok := volumeLimits[c.volumeLimitKey]; ok {
maxAttachLimit = int(maxAttachLimitFromAllocatable)
}
}
if numExistingVolumes+numNewVolumes > maxAttachLimit {
// violates MaxEBSVolumeCount or MaxGCEPDVolumeCount
return false, []algorithm.PredicateFailureReason{ErrMaxVolumeCountExceeded}, nil
}
if nodeInfo != nil && nodeInfo.TransientInfo != nil && utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) {
nodeInfo.TransientInfo.TransientLock.Lock()
defer nodeInfo.TransientInfo.TransientLock.Unlock()
nodeInfo.TransientInfo.TransNodeInfo.AllocatableVolumesCount = maxAttachLimit - numExistingVolumes
nodeInfo.TransientInfo.TransNodeInfo.RequestedVolumes = numNewVolumes
}
return true, nil, nil
}
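With the AttachVolumeLimit feature gate enabled, the predicate above prefers a limit published in the node's allocatable resources (keyed by c.volumeLimitKey) and otherwise keeps the configured maximum. A small sketch of just that selection, using a hypothetical helper and an illustrative resource-key string:

package main

import "fmt"

// effectiveAttachLimit (hypothetical) returns the node-published limit for the
// volume type when present, falling back to the configured maximum.
func effectiveAttachLimit(configuredMax int, allocatable map[string]int64, limitKey string) int {
	if v, ok := allocatable[limitKey]; ok {
		return int(v)
	}
	return configuredMax
}

func main() {
	// Key string shown for illustration; the real keys come from volumeutil.
	node := map[string]int64{"attachable-volumes-aws-ebs": 25}
	fmt.Println(effectiveAttachLimit(39, node, "attachable-volumes-aws-ebs")) // 25
}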
@ -582,9 +605,9 @@ func (c *VolumeZoneChecker) predicate(pod *v1.Pod, meta algorithm.PredicateMetad
pvName := pvc.Spec.VolumeName
if pvName == "" {
if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
scName := pvc.Spec.StorageClassName
if scName != nil && len(*scName) > 0 {
class, _ := c.classInfo.GetStorageClassInfo(*scName)
scName := v1helper.GetPersistentVolumeClaimClass(pvc)
if len(scName) > 0 {
class, _ := c.classInfo.GetStorageClassInfo(scName)
if class != nil {
if class.VolumeBindingMode == nil {
return false, nil, fmt.Errorf("VolumeBindingMode not set for StorageClass %q", scName)
@ -662,33 +685,7 @@ func GetResourceRequest(pod *v1.Pod) *schedulercache.Resource {
// take max_resource(sum_pod, any_init_container)
for _, container := range pod.Spec.InitContainers {
for rName, rQuantity := range container.Resources.Requests {
switch rName {
case v1.ResourceMemory:
if mem := rQuantity.Value(); mem > result.Memory {
result.Memory = mem
}
case v1.ResourceEphemeralStorage:
if ephemeralStorage := rQuantity.Value(); ephemeralStorage > result.EphemeralStorage {
result.EphemeralStorage = ephemeralStorage
}
case v1.ResourceCPU:
if cpu := rQuantity.MilliValue(); cpu > result.MilliCPU {
result.MilliCPU = cpu
}
case v1.ResourceNvidiaGPU:
if gpu := rQuantity.Value(); gpu > result.NvidiaGPU {
result.NvidiaGPU = gpu
}
default:
if v1helper.IsScalarResourceName(rName) {
value := rQuantity.Value()
if value > result.ScalarResources[rName] {
result.SetScalar(rName, value)
}
}
}
}
result.SetMaxResource(container.Resources.Requests)
}
return result
@ -728,7 +725,6 @@ func PodFitsResources(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *s
}
if podRequest.MilliCPU == 0 &&
podRequest.Memory == 0 &&
podRequest.NvidiaGPU == 0 &&
podRequest.EphemeralStorage == 0 &&
len(podRequest.ScalarResources) == 0 {
return len(predicateFails) == 0, predicateFails, nil
@ -741,10 +737,6 @@ func PodFitsResources(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *s
if allocatable.Memory < podRequest.Memory+nodeInfo.RequestedResource().Memory {
predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceMemory, podRequest.Memory, nodeInfo.RequestedResource().Memory, allocatable.Memory))
}
if allocatable.NvidiaGPU < podRequest.NvidiaGPU+nodeInfo.RequestedResource().NvidiaGPU {
predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceNvidiaGPU, podRequest.NvidiaGPU, nodeInfo.RequestedResource().NvidiaGPU, allocatable.NvidiaGPU))
}
if allocatable.EphemeralStorage < podRequest.EphemeralStorage+nodeInfo.RequestedResource().EphemeralStorage {
predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceEphemeralStorage, podRequest.EphemeralStorage, nodeInfo.RequestedResource().EphemeralStorage, allocatable.EphemeralStorage))
}
@ -776,21 +768,16 @@ func PodFitsResources(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *s
// nodeMatchesNodeSelectorTerms checks if a node's labels satisfy a list of node selector terms,
// terms are ORed, and an empty list of terms will match nothing.
func nodeMatchesNodeSelectorTerms(node *v1.Node, nodeSelectorTerms []v1.NodeSelectorTerm) bool {
for _, req := range nodeSelectorTerms {
nodeSelector, err := v1helper.NodeSelectorRequirementsAsSelector(req.MatchExpressions)
if err != nil {
glog.V(10).Infof("Failed to parse MatchExpressions: %+v, regarding as not match.", req.MatchExpressions)
return false
}
if nodeSelector.Matches(labels.Set(node.Labels)) {
return true
}
nodeFields := map[string]string{}
for k, f := range algorithm.NodeFieldSelectorKeys {
nodeFields[k] = f(node)
}
return false
return v1helper.MatchNodeSelectorTerms(nodeSelectorTerms, labels.Set(node.Labels), fields.Set(nodeFields))
}
// The pod can only schedule onto nodes that satisfy requirements in both NodeAffinity and nodeSelector.
func podMatchesNodeLabels(pod *v1.Pod, node *v1.Node) bool {
// podMatchesNodeSelectorAndAffinityTerms checks whether the pod is schedulable onto nodes according to
// the requirements in both NodeAffinity and nodeSelector.
func podMatchesNodeSelectorAndAffinityTerms(pod *v1.Pod, node *v1.Node) bool {
// Check if node.Labels match pod.Spec.NodeSelector.
if len(pod.Spec.NodeSelector) > 0 {
selector := labels.SelectorFromSet(pod.Spec.NodeSelector)
@ -841,7 +828,7 @@ func PodMatchNodeSelector(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInf
if node == nil {
return false, nil, fmt.Errorf("node not found")
}
if podMatchesNodeLabels(pod, node) {
if podMatchesNodeSelectorAndAffinityTerms(pod, node) {
return true, nil, nil
}
return false, []algorithm.PredicateFailureReason{ErrNodeSelectorNotMatch}, nil
@ -1153,7 +1140,7 @@ func (c *PodAffinityChecker) InterPodAffinityMatches(pod *v1.Pod, meta algorithm
if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
return true, nil, nil
}
if failedPredicates, error := c.satisfiesPodsAffinityAntiAffinity(pod, nodeInfo, affinity); failedPredicates != nil {
if failedPredicates, error := c.satisfiesPodsAffinityAntiAffinity(pod, meta, nodeInfo, affinity); failedPredicates != nil {
failedPredicates := append([]algorithm.PredicateFailureReason{ErrPodAffinityNotMatch}, failedPredicates)
return false, failedPredicates, error
}
@ -1167,39 +1154,36 @@ func (c *PodAffinityChecker) InterPodAffinityMatches(pod *v1.Pod, meta algorithm
return true, nil, nil
}
// anyPodMatchesPodAffinityTerm checks if any of given pods can match the specific podAffinityTerm.
// First return value indicates whether a matching pod exists on a node that matches the topology key,
// while the second return value indicates whether a matching pod exists anywhere.
// TODO: Do we really need any pod matching, or all pods matching? I think the latter.
func (c *PodAffinityChecker) anyPodMatchesPodAffinityTerm(pod *v1.Pod, pods []*v1.Pod, nodeInfo *schedulercache.NodeInfo, term *v1.PodAffinityTerm) (bool, bool, error) {
if len(term.TopologyKey) == 0 {
return false, false, fmt.Errorf("empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity")
// podMatchesPodAffinityTerms checks if the "targetPod" matches the given "terms"
// of the "pod" on the given "nodeInfo".Node(). It returns three values: 1) whether
// targetPod matches all the terms and their topologies, 2) whether targetPod
// matches all the terms' label selectors and namespaces (AKA term properties),
// 3) any error.
func (c *PodAffinityChecker) podMatchesPodAffinityTerms(pod *v1.Pod, targetPod *v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, bool, error) {
if len(terms) == 0 {
return false, false, fmt.Errorf("terms array is empty")
}
matchingPodExists := false
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
props, err := getAffinityTermProperties(pod, terms)
if err != nil {
return false, false, err
}
// Special case: When the topological domain is node, we can limit our
// search to pods on that node without searching the entire cluster.
if term.TopologyKey == kubeletapis.LabelHostname {
pods = nodeInfo.Pods()
if !podMatchesAffinityTermProperties(targetPod, props) {
return false, false, nil
}
for _, existingPod := range pods {
match := priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector)
if match {
matchingPodExists = true
existingPodNode, err := c.info.GetNodeInfo(existingPod.Spec.NodeName)
if err != nil {
return false, matchingPodExists, err
}
if priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), existingPodNode, term.TopologyKey) {
return true, matchingPodExists, nil
}
// The namespaces and selectors of the terms have matched. Now we check the topology of the terms.
targetPodNode, err := c.info.GetNodeInfo(targetPod.Spec.NodeName)
if err != nil {
return false, false, err
}
for _, term := range terms {
if len(term.TopologyKey) == 0 {
return false, false, fmt.Errorf("empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity")
}
if !priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), targetPodNode, term.TopologyKey) {
return false, true, nil
}
}
return false, matchingPodExists, nil
return true, true, nil
}
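// Simplified sketch of the topology comparison the loop above depends on
// (plain label maps, not the scheduler's Node type): two nodes are in the same
// topology domain for a key only when both carry that label key and the values
// are equal.
func sameTopologyDomain(labelsA, labelsB map[string]string, topologyKey string) bool {
	if topologyKey == "" {
		return false
	}
	a, okA := labelsA[topologyKey]
	b, okB := labelsB[topologyKey]
	return okA && okB && a == b
}

// Example: two nodes both labeled zone=az1 share the "zone" domain, so a
// matching pod on one of them satisfies an affinity term with TopologyKey
// "zone" evaluated against the other.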
// GetPodAffinityTerms gets pod affinity terms by a pod affinity object.
@ -1383,57 +1367,127 @@ func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta
return nil, nil
}
// anyPodsMatchingTopologyTerms checks whether any of the nodes given via
// "targetPods" matches the topology of all the "terms" for the given "pod" and "nodeInfo".
func (c *PodAffinityChecker) anyPodsMatchingTopologyTerms(pod *v1.Pod, targetPods map[string][]*v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, error) {
for nodeName, targetPods := range targetPods {
targetPodNodeInfo, err := c.info.GetNodeInfo(nodeName)
if err != nil {
return false, err
}
if len(targetPods) > 0 {
allTermsMatched := true
for _, term := range terms {
if !priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), targetPodNodeInfo, term.TopologyKey) {
allTermsMatched = false
break
}
}
if allTermsMatched {
// We have one or more pods on the target node that have already matched the namespace and selector
// and all of the terms' topologies matched the target node. So, there is at least one matching pod on the node.
return true, nil
}
}
}
return false, nil
}
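// Simplified sketch of how the precomputed metadata is consulted above: for
// every node that already hosts pods matching the incoming pod's term
// properties, check whether it shares the topology of every term with the
// candidate node. matchingPodCounts and nodeLabelsByName are illustrative
// stand-ins for the metadata maps and the node lister.
func anyNodeSharesAllTopologies(candidateLabels map[string]string,
	matchingPodCounts map[string]int,
	nodeLabelsByName map[string]map[string]string,
	topologyKeys []string) bool {
	for nodeName, count := range matchingPodCounts {
		if count == 0 {
			continue
		}
		allMatched := true
		for _, key := range topologyKeys {
			a, okA := candidateLabels[key]
			b, okB := nodeLabelsByName[nodeName][key]
			if !(okA && okB && a == b) {
				allMatched = false
				break
			}
		}
		if allMatched {
			return true
		}
	}
	return false
}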
// Checks if scheduling the pod onto this node would break any rules of this pod.
func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, nodeInfo *schedulercache.NodeInfo, affinity *v1.Affinity) (algorithm.PredicateFailureReason, error) {
func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo,
affinity *v1.Affinity) (algorithm.PredicateFailureReason, error) {
node := nodeInfo.Node()
if node == nil {
return ErrPodAffinityRulesNotMatch, fmt.Errorf("Node is nil")
}
filteredPods, err := c.podLister.FilteredList(nodeInfo.Filter, labels.Everything())
if err != nil {
return ErrPodAffinityRulesNotMatch, err
}
// Check all affinity terms.
for _, term := range GetPodAffinityTerms(affinity.PodAffinity) {
termMatches, matchingPodExists, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term)
if err != nil {
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v, err: %v", podName(pod), node.Name, term, err)
glog.Error(errMessage)
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
}
if !termMatches {
// If the requirement matches a pod's own labels and namespace, and there are
// no other such pods, then disregard the requirement. This is necessary so that
// the first pod of the collection is not blocked from scheduling forever.
if matchingPodExists {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v",
podName(pod), node.Name, term)
return ErrPodAffinityRulesNotMatch, nil
}
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if predicateMeta, ok := meta.(*predicateMetadata); ok {
// Check all affinity terms.
matchingPods := predicateMeta.nodeNameToMatchingAffinityPods
if affinityTerms := GetPodAffinityTerms(affinity.PodAffinity); len(affinityTerms) > 0 {
matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, affinityTerms)
if err != nil {
errMessage := fmt.Sprintf("Cannot parse selector on term %v for pod %v. Details %v", term, podName(pod), err)
glog.Error(errMessage)
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinity, err: %v", podName(pod), node.Name, err)
glog.Errorf(errMessage)
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
}
match := priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector)
if !match {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v",
podName(pod), node.Name, term)
return ErrPodAffinityRulesNotMatch, nil
if !matchExists {
// This pod may be the first pod in a series of pods that have affinity to themselves. In order
// not to leave such pods in the pending state forever, we check that if no other pod
// in the cluster matches the namespace and selector of this pod and the pod matches
// its own terms, then we allow the pod to pass the affinity check.
if !(len(matchingPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
podName(pod), node.Name)
return ErrPodAffinityRulesNotMatch, nil
}
}
}
}
// Check all anti-affinity terms.
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
termMatches, _, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term)
if err != nil || termMatches {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm %v, err: %v",
podName(pod), node.Name, term, err)
return ErrPodAntiAffinityRulesNotMatch, nil
// Check all anti-affinity terms.
matchingPods = predicateMeta.nodeNameToMatchingAntiAffinityPods
if antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity); len(antiAffinityTerms) > 0 {
matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, antiAffinityTerms)
if err != nil || matchExists {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinity, err: %v",
podName(pod), node.Name, err)
return ErrPodAntiAffinityRulesNotMatch, nil
}
}
} else { // We don't have precomputed metadata. We have to follow a slow path to check affinity rules.
filteredPods, err := c.podLister.FilteredList(nodeInfo.Filter, labels.Everything())
if err != nil {
return ErrPodAffinityRulesNotMatch, err
}
affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
matchFound, termsSelectorMatchFound := false, false
for _, targetPod := range filteredPods {
// Check all affinity terms.
if !matchFound && len(affinityTerms) > 0 {
affTermsMatch, termsSelectorMatch, err := c.podMatchesPodAffinityTerms(pod, targetPod, nodeInfo, affinityTerms)
if err != nil {
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinity, err: %v", podName(pod), node.Name, err)
glog.Error(errMessage)
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
}
if termsSelectorMatch {
termsSelectorMatchFound = true
}
if affTermsMatch {
matchFound = true
}
}
// Check all anti-affinity terms.
if len(antiAffinityTerms) > 0 {
antiAffTermsMatch, _, err := c.podMatchesPodAffinityTerms(pod, targetPod, nodeInfo, antiAffinityTerms)
if err != nil || antiAffTermsMatch {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm, err: %v",
podName(pod), node.Name, err)
return ErrPodAntiAffinityRulesNotMatch, nil
}
}
}
if !matchFound && len(affinityTerms) > 0 {
// We have not been able to find any matches for the pod's affinity rules.
// This pod may be the first pod in a series of pods that have affinity to themselves. In order
// not to leave such pods in the pending state forever, we check that if no other pod
// in the cluster matches the namespace and selector of this pod and the pod matches
// its own terms, then we allow the pod to pass the affinity check.
if termsSelectorMatchFound {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
podName(pod), node.Name)
return ErrPodAffinityRulesNotMatch, nil
}
// Check if pod matches its own affinity properties (namespace and label selector).
if !targetPodMatchesAffinityOfPod(pod, pod) {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
podName(pod), node.Name)
return ErrPodAffinityRulesNotMatch, nil
}
}
}
@ -1446,8 +1500,8 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, node
return nil, nil
}
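// Simplified sketch of the escape hatch used in both branches above: when no
// existing pod in the cluster matches the namespaces and label selectors of the
// incoming pod's affinity terms, the pod is still admitted if it matches its
// own terms, so the first pod of a self-affine group is not left pending
// forever. Selectors are modelled here as required key/value pairs for
// illustration only.
func allowsFirstPodWithSelfAffinity(podLabels map[string]string,
	requiredByTerm []map[string]string, someOtherPodMatchesTerms bool) bool {
	if someOtherPodMatchesTerms {
		// Another pod matched the term properties but not on a usable
		// topology, so the affinity rule genuinely fails for this node.
		return false
	}
	// Nothing in the cluster matches the terms: admit only a pod that matches
	// all of its own terms.
	for _, required := range requiredByTerm {
		for k, v := range required {
			if podLabels[k] != v {
				return false
			}
		}
	}
	return true
}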
// PodToleratesNodeTaints checks if a pod tolerations can tolerate the node taints
func PodToleratesNodeTaints(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
// CheckNodeUnschedulablePredicate checks if a pod can be scheduled on a node with Unschedulable spec.
func CheckNodeUnschedulablePredicate(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
if nodeInfo == nil || nodeInfo.Node() == nil {
return false, []algorithm.PredicateFailureReason{ErrNodeUnknownCondition}, nil
}
@ -1456,6 +1510,15 @@ func PodToleratesNodeTaints(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeI
return false, []algorithm.PredicateFailureReason{ErrNodeUnschedulable}, nil
}
return true, nil, nil
}
// PodToleratesNodeTaints checks if a pod's tolerations can tolerate the node's taints
func PodToleratesNodeTaints(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
if nodeInfo == nil || nodeInfo.Node() == nil {
return false, []algorithm.PredicateFailureReason{ErrNodeUnknownCondition}, nil
}
return podToleratesNodeTaints(pod, nodeInfo, func(t *v1.Taint) bool {
// PodToleratesNodeTaints is only interested in NoSchedule and NoExecute taints.
return t.Effect == v1.TaintEffectNoSchedule || t.Effect == v1.TaintEffectNoExecute
@ -1518,6 +1581,16 @@ func CheckNodeDiskPressurePredicate(pod *v1.Pod, meta algorithm.PredicateMetadat
return true, nil, nil
}
// CheckNodePIDPressurePredicate checks if a pod can be scheduled on a node
// reporting the PID pressure condition.
func CheckNodePIDPressurePredicate(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
// check if the node is under PID pressure
if nodeInfo.PIDPressureCondition() == v1.ConditionTrue {
return false, []algorithm.PredicateFailureReason{ErrNodeUnderPIDPressure}, nil
}
return true, nil, nil
}
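// Minimal sketch of the shape shared by the node-condition predicates above:
// each inspects one cached condition and either fits or reports a single typed
// failure reason. The string-based reason below is a stand-in for
// algorithm.PredicateFailureReason.
func checkSingleNodeCondition(conditionTrue bool, failureReason string) (bool, []string, error) {
	if conditionTrue {
		return false, []string{failureReason}, nil
	}
	return true, nil, nil
}

// Example: checkSingleNodeCondition(nodeUnderPIDPressure, "NodeUnderPIDPressure")
// mirrors CheckNodePIDPressurePredicate for a node that is under PID pressure.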
// CheckNodeConditionPredicate checks if a pod can be scheduled on a node reporting an out of disk,
// network unavailable, or not ready condition. Only node conditions are accounted for in this predicate.
func CheckNodeConditionPredicate(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
@ -1582,7 +1655,7 @@ func (c *VolumeBindingChecker) predicate(pod *v1.Pod, meta algorithm.PredicateMe
return false, nil, fmt.Errorf("node not found")
}
unboundSatisfied, boundSatisfied, err := c.binder.Binder.FindPodVolumes(pod, node.Name)
unboundSatisfied, boundSatisfied, err := c.binder.Binder.FindPodVolumes(pod, node)
if err != nil {
return false, nil, err
}

View File

@ -32,23 +32,24 @@ import (
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
var (
extendedResourceA = v1.ResourceName("example.com/aaa")
extendedResourceB = v1.ResourceName("example.com/bbb")
hugePageResourceA = v1helper.HugePageResourceName(resource.MustParse("2Mi"))
extendedResourceA = v1.ResourceName("example.com/aaa")
extendedResourceB = v1.ResourceName("example.com/bbb")
kubernetesIOResourceA = v1.ResourceName("kubernetes.io/something")
kubernetesIOResourceB = v1.ResourceName("subdomain.kubernetes.io/something")
hugePageResourceA = v1helper.HugePageResourceName(resource.MustParse("2Mi"))
)
func makeResources(milliCPU, memory, nvidiaGPUs, pods, extendedA, storage, hugePageA int64) v1.NodeResources {
func makeResources(milliCPU, memory, pods, extendedA, storage, hugePageA int64) v1.NodeResources {
return v1.NodeResources{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
extendedResourceA: *resource.NewQuantity(extendedA, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(storage, resource.BinarySI),
hugePageResourceA: *resource.NewQuantity(hugePageA, resource.BinarySI),
@ -56,12 +57,11 @@ func makeResources(milliCPU, memory, nvidiaGPUs, pods, extendedA, storage, hugeP
}
}
func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, extendedA, storage, hugePageA int64) v1.ResourceList {
func makeAllocatableResources(milliCPU, memory, pods, extendedA, storage, hugePageA int64) v1.ResourceList {
return v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
extendedResourceA: *resource.NewQuantity(extendedA, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(storage, resource.BinarySI),
hugePageResourceA: *resource.NewQuantity(hugePageA, resource.BinarySI),
@ -297,6 +297,24 @@ func TestPodFitsResources(t *testing.T) {
test: "extended resource allocatable enforced for unknown resource for init container",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(extendedResourceB, 1, 0, 0)},
},
{
pod: newResourcePod(
schedulercache.Resource{MilliCPU: 1, Memory: 1, ScalarResources: map[v1.ResourceName]int64{kubernetesIOResourceA: 10}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0})),
fits: false,
test: "kubernetes.io resource capacity enforced",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(kubernetesIOResourceA, 10, 0, 0)},
},
{
pod: newResourceInitPod(newResourcePod(schedulercache.Resource{}),
schedulercache.Resource{MilliCPU: 1, Memory: 1, ScalarResources: map[v1.ResourceName]int64{kubernetesIOResourceB: 10}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0})),
fits: false,
test: "kubernetes.io resource capacity enforced for init container",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(kubernetesIOResourceB, 10, 0, 0)},
},
{
pod: newResourcePod(
schedulercache.Resource{MilliCPU: 1, Memory: 1, ScalarResources: map[v1.ResourceName]int64{hugePageResourceA: 10}}),
@ -337,7 +355,7 @@ func TestPodFitsResources(t *testing.T) {
}
for _, test := range enoughPodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20, 5)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 5, 20, 5)}}
test.nodeInfo.SetNode(&node)
RegisterPredicateMetadataProducerWithExtendedResourceOptions(test.ignoredExtendedResources)
meta := PredicateMetadata(test.pod, nil)
@ -394,7 +412,7 @@ func TestPodFitsResources(t *testing.T) {
},
}
for _, test := range notEnoughPodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0, 0, 0)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 1, 0, 0, 0)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@ -452,7 +470,7 @@ func TestPodFitsResources(t *testing.T) {
}
for _, test := range storagePodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20, 5)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 5, 20, 5)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@ -893,10 +911,11 @@ func TestISCSIDiskConflicts(t *testing.T) {
// TODO: Add test case for RequiredDuringSchedulingRequiredDuringExecution after it's implemented.
func TestPodFitsSelector(t *testing.T) {
tests := []struct {
pod *v1.Pod
labels map[string]string
fits bool
test string
pod *v1.Pod
labels map[string]string
nodeName string
fits bool
test string
}{
{
pod: &v1.Pod{},
@ -1341,11 +1360,234 @@ func TestPodFitsSelector(t *testing.T) {
test: "Pod with an Affinity matches node's labels but the PodSpec.NodeSelector(the old thing that we are deprecating) " +
"is not satisfied, won't schedule onto the node",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpNotIn,
Values: []string{"invalid value: ___@#$%^"},
},
},
},
},
},
},
},
},
},
labels: map[string]string{
"foo": "bar",
},
fits: false,
test: "Pod with an invalid value in Affinity term won't be scheduled onto the node",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchFields: []v1.NodeSelectorRequirement{
{
Key: algorithm.NodeFieldSelectorKeyNodeName,
Operator: v1.NodeSelectorOpIn,
Values: []string{"node_1"},
},
},
},
},
},
},
},
},
},
nodeName: "node_1",
fits: true,
test: "Pod with matchFields using In operator that matches the existing node",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchFields: []v1.NodeSelectorRequirement{
{
Key: algorithm.NodeFieldSelectorKeyNodeName,
Operator: v1.NodeSelectorOpIn,
Values: []string{"node_1"},
},
},
},
},
},
},
},
},
},
nodeName: "node_2",
fits: false,
test: "Pod with matchFields using In operator that does not match the existing node",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchFields: []v1.NodeSelectorRequirement{
{
Key: algorithm.NodeFieldSelectorKeyNodeName,
Operator: v1.NodeSelectorOpIn,
Values: []string{"node_1"},
},
},
},
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"bar"},
},
},
},
},
},
},
},
},
},
nodeName: "node_2",
labels: map[string]string{"foo": "bar"},
fits: true,
test: "Pod with two terms: matchFields does not match, but matchExpressions matches",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchFields: []v1.NodeSelectorRequirement{
{
Key: algorithm.NodeFieldSelectorKeyNodeName,
Operator: v1.NodeSelectorOpIn,
Values: []string{"node_1"},
},
},
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"bar"},
},
},
},
},
},
},
},
},
},
nodeName: "node_2",
labels: map[string]string{"foo": "bar"},
fits: false,
test: "Pod with one term: matchFields does not match, but matchExpressions matches",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchFields: []v1.NodeSelectorRequirement{
{
Key: algorithm.NodeFieldSelectorKeyNodeName,
Operator: v1.NodeSelectorOpIn,
Values: []string{"node_1"},
},
},
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"bar"},
},
},
},
},
},
},
},
},
},
nodeName: "node_1",
labels: map[string]string{"foo": "bar"},
fits: true,
test: "Pod with one term: both matchFields and matchExpressions match",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchFields: []v1.NodeSelectorRequirement{
{
Key: algorithm.NodeFieldSelectorKeyNodeName,
Operator: v1.NodeSelectorOpIn,
Values: []string{"node_1"},
},
},
},
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"not-match-to-bar"},
},
},
},
},
},
},
},
},
},
nodeName: "node_2",
labels: map[string]string{"foo": "bar"},
fits: false,
test: "Pod with two terms: both matchFields and matchExpressions do not match",
},
}
expectedFailureReasons := []algorithm.PredicateFailureReason{ErrNodeSelectorNotMatch}
for _, test := range tests {
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Labels: test.labels}}
node := v1.Node{ObjectMeta: metav1.ObjectMeta{
Name: test.nodeName,
Labels: test.labels,
}}
nodeInfo := schedulercache.NewNodeInfo()
nodeInfo.SetNode(&node)
@ -1591,425 +1833,6 @@ func TestServiceAffinity(t *testing.T) {
}
}
func TestEBSVolumeCountConflicts(t *testing.T) {
oneVolPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "ovp"},
},
},
},
},
}
ebsPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol",
},
},
},
},
},
}
splitPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someNonEBSVol",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol",
},
},
},
},
},
}
twoVolPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "tvp1"},
},
},
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "tvp2"},
},
},
},
},
}
splitVolsPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{},
},
},
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "svp"},
},
},
},
},
}
nonApplicablePod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{},
},
},
},
},
}
deletedPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "deletedPVC",
},
},
},
},
},
}
twoDeletedPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "deletedPVC",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "anotherDeletedPVC",
},
},
},
},
},
}
deletedPVPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "pvcWithDeletedPV",
},
},
},
},
},
}
// deletedPVPod2 is a different pod than deletedPVPod but using the same PVC
deletedPVPod2 := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "pvcWithDeletedPV",
},
},
},
},
},
}
// anotherDeletedPVPod is a different pod than deletedPVPod and uses another PVC
anotherDeletedPVPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "anotherPVCWithDeletedPV",
},
},
},
},
},
}
emptyPod := &v1.Pod{
Spec: v1.PodSpec{},
}
unboundPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "unboundPVC",
},
},
},
},
},
}
// Different pod than unboundPVCPod, but using the same unbound PVC
unboundPVCPod2 := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "unboundPVC",
},
},
},
},
},
}
// pod with unbound PVC that's different to unboundPVC
anotherUnboundPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "anotherUnboundPVC",
},
},
},
},
},
}
tests := []struct {
newPod *v1.Pod
existingPods []*v1.Pod
maxVols int
fits bool
test string
}{
{
newPod: oneVolPod,
existingPods: []*v1.Pod{twoVolPod, oneVolPod},
maxVols: 4,
fits: true,
test: "fits when node capacity >= new pod's EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod},
maxVols: 2,
fits: false,
test: "doesn't fit when node capacity < new pod's EBS volumes",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{twoVolPod},
maxVols: 3,
fits: true,
test: "new pod's count ignores non-EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
maxVols: 3,
fits: true,
test: "existing pods' counts ignore non-EBS volumes",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
maxVols: 3,
fits: true,
test: "new pod's count considers PVCs backed by EBS volumes",
},
{
newPod: splitPVCPod,
existingPods: []*v1.Pod{splitVolsPod, oneVolPod},
maxVols: 3,
fits: true,
test: "new pod's count ignores PVCs not backed by EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, ebsPVCPod},
maxVols: 3,
fits: false,
test: "existing pods' counts considers PVCs backed by EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, twoVolPod, ebsPVCPod},
maxVols: 4,
fits: true,
test: "already-mounted EBS volumes are always ok to allow",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{oneVolPod, oneVolPod, ebsPVCPod},
maxVols: 3,
fits: true,
test: "the same EBS volumes are not counted multiple times",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
maxVols: 2,
fits: false,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
maxVols: 3,
fits: true,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{oneVolPod, twoDeletedPVCPod},
maxVols: 3,
fits: false,
test: "pod with missing two PVCs is counted towards the PV limit twice",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
maxVols: 2,
fits: false,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
maxVols: 3,
fits: true,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: deletedPVPod2,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
maxVols: 2,
fits: true,
test: "two pods missing the same PV are counted towards the PV limit only once",
},
{
newPod: anotherDeletedPVPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
maxVols: 2,
fits: false,
test: "two pods missing different PVs are counted towards the PV limit twice",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
maxVols: 2,
fits: false,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
maxVols: 3,
fits: true,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: unboundPVCPod2,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
maxVols: 2,
fits: true,
test: "the same unbound PVC in multiple pods is counted towards the PV limit only once",
},
{
newPod: anotherUnboundPVCPod,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
maxVols: 2,
fits: false,
test: "two different unbound PVCs are counted towards the PV limit as two volumes",
},
}
pvInfo := FakePersistentVolumeInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "someEBSVol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "ebsVol"},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNonEBSVol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{},
},
},
}
pvcInfo := FakePersistentVolumeClaimInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "someEBSVol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someEBSVol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNonEBSVol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someNonEBSVol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pvcWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "pvcWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherPVCWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "anotherPVCWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "unboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherUnboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
}
expectedFailureReasons := []algorithm.PredicateFailureReason{ErrMaxVolumeCountExceeded}
for _, test := range tests {
os.Setenv(KubeMaxPDVols, strconv.Itoa(test.maxVols))
pred := NewMaxPDVolumeCountPredicate(EBSVolumeFilterType, pvInfo, pvcInfo)
fits, reasons, err := pred(test.newPod, PredicateMetadata(test.newPod, nil), schedulercache.NewNodeInfo(test.existingPods...))
if err != nil {
t.Errorf("%s: unexpected error: %v", test.test, err)
}
if !fits && !reflect.DeepEqual(reasons, expectedFailureReasons) {
t.Errorf("%s: unexpected failure reasons: %v, want: %v", test.test, reasons, expectedFailureReasons)
}
if fits != test.fits {
t.Errorf("%s: expected %v, got %v", test.test, test.fits, fits)
}
}
}
func newPodWithPort(hostPorts ...int) *v1.Pod {
networkPorts := []v1.ContainerPort{}
for _, port := range hostPorts {
@ -2042,7 +1865,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 0, 0, 0)},
},
fits: true,
wErr: nil,
@ -2054,7 +1877,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@ -2064,34 +1887,6 @@ func TestRunGeneralPredicates(t *testing.T) {
},
test: "not enough cpu and memory resource",
},
{
pod: &v1.Pod{},
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: true,
wErr: nil,
test: "no resources/port/host requested always fits on GPU machine",
},
{
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 1})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: false,
wErr: nil,
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(v1.ResourceNvidiaGPU, 1, 1, 1)},
test: "not enough GPU resource",
},
{
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 0})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: true,
wErr: nil,
test: "enough GPU resource",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
@ -2101,7 +1896,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@ -2113,7 +1908,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@ -2803,7 +2598,7 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
},
},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelA}},
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelA}},
},
nodes: []v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
@ -2875,7 +2670,8 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"foo": "bar",
"foo": "bar",
"service": "securityscan",
},
},
Spec: v1.PodSpec{
@ -2894,12 +2690,24 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
},
TopologyKey: "zone",
},
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan"},
},
},
},
TopologyKey: "zone",
},
},
},
},
},
},
pods: []*v1.Pod{},
pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: map[string]string{"foo": "bar"}}}},
nodes: []v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"zone": "az1", "hostname": "h1"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"zone": "az2", "hostname": "h2"}}},
@ -2949,6 +2757,55 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
},
test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA and nodeB.",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"abc"},
},
},
},
TopologyKey: "region",
},
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan"},
},
},
},
TopologyKey: "zone",
},
},
},
},
},
},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "abc", "service": "securityscan"}}},
},
nodes: []v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}},
},
nodesExpectAffinityFailureReasons: [][]algorithm.PredicateFailureReason{{ErrPodAffinityNotMatch, ErrPodAntiAffinityRulesNotMatch}},
fits: map[string]bool{
"nodeA": false,
"nodeB": true,
},
test: "This test ensures that anti-affinity matches a pod when all terms of the anti-affinity rule matches a pod.",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
@ -2986,7 +2843,7 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
"nodeB": false,
"nodeC": true,
},
test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA and nodeB but can be schedulerd onto nodeC",
test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA and nodeB but can be scheduled onto nodeC",
},
{
pod: &v1.Pod{
@ -3134,7 +2991,7 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
"nodeB": false,
"nodeC": true,
},
test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA, nodeB, but can be schedulerd onto nodeC (NodeC has an existing pod that match the inter pod affinity rule but in different namespace)",
test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA, nodeB, but can be scheduled onto nodeC (NodeC has an existing pod that match the inter pod affinity rule but in different namespace)",
},
}
@ -3142,7 +2999,8 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
for indexTest, test := range tests {
nodeListInfo := FakeNodeListInfo(test.nodes)
for indexNode, node := range test.nodes {
nodeInfoMap := make(map[string]*schedulercache.NodeInfo)
for i, node := range test.nodes {
var podsOnNode []*v1.Pod
for _, pod := range test.pods {
if pod.Spec.NodeName == node.Name {
@ -3150,21 +3008,23 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
}
}
nodeInfo := schedulercache.NewNodeInfo(podsOnNode...)
nodeInfo.SetNode(&test.nodes[i])
nodeInfoMap[node.Name] = nodeInfo
}
for indexNode, node := range test.nodes {
testFit := PodAffinityChecker{
info: nodeListInfo,
podLister: schedulertesting.FakePodLister(test.pods),
}
nodeInfo := schedulercache.NewNodeInfo(podsOnNode...)
nodeInfo.SetNode(&node)
nodeInfoMap := map[string]*schedulercache.NodeInfo{node.Name: nodeInfo}
var meta algorithm.PredicateMetadata
if !test.nometa {
meta = PredicateMetadata(test.pod, nodeInfoMap)
}
fits, reasons, _ := testFit.InterPodAffinityMatches(test.pod, meta, nodeInfo)
fits, reasons, _ := testFit.InterPodAffinityMatches(test.pod, meta, nodeInfoMap[node.Name])
if !fits && !reflect.DeepEqual(reasons, test.nodesExpectAffinityFailureReasons[indexNode]) {
t.Errorf("index: %d test: %s unexpected failure reasons: %v expect: %v", indexTest, test.test, reasons, test.nodesExpectAffinityFailureReasons[indexNode])
}
@ -3423,7 +3283,7 @@ func TestPodSchedulesOnNodeWithMemoryPressureCondition(t *testing.T) {
ImagePullPolicy: "Always",
// at least one requirement -> burstable pod
Resources: v1.ResourceRequirements{
Requests: makeAllocatableResources(100, 100, 100, 100, 0, 0, 0),
Requests: makeAllocatableResources(100, 100, 100, 0, 0, 0),
},
},
},

View File

@ -17,12 +17,8 @@ limitations under the License.
package predicates
import (
"github.com/golang/glog"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
@ -70,69 +66,6 @@ func CreateSelectorFromLabels(aL map[string]string) labels.Selector {
return labels.Set(aL).AsSelector()
}
// EquivalencePodGenerator is a generator of equivalence class for pod with consideration of PVC info.
type EquivalencePodGenerator struct {
pvcInfo PersistentVolumeClaimInfo
}
// NewEquivalencePodGenerator returns a getEquivalencePod method with consideration of PVC info.
func NewEquivalencePodGenerator(pvcInfo PersistentVolumeClaimInfo) algorithm.GetEquivalencePodFunc {
g := &EquivalencePodGenerator{
pvcInfo: pvcInfo,
}
return g.getEquivalencePod
}
// GetEquivalencePod returns a EquivalencePod which contains a group of pod attributes which can be reused.
func (e *EquivalencePodGenerator) getEquivalencePod(pod *v1.Pod) interface{} {
// For now we only consider pods:
// 1. OwnerReferences is Controller
// 2. with same OwnerReferences
// 3. with same PVC claim
// to be equivalent
for _, ref := range pod.OwnerReferences {
if ref.Controller != nil && *ref.Controller {
pvcSet, err := e.getPVCSet(pod)
if err == nil {
// A pod can only belongs to one controller, so let's return.
return &EquivalencePod{
ControllerRef: ref,
PVCSet: pvcSet,
}
}
// If error encountered, log warning and return nil (i.e. no equivalent pod found)
glog.Warningf("[EquivalencePodGenerator] for pod: %v failed due to: %v", pod.GetName(), err)
return nil
}
}
return nil
}
// getPVCSet returns a set of PVC UIDs of given pod.
func (e *EquivalencePodGenerator) getPVCSet(pod *v1.Pod) (sets.String, error) {
result := sets.NewString()
for _, volume := range pod.Spec.Volumes {
if volume.PersistentVolumeClaim == nil {
continue
}
pvcName := volume.PersistentVolumeClaim.ClaimName
pvc, err := e.pvcInfo.GetPersistentVolumeClaimInfo(pod.GetNamespace(), pvcName)
if err != nil {
return nil, err
}
result.Insert(string(pvc.UID))
}
return result, nil
}
// EquivalencePod is a group of pod attributes which can be reused as equivalence to schedule other pods.
type EquivalencePod struct {
ControllerRef metav1.OwnerReference
PVCSet sets.String
}
// portsConflict check whether existingPorts and wantPorts conflict with each other
// return true if we have a conflict
func portsConflict(existingPorts schedutil.HostPortInfo, wantPorts []*v1.ContainerPort) bool {

View File

@ -19,6 +19,7 @@ go_library(
"node_label.go",
"node_prefer_avoid_pods.go",
"reduce.go",
"requested_to_capacity_ratio.go",
"resource_allocation.go",
"resource_limits.go",
"selector_spreading.go",
@ -28,18 +29,21 @@ go_library(
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities",
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/util/node:go_default_library",
"//pkg/util/parsers:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/util/workqueue:go_default_library",
],
)
@ -56,22 +60,27 @@ go_test(
"node_affinity_test.go",
"node_label_test.go",
"node_prefer_avoid_pods_test.go",
"requested_to_capacity_ratio_test.go",
"resource_limits_test.go",
"selector_spreading_test.go",
"taint_toleration_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/features:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/util/parsers:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)

View File

@ -19,8 +19,10 @@ package priorities
import (
"math"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
var (
@ -36,17 +38,31 @@ var (
BalancedResourceAllocationMap = balancedResourcePriority.PriorityMap
)
func balancedResourceScorer(requested, allocable *schedulercache.Resource) int64 {
func balancedResourceScorer(requested, allocable *schedulercache.Resource, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
cpuFraction := fractionOfCapacity(requested.MilliCPU, allocable.MilliCPU)
memoryFraction := fractionOfCapacity(requested.Memory, allocable.Memory)
// This is to find a node which has the most balanced CPU, memory and volume usage.
if includeVolumes && utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && allocatableVolumes > 0 {
volumeFraction := float64(requestedVolumes) / float64(allocatableVolumes)
if cpuFraction >= 1 || memoryFraction >= 1 || volumeFraction >= 1 {
// if requested >= capacity, the corresponding host should never be preferred.
return 0
}
// Compute variance for all the three fractions.
mean := (cpuFraction + memoryFraction + volumeFraction) / float64(3)
variance := float64((((cpuFraction - mean) * (cpuFraction - mean)) + ((memoryFraction - mean) * (memoryFraction - mean)) + ((volumeFraction - mean) * (volumeFraction - mean))) / float64(3))
// Since the variance is computed over positive fractions, it is itself a positive fraction. Using
// 1 - variance lets the score be higher for the node with the least variance, and multiplying it
// by 10 provides the scaling factor needed.
return int64((1 - variance) * float64(schedulerapi.MaxPriority))
}
if cpuFraction >= 1 || memoryFraction >= 1 {
// if requested >= capacity, the corresponding host should never be preferred.
return 0
}
// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
// respectively. Multilying the absolute value of the difference by 10 scales the value to
// respectively. Multiplying the absolute value of the difference by 10 scales the value to
// 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
// 10 leads to the score which also scales from 0 to 10 while 10 representing well balanced.
diff := math.Abs(cpuFraction - memoryFraction)
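// Worked example for the scorer above, assuming MaxPriority is 10 (the scale
// the surrounding tests use) and that the non-volume branch scales (1 - diff)
// by MaxPriority in the same way as the volume branch shown earlier. With
// volumes and fractions cpu=0.5, mem=0.5, vol=0.2: mean = 0.4, variance ≈ 0.02,
// so the score is int64((1-0.02)*10) = 9. Without volumes, cpu=0.7 and mem=0.3
// give diff = 0.4 and a score of int64((1-0.4)*10) = 6.
func balancedScoreExample() (withVolumes, withoutVolumes int64) {
	cpu, mem, vol := 0.5, 0.5, 0.2
	mean := (cpu + mem + vol) / 3
	variance := ((cpu-mean)*(cpu-mean) + (mem-mean)*(mem-mean) + (vol-mean)*(vol-mean)) / 3
	withVolumes = int64((1 - variance) * 10)

	cpuOnlyDiff := 0.7 - 0.3 // math.Abs is unnecessary here since 0.7 > 0.3
	withoutVolumes = int64((1 - cpuOnlyDiff) * 10)
	return withVolumes, withoutVolumes // 9, 6
}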

View File

@ -17,17 +17,118 @@ limitations under the License.
package priorities
import (
"fmt"
"reflect"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// getExistingVolumeCountForNode returns how many more volumes the node can accept:
// maxVolumes minus the volumes already mounted by the given pods, floored at zero.
func getExistingVolumeCountForNode(pods []*v1.Pod, maxVolumes int) int {
volumeCount := 0
for _, pod := range pods {
volumeCount += len(pod.Spec.Volumes)
}
if maxVolumes-volumeCount > 0 {
return maxVolumes - volumeCount
}
return 0
}
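// Example for the helper above (hypothetical pod slices): with maxVolumes = 5
// and pods mounting three volumes in total it reports 2 free slots, and with
// six volumes mounted it reports 0 rather than a negative count.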
func TestBalancedResourceAllocation(t *testing.T) {
// Enable the BalanceAttachedNodeVolumes feature gate so volumes are included in balanced resource allocation
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.BalanceAttachedNodeVolumes))
podwithVol1 := v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("3000"),
},
},
},
},
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "ovp"},
},
},
},
NodeName: "machine4",
}
podwithVol2 := v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
},
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "ovp1"},
},
},
},
NodeName: "machine4",
}
podwithVol3 := v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
},
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "ovp1"},
},
},
},
NodeName: "machine4",
}
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
@ -89,12 +190,33 @@ func TestBalancedResourceAllocation(t *testing.T) {
},
},
}
cpuAndMemory3 := v1.PodSpec{
NodeName: "machine3",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
/*
@ -111,7 +233,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled, nothing requested",
name: "nothing scheduled, nothing requested",
},
{
/*
@ -128,7 +250,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled, resources requested, differently sized machines",
name: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
@ -145,7 +267,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "no resources requested, pods scheduled",
name: "no resources requested, pods scheduled",
pods: []*v1.Pod{
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -168,7 +290,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 6}},
test: "no resources requested, pods scheduled with resources",
name: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -191,7 +313,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 9}},
test: "resources requested, pods scheduled with resources",
name: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -212,7 +334,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 6}},
test: "resources requested, pods scheduled with resources, differently sized machines",
name: "resources requested, pods scheduled with resources, differently sized machines",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -233,7 +355,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "requested resources exceed node capacity",
name: "requested resources exceed node capacity",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -243,22 +365,57 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "zero node resources, pods scheduled with resources",
name: "zero node resources, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Machine4 will be chosen here because it already has an existing volume, making the variance
of volume count, CPU usage, and memory usage smaller.
*/
pod: &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "ovp2"},
},
},
},
},
},
nodes: []*v1.Node{makeNode("machine3", 3500, 40000), makeNode("machine4", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine3", Score: 8}, {Host: "machine4", Score: 9}},
name: "Include volume count on a node for balanced resource allocation",
pods: []*v1.Pod{
{Spec: cpuAndMemory3},
{Spec: podwithVol1},
{Spec: podwithVol2},
{Spec: podwithVol3},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(BalancedResourceAllocationMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
if len(test.pod.Spec.Volumes) > 0 {
maxVolumes := 5
for _, info := range nodeNameToInfo {
info.TransientInfo.TransNodeInfo.AllocatableVolumesCount = getExistingVolumeCountForNode(info.Pods(), maxVolumes)
info.TransientInfo.TransNodeInfo.RequestedVolumes = len(test.pod.Spec.Volumes)
}
}
list, err := priorityFunction(BalancedResourceAllocationMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}
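// The test above now seeds AllocatableVolumesCount and RequestedVolumes on each NodeInfo,
// and the resource scorers elsewhere in this change gain includeVolumes/requestedVolumes/
// allocatableVolumes parameters. The function below is only an illustration of how a
// balanced-allocation scorer can fold a volume fraction into the balance calculation;
// it is a hedged sketch under assumptions, not the vendored balancedResourceScorer.
func balancedScorerSketch(requested, allocable *schedulercache.Resource, includeVolumes bool, requestedVolumes, allocatableVolumes int) int64 {
	if allocable.MilliCPU == 0 || allocable.Memory == 0 {
		return 0
	}
	cpuFraction := float64(requested.MilliCPU) / float64(allocable.MilliCPU)
	memFraction := float64(requested.Memory) / float64(allocable.Memory)
	if cpuFraction >= 1 || memFraction >= 1 {
		// Requested resources exceed capacity: worst possible score.
		return 0
	}
	if includeVolumes && allocatableVolumes > 0 {
		volumeFraction := float64(requestedVolumes) / float64(allocatableVolumes)
		// Lower variance across the three fractions means a better-balanced node.
		mean := (cpuFraction + memFraction + volumeFraction) / 3
		variance := ((cpuFraction-mean)*(cpuFraction-mean) +
			(memFraction-mean)*(memFraction-mean) +
			(volumeFraction-mean)*(volumeFraction-mean)) / 3
		return int64((1 - variance) * float64(schedulerapi.MaxPriority))
	}
	// Without volumes, score by how close the CPU and memory fractions are to each other.
	diff := cpuFraction - memFraction
	if diff < 0 {
		diff = -diff
	}
	return int64((1 - diff) * float64(schedulerapi.MaxPriority))
}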

View File

@ -18,10 +18,12 @@ package priorities
import (
"fmt"
"strings"
"k8s.io/api/core/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/util/parsers"
)
// This is a reasonable size range for all container images; the 90th percentile of images on Docker Hub falls into this range.
@ -42,7 +44,7 @@ func ImageLocalityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *scheduler
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
sumSize := totalImageSize(node, pod.Spec.Containers)
sumSize := totalImageSize(nodeInfo, pod.Spec.Containers)
return schedulerapi.HostPriority{
Host: node.Name,
@ -69,20 +71,27 @@ func calculateScoreFromSize(sumSize int64) int {
}
// totalImageSize returns the total image size of all the containers that are already on the node.
func totalImageSize(node *v1.Node, containers []v1.Container) int64 {
imageSizes := make(map[string]int64)
for _, image := range node.Status.Images {
for _, name := range image.Names {
imageSizes[name] = image.SizeBytes
}
}
func totalImageSize(nodeInfo *schedulercache.NodeInfo, containers []v1.Container) int64 {
var total int64
imageSizes := nodeInfo.ImageSizes()
for _, container := range containers {
if size, ok := imageSizes[container.Image]; ok {
if size, ok := imageSizes[normalizedImageName(container.Image)]; ok {
total += size
}
}
return total
}
// normalizedImageName returns the CRI compliant name for a given image.
// TODO: cover the corner cases of missed matches, e.g.,
// 1. Using Docker as the runtime and docker.io/library/test:tag in the pod spec, but only test:tag is present in node status
// 2. Using the implicit registry, i.e., test:tag or library/test:tag in the pod spec, but only docker.io/library/test:tag
//    is present in node status; note that if users consistently use one registry format, this should not happen.
func normalizedImageName(name string) string {
if strings.LastIndex(name, ":") <= strings.LastIndex(name, "/") {
name = name + ":" + parsers.DefaultImageTag
}
return name
}
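// For orientation, a few illustrative inputs/outputs (derived from the table-driven cases
// in the updated test file; assumes parsers.DefaultImageTag is "latest"):
//
//	normalizedImageName("gcr.io/40")        // "gcr.io/40:latest"
//	normalizedImageName("gcr.io/40:v1")     // "gcr.io/40:v1" (already tagged)
//	normalizedImageName("gcr.io:5000/root") // "gcr.io:5000/root:latest" (registry port, no tag)
//
// so a pod spec that references the untagged "gcr.io/40" now scores against the
// "gcr.io/40:latest" entry reported in node status.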

View File

@ -17,14 +17,17 @@ limitations under the License.
package priorities
import (
"crypto/sha256"
"reflect"
"sort"
"testing"
"encoding/hex"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/util/parsers"
)
func TestImageLocalityPriority(t *testing.T) {
@ -65,7 +68,7 @@ func TestImageLocalityPriority(t *testing.T) {
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/40",
"gcr.io/40:" + parsers.DefaultImageTag,
"gcr.io/40:v1",
"gcr.io/40:v1",
},
@ -73,14 +76,14 @@ func TestImageLocalityPriority(t *testing.T) {
},
{
Names: []string{
"gcr.io/140",
"gcr.io/140:" + parsers.DefaultImageTag,
"gcr.io/140:v1",
},
SizeBytes: int64(140 * mb),
},
{
Names: []string{
"gcr.io/2000",
"gcr.io/2000:" + parsers.DefaultImageTag,
},
SizeBytes: int64(2000 * mb),
},
@ -91,13 +94,13 @@ func TestImageLocalityPriority(t *testing.T) {
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/250",
"gcr.io/250:" + parsers.DefaultImageTag,
},
SizeBytes: int64(250 * mb),
},
{
Names: []string{
"gcr.io/10",
"gcr.io/10:" + parsers.DefaultImageTag,
"gcr.io/10:v1",
},
SizeBytes: int64(10 * mb),
@ -110,28 +113,28 @@ func TestImageLocalityPriority(t *testing.T) {
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
// Pod: gcr.io/40 gcr.io/250
// Node1
// Image: gcr.io/40 40MB
// Image: gcr.io/40:latest 40MB
// Score: (40M-23M)/97.7M + 1 = 1
// Node2
// Image: gcr.io/250 250MB
// Image: gcr.io/250:latest 250MB
// Score: (250M-23M)/97.7M + 1 = 3
pod: &v1.Pod{Spec: test40250},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 3}},
test: "two images spread on two nodes, prefer the larger image one",
name: "two images spread on two nodes, prefer the larger image one",
},
{
// Pod: gcr.io/40 gcr.io/140
// Node1
// Image: gcr.io/40 40MB, gcr.io/140 140MB
// Image: gcr.io/40:latest 40MB, gcr.io/140:latest 140MB
// Score: (40M+140M-23M)/97.7M + 1 = 2
// Node2
@ -140,37 +143,56 @@ func TestImageLocalityPriority(t *testing.T) {
pod: &v1.Pod{Spec: test40140},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: 0}},
test: "two images on one node, prefer this node",
name: "two images on one node, prefer this node",
},
{
// Pod: gcr.io/2000 gcr.io/10
// Node1
// Image: gcr.io/2000 2000MB
// Image: gcr.io/2000:latest 2000MB
// Score: 2000 > max score = 10
// Node2
// Image: gcr.io/10 10MB
// Image: gcr.io/10:latest 10MB
// Score: 10 < min score = 0
pod: &v1.Pod{Spec: testMinMax},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "if exceed limit, use limit",
name: "if exceed limit, use limit",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(ImageLocalityPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(ImageLocalityPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
sort.Sort(test.expectedList)
sort.Sort(list)
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}
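// The per-case score comments above encode a simple linear mapping from the total size of
// matching images to a priority. The sketch below reproduces that mapping, with the ~23MB
// lower bound and ~1000MB upper bound inferred from those comments and the min/max cases,
// not copied from the vendored scorer.
func scoreFromSizeSketch(sumSizeBytes int64) int {
	const (
		mbSketch   = 1024 * 1024
		minImgSize = 23 * mbSketch   // below this, locality is ignored (score 0)
		maxImgSize = 1000 * mbSketch // at or above this, the score saturates at MaxPriority
	)
	switch {
	case sumSizeBytes < minImgSize:
		return 0
	case sumSizeBytes >= maxImgSize:
		return schedulerapi.MaxPriority
	default:
		// e.g. 40MB -> 1, 180MB -> 2, 250MB -> 3, matching the expectations above.
		return int(int64(schedulerapi.MaxPriority)*(sumSizeBytes-minImgSize)/(maxImgSize-minImgSize)) + 1
	}
}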
func TestNormalizedImageName(t *testing.T) {
for _, testCase := range []struct {
Input string
Output string
}{
{Input: "root", Output: "root:latest"},
{Input: "root:tag", Output: "root:tag"},
{Input: "gcr.io:5000/root", Output: "gcr.io:5000/root:latest"},
{Input: "root@" + getImageFakeDigest("root"), Output: "root@" + getImageFakeDigest("root")},
} {
image := normalizedImageName(testCase.Input)
if image != testCase.Output {
t.Errorf("expected image reference: %q, got %q", testCase.Output, image)
}
}
}
@ -181,3 +203,8 @@ func makeImageNode(node string, status v1.NodeStatus) *v1.Node {
Status: status,
}
}
func getImageFakeDigest(fakeContent string) string {
hash := sha256.Sum256([]byte(fakeContent))
return "sha256:" + hex.EncodeToString(hash[:])
}

View File

@ -27,7 +27,7 @@ import (
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"github.com/golang/glog"
)

View File

@ -24,7 +24,7 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
@ -267,7 +267,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
@ -277,7 +277,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "all machines are same priority as Affinity is nil",
name: "all machines are same priority as Affinity is nil",
},
// the node (machine1) that has the label {"region": "China"} (matching the topology key) and has existing pods that match the labelSelector gets a high score
// the node (machine3) that doesn't have the label {"region": "whatever the value is"} (mismatching the topology key) but has existing pods that match the labelSelector gets a low score
@ -295,7 +295,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "Affinity: pod that matches topology key & pods in nodes will get high score comparing to others" +
name: "Affinity: pod that matches topology key & pods in nodes will get high score comparing to others" +
"which doesn't match either pods in nodes or in topology key",
},
// the node1 (machine1) that has the label {"region": "China"} (matching the topology key) and has existing pods that match the labelSelector gets a high score
@ -313,7 +313,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "All the nodes that have the same topology key & label value with one of them has an existing pod that match the affinity rules, have the same score",
name: "All the nodes that have the same topology key & label value with one of them has an existing pod that match the affinity rules, have the same score",
},
// there are 2 regions, say regionChina(machine1,machine3,machine4) and regionIndia(machine2,machine5), both regions have nodes that match the preference.
// But there are more nodes(actually more existing pods) in regionChina that match the preference than regionIndia.
@ -337,7 +337,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine5", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 5}, {Host: "machine3", Score: schedulerapi.MaxPriority}, {Host: "machine4", Score: schedulerapi.MaxPriority}, {Host: "machine5", Score: 5}},
test: "Affinity: nodes in one region has more matching pods comparing to other reqion, so the region which has more macthes will get high score",
name: "Affinity: nodes in one region has more matching pods comparing to other reqion, so the region which has more macthes will get high score",
},
// Test with the different operators and values for pod affinity scheduling preference, including some match failures.
{
@ -353,7 +353,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Affinity: different Label operators and values for pod affinity scheduling preference, including some match failures ",
name: "Affinity: different Label operators and values for pod affinity scheduling preference, including some match failures ",
},
// Test the symmetry cases for affinity: the difference between affinity and symmetry is that it is not the pod that wants to run together with some existing pods,
// but the existing pods that have the inter-pod affinity preference, while the pod to schedule satisfies that preference.
@ -369,7 +369,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Affinity symmetry: considred only the preferredDuringSchedulingIgnoredDuringExecution in pod affinity symmetry",
name: "Affinity symmetry: considred only the preferredDuringSchedulingIgnoredDuringExecution in pod affinity symmetry",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
@ -383,7 +383,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Affinity symmetry: considred RequiredDuringSchedulingIgnoredDuringExecution in pod affinity symmetry",
name: "Affinity symmetry: considred RequiredDuringSchedulingIgnoredDuringExecution in pod affinity symmetry",
},
// The pod to schedule prefer to stay away from some existing pods at node level using the pod anti affinity.
@ -403,7 +403,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChina}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity: pod that doesnot match existing pods in node will get high score ",
name: "Anti Affinity: pod that doesnot match existing pods in node will get high score ",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
@ -416,7 +416,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChina}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity: pod that does not matches topology key & matches the pods in nodes will get higher score comparing to others ",
name: "Anti Affinity: pod that does not matches topology key & matches the pods in nodes will get higher score comparing to others ",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
@ -430,7 +430,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity: one node has more matching pods comparing to other node, so the node which has more unmacthes will get high score",
name: "Anti Affinity: one node has more matching pods comparing to other node, so the node which has more unmacthes will get high score",
},
// Test the symmetry cases for anti affinity
{
@ -444,7 +444,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelAzAz2}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity symmetry: the existing pods in node which has anti affinity match will get high score",
name: "Anti Affinity symmetry: the existing pods in node which has anti affinity match will get high score",
},
// Test both affinity and anti-affinity
{
@ -458,7 +458,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "Affinity and Anti Affinity: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity",
name: "Affinity and Anti Affinity: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity",
},
// Combined cases considering both affinity and anti-affinity, the pod to schedule and existing pods have the same labels (they are in the same RC/service),
// the pod prefers to run together with its sibling pods in the same region, but wants to stay away from them at the node level,
@ -483,7 +483,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine5", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 4}, {Host: "machine3", Score: schedulerapi.MaxPriority}, {Host: "machine4", Score: schedulerapi.MaxPriority}, {Host: "machine5", Score: 4}},
test: "Affinity and Anti Affinity: considering both affinity and anti-affinity, the pod to schedule and existing pods have the same labels",
name: "Affinity and Anti Affinity: considering both affinity and anti-affinity, the pod to schedule and existing pods have the same labels",
},
// Consider Affinity, Anti Affinity and symmetry together.
// for Affinity, the weights are: 8, 0, 0, 0
@ -505,24 +505,26 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine4", Labels: labelAzAz2}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: schedulerapi.MaxPriority}, {Host: "machine4", Score: 0}},
test: "Affinity and Anti Affinity and symmetry: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity & symmetry",
name: "Affinity and Anti Affinity and symmetry: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity & symmetry",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
interPodAffinity := InterPodAffinity{
info: FakeNodeListInfo(test.nodes),
nodeLister: schedulertesting.FakeNodeLister(test.nodes),
podLister: schedulertesting.FakePodLister(test.pods),
hardPodAffinityWeight: v1.DefaultHardPodAffinitySymmetricWeight,
}
list, err := interPodAffinity.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: \nexpected \n\t%#v, \ngot \n\t%#v\n", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
interPodAffinity := InterPodAffinity{
info: FakeNodeListInfo(test.nodes),
nodeLister: schedulertesting.FakeNodeLister(test.nodes),
podLister: schedulertesting.FakePodLister(test.pods),
hardPodAffinityWeight: v1.DefaultHardPodAffinitySymmetricWeight,
}
list, err := interPodAffinity.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected \n\t%#v, \ngot \n\t%#v\n", test.expectedList, list)
}
})
}
}
@ -563,7 +565,7 @@ func TestHardPodAffinitySymmetricWeight(t *testing.T) {
nodes []*v1.Node
hardPodAffinityWeight int32
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelServiceS1}},
@ -578,7 +580,7 @@ func TestHardPodAffinitySymmetricWeight(t *testing.T) {
},
hardPodAffinityWeight: v1.DefaultHardPodAffinitySymmetricWeight,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Hard Pod Affinity symmetry: hard pod affinity symmetry weights 1 by default, then nodes that match the hard pod affinity symmetry rules, get a high score",
name: "Hard Pod Affinity symmetry: hard pod affinity symmetry weights 1 by default, then nodes that match the hard pod affinity symmetry rules, get a high score",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelServiceS1}},
@ -593,23 +595,25 @@ func TestHardPodAffinitySymmetricWeight(t *testing.T) {
},
hardPodAffinityWeight: 0,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "Hard Pod Affinity symmetry: hard pod affinity symmetry is closed(weights 0), then nodes that match the hard pod affinity symmetry rules, get same score with those not match",
name: "Hard Pod Affinity symmetry: hard pod affinity symmetry is closed(weights 0), then nodes that match the hard pod affinity symmetry rules, get same score with those not match",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
ipa := InterPodAffinity{
info: FakeNodeListInfo(test.nodes),
nodeLister: schedulertesting.FakeNodeLister(test.nodes),
podLister: schedulertesting.FakePodLister(test.pods),
hardPodAffinityWeight: test.hardPodAffinityWeight,
}
list, err := ipa.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: \nexpected \n\t%#v, \ngot \n\t%#v\n", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
ipa := InterPodAffinity{
info: FakeNodeListInfo(test.nodes),
nodeLister: schedulertesting.FakeNodeLister(test.nodes),
podLister: schedulertesting.FakePodLister(test.pods),
hardPodAffinityWeight: test.hardPodAffinityWeight,
}
list, err := ipa.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected \n\t%#v, \ngot \n\t%#v\n", test.expectedList, list)
}
})
}
}

View File

@ -18,7 +18,7 @@ package priorities
import (
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
var (
@ -33,7 +33,7 @@ var (
LeastRequestedPriorityMap = leastResourcePriority.PriorityMap
)
func leastResourceScorer(requested, allocable *schedulercache.Resource) int64 {
func leastResourceScorer(requested, allocable *schedulercache.Resource, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
return (leastRequestedScore(requested.MilliCPU, allocable.MilliCPU) +
leastRequestedScore(requested.Memory, allocable.Memory)) / 2
}

View File

@ -24,7 +24,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestLeastRequested(t *testing.T) {
@ -94,7 +94,7 @@ func TestLeastRequested(t *testing.T) {
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
/*
@ -111,7 +111,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled, nothing requested",
name: "nothing scheduled, nothing requested",
},
{
/*
@ -128,7 +128,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 5}},
test: "nothing scheduled, resources requested, differently sized machines",
name: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
@ -145,7 +145,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "no resources requested, pods scheduled",
name: "no resources requested, pods scheduled",
pods: []*v1.Pod{
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -168,7 +168,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 5}},
test: "no resources requested, pods scheduled with resources",
name: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -191,7 +191,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 4}},
test: "resources requested, pods scheduled with resources",
name: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -212,7 +212,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 6}},
test: "resources requested, pods scheduled with resources, differently sized machines",
name: "resources requested, pods scheduled with resources, differently sized machines",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -233,7 +233,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 2}},
test: "requested resources exceed node capacity",
name: "requested resources exceed node capacity",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -243,7 +243,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "zero node resources, pods scheduled with resources",
name: "zero node resources, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -252,13 +252,15 @@ func TestLeastRequested(t *testing.T) {
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(LeastRequestedPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(LeastRequestedPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}

View File

@ -22,7 +22,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// PriorityMetadataFactory is a factory to produce PriorityMetadata.

View File

@ -26,7 +26,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
@ -117,13 +117,13 @@ func TestPriorityMetadata(t *testing.T) {
}
tests := []struct {
pod *v1.Pod
test string
name string
expected interface{}
}{
{
pod: nil,
expected: nil,
test: "pod is nil , priorityMetadata is nil",
name: "pod is nil , priorityMetadata is nil",
},
{
pod: podWithTolerationsAndAffinity,
@ -132,7 +132,7 @@ func TestPriorityMetadata(t *testing.T) {
podTolerations: tolerations,
affinity: podAffinity,
},
test: "Produce a priorityMetadata with default requests",
name: "Produce a priorityMetadata with default requests",
},
{
pod: podWithTolerationsAndRequests,
@ -141,7 +141,7 @@ func TestPriorityMetadata(t *testing.T) {
podTolerations: tolerations,
affinity: nil,
},
test: "Produce a priorityMetadata with specified requests",
name: "Produce a priorityMetadata with specified requests",
},
{
pod: podWithAffinityAndRequests,
@ -150,7 +150,7 @@ func TestPriorityMetadata(t *testing.T) {
podTolerations: nil,
affinity: podAffinity,
},
test: "Produce a priorityMetadata with specified requests",
name: "Produce a priorityMetadata with specified requests",
},
}
mataDataProducer := NewPriorityMetadataFactory(
@ -159,9 +159,11 @@ func TestPriorityMetadata(t *testing.T) {
schedulertesting.FakeReplicaSetLister([]*extensions.ReplicaSet{}),
schedulertesting.FakeStatefulSetLister([]*apps.StatefulSet{}))
for _, test := range tests {
ptData := mataDataProducer(test.pod, nil)
if !reflect.DeepEqual(test.expected, ptData) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expected, ptData)
}
t.Run(test.name, func(t *testing.T) {
ptData := mataDataProducer(test.pod, nil)
if !reflect.DeepEqual(test.expected, ptData) {
t.Errorf("expected %#v, got %#v", test.expected, ptData)
}
})
}
}

View File

@ -18,7 +18,7 @@ package priorities
import (
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
var (
@ -31,7 +31,7 @@ var (
MostRequestedPriorityMap = mostResourcePriority.PriorityMap
)
func mostResourceScorer(requested, allocable *schedulercache.Resource) int64 {
func mostResourceScorer(requested, allocable *schedulercache.Resource, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
return (mostRequestedScore(requested.MilliCPU, allocable.MilliCPU) +
mostRequestedScore(requested.Memory, allocable.Memory)) / 2
}

View File

@ -24,7 +24,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestMostRequested(t *testing.T) {
@ -109,7 +109,7 @@ func TestMostRequested(t *testing.T) {
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
/*
@ -126,7 +126,7 @@ func TestMostRequested(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "nothing scheduled, nothing requested",
name: "nothing scheduled, nothing requested",
},
{
/*
@ -143,7 +143,7 @@ func TestMostRequested(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 5}},
test: "nothing scheduled, resources requested, differently sized machines",
name: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
@ -160,7 +160,7 @@ func TestMostRequested(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 4}},
test: "no resources requested, pods scheduled with resources",
name: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -183,7 +183,7 @@ func TestMostRequested(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 5}},
test: "resources requested, pods scheduled with resources",
name: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -204,18 +204,20 @@ func TestMostRequested(t *testing.T) {
pod: &v1.Pod{Spec: bigCPUAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 10000, 8000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 2}},
test: "resources requested with more than the node, pods scheduled with resources",
name: "resources requested with more than the node, pods scheduled with resources",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(MostRequestedPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(MostRequestedPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}

View File

@ -23,7 +23,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// CalculateNodeAffinityPriorityMap prioritizes nodes according to node affinity scheduling preferences
@ -37,12 +37,11 @@ func CalculateNodeAffinityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *s
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
var affinity *v1.Affinity
// default is the podspec.
affinity := pod.Spec.Affinity
if priorityMeta, ok := meta.(*priorityMetadata); ok {
// We were able to parse metadata, use affinity from there.
affinity = priorityMeta.affinity
} else {
// We couldn't parse metadata - fallback to the podspec.
affinity = pod.Spec.Affinity
}
var count int32

View File

@ -23,7 +23,7 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestNodeAffinityPriority(t *testing.T) {
@ -105,7 +105,7 @@ func TestNodeAffinityPriority(t *testing.T) {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: &v1.Pod{
@ -119,7 +119,7 @@ func TestNodeAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "all machines are same priority as NodeAffinity is nil",
name: "all machines are same priority as NodeAffinity is nil",
},
{
pod: &v1.Pod{
@ -133,7 +133,7 @@ func TestNodeAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "no machine macthes preferred scheduling requirements in NodeAffinity of pod so all machines' priority is zero",
name: "no machine macthes preferred scheduling requirements in NodeAffinity of pod so all machines' priority is zero",
},
{
pod: &v1.Pod{
@ -147,7 +147,7 @@ func TestNodeAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "only machine1 matches the preferred scheduling requirements of pod",
name: "only machine1 matches the preferred scheduling requirements of pod",
},
{
pod: &v1.Pod{
@ -161,19 +161,21 @@ func TestNodeAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine5", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 3}},
test: "all machines matches the preferred scheduling requirements of pod but with different priorities ",
name: "all machines matches the preferred scheduling requirements of pod but with different priorities ",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
nap := priorityFunction(CalculateNodeAffinityPriorityMap, CalculateNodeAffinityPriorityReduce, nil)
list, err := nap(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: \nexpected %#v, \ngot %#v", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
nap := priorityFunction(CalculateNodeAffinityPriorityMap, CalculateNodeAffinityPriorityReduce, nil)
list, err := nap(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, \ngot %#v", test.expectedList, list)
}
})
}
}

View File

@ -23,7 +23,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// NodeLabelPrioritizer contains information to calculate node label priority.

View File

@ -24,7 +24,7 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestNewNodeLabelPriority(t *testing.T) {
@ -36,7 +36,7 @@ func TestNewNodeLabelPriority(t *testing.T) {
label string
presence bool
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
nodes: []*v1.Node{
@ -47,7 +47,7 @@ func TestNewNodeLabelPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "baz",
presence: true,
test: "no match found, presence true",
name: "no match found, presence true",
},
{
nodes: []*v1.Node{
@ -58,7 +58,7 @@ func TestNewNodeLabelPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
label: "baz",
presence: false,
test: "no match found, presence false",
name: "no match found, presence false",
},
{
nodes: []*v1.Node{
@ -69,7 +69,7 @@ func TestNewNodeLabelPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "foo",
presence: true,
test: "one match found, presence true",
name: "one match found, presence true",
},
{
nodes: []*v1.Node{
@ -80,7 +80,7 @@ func TestNewNodeLabelPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
label: "foo",
presence: false,
test: "one match found, presence false",
name: "one match found, presence false",
},
{
nodes: []*v1.Node{
@ -91,7 +91,7 @@ func TestNewNodeLabelPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
label: "bar",
presence: true,
test: "two matches found, presence true",
name: "two matches found, presence true",
},
{
nodes: []*v1.Node{
@ -102,25 +102,27 @@ func TestNewNodeLabelPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "bar",
presence: false,
test: "two matches found, presence false",
name: "two matches found, presence false",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
labelPrioritizer := &NodeLabelPrioritizer{
label: test.label,
presence: test.presence,
}
list, err := priorityFunction(labelPrioritizer.CalculateNodeLabelPriorityMap, nil, nil)(nil, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
labelPrioritizer := &NodeLabelPrioritizer{
label: test.label,
presence: test.presence,
}
list, err := priorityFunction(labelPrioritizer.CalculateNodeLabelPriorityMap, nil, nil)(nil, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}

View File

@ -24,7 +24,7 @@ import (
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// CalculateNodePreferAvoidPodsPriorityMap prioritizes nodes according to the node annotation

View File

@ -24,7 +24,7 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestNodePreferAvoidPriority(t *testing.T) {
@ -84,7 +84,7 @@ func TestNodePreferAvoidPriority(t *testing.T) {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: &v1.Pod{
@ -97,7 +97,7 @@ func TestNodePreferAvoidPriority(t *testing.T) {
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "pod managed by ReplicationController should avoid a node, this node get lowest priority score",
name: "pod managed by ReplicationController should avoid a node, this node get lowest priority score",
},
{
pod: &v1.Pod{
@ -110,7 +110,7 @@ func TestNodePreferAvoidPriority(t *testing.T) {
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "ownership by random controller should be ignored",
name: "ownership by random controller should be ignored",
},
{
pod: &v1.Pod{
@ -123,7 +123,7 @@ func TestNodePreferAvoidPriority(t *testing.T) {
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "owner without Controller field set should be ignored",
name: "owner without Controller field set should be ignored",
},
{
pod: &v1.Pod{
@ -136,21 +136,23 @@ func TestNodePreferAvoidPriority(t *testing.T) {
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "pod managed by ReplicaSet should avoid a node, this node get lowest priority score",
name: "pod managed by ReplicaSet should avoid a node, this node get lowest priority score",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}

View File

@ -20,7 +20,7 @@ import (
"k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// NormalizeReduce generates a PriorityReduceFunction that can normalize the result

View File

@ -0,0 +1,141 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// FunctionShape represents shape of scoring function.
// For safety use NewFunctionShape which performs precondition checks for struct creation.
type FunctionShape []FunctionShapePoint
// FunctionShapePoint represents single point in scoring function shape.
type FunctionShapePoint struct {
// Utilization is function argument.
Utilization int64
// Score is function value.
Score int64
}
var (
// give priority to least utilized nodes by default
defaultFunctionShape, _ = NewFunctionShape([]FunctionShapePoint{{0, 10}, {100, 0}})
)
const (
minUtilization = 0
maxUtilization = 100
minScore = 0
maxScore = schedulerapi.MaxPriority
)
// NewFunctionShape creates instance of FunctionShape in a safe way performing all
// necessary sanity checks.
func NewFunctionShape(points []FunctionShapePoint) (FunctionShape, error) {
n := len(points)
if n == 0 {
return nil, fmt.Errorf("at least one point must be specified")
}
for i := 1; i < n; i++ {
if points[i-1].Utilization >= points[i].Utilization {
return nil, fmt.Errorf("utilization values must be sorted. Utilization[%d]==%d >= Utilization[%d]==%d", i-1, points[i-1].Utilization, i, points[i].Utilization)
}
}
for i, point := range points {
if point.Utilization < minUtilization {
return nil, fmt.Errorf("utilization values must not be less than %d. Utilization[%d]==%d", minUtilization, i, point.Utilization)
}
if point.Utilization > maxUtilization {
return nil, fmt.Errorf("utilization values must not be greater than %d. Utilization[%d]==%d", maxUtilization, i, point.Utilization)
}
if point.Score < minScore {
return nil, fmt.Errorf("score values must not be less than %d. Score[%d]==%d", minScore, i, point.Score)
}
if point.Score > maxScore {
return nil, fmt.Errorf("score valuses not be greater than %d. Score[%d]==%d", maxScore, i, point.Score)
}
}
// We make a defensive copy so we make no assumptions about whether the points slice passed as an argument is modified afterwards
pointsCopy := make(FunctionShape, n)
copy(pointsCopy, points)
return pointsCopy, nil
}
// RequestedToCapacityRatioResourceAllocationPriorityDefault creates a requestedToCapacity based
// ResourceAllocationPriority using default resource scoring function shape.
// The default function assigns the maximum score to a resource when all of its capacity is available
// and 0 when the requested amount equals the capacity.
func RequestedToCapacityRatioResourceAllocationPriorityDefault() *ResourceAllocationPriority {
return RequestedToCapacityRatioResourceAllocationPriority(defaultFunctionShape)
}
// RequestedToCapacityRatioResourceAllocationPriority creates a requestedToCapacity based
// ResourceAllocationPriority using provided resource scoring function shape.
func RequestedToCapacityRatioResourceAllocationPriority(scoringFunctionShape FunctionShape) *ResourceAllocationPriority {
return &ResourceAllocationPriority{"RequestedToCapacityRatioResourceAllocationPriority", buildRequestedToCapacityRatioScorerFunction(scoringFunctionShape)}
}
func buildRequestedToCapacityRatioScorerFunction(scoringFunctionShape FunctionShape) func(*schedulercache.Resource, *schedulercache.Resource, bool, int, int) int64 {
rawScoringFunction := buildBrokenLinearFunction(scoringFunctionShape)
resourceScoringFunction := func(requested, capacity int64) int64 {
if capacity == 0 || requested > capacity {
return rawScoringFunction(maxUtilization)
}
return rawScoringFunction(maxUtilization - (capacity-requested)*maxUtilization/capacity)
}
return func(requested, allocable *schedulercache.Resource, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
cpuScore := resourceScoringFunction(requested.MilliCPU, allocable.MilliCPU)
memoryScore := resourceScoringFunction(requested.Memory, allocable.Memory)
return (cpuScore + memoryScore) / 2
}
}
// Creates a function which is built using linear segments. Segments are defined via shape array.
// Shape[i].Utilization slice represents points on "utilization" axis where different segments meet.
// Shape[i].Score represents function values at meeting points.
//
// function f(p) is defined as:
// shape[0].Score for p < f[0].Utilization
// shape[i].Score for p == shape[i].Utilization
// shape[n-1].Score for p > shape[n-1].Utilization
// and linear between points (p < shape[i].Utilization)
func buildBrokenLinearFunction(shape FunctionShape) func(int64) int64 {
n := len(shape)
return func(p int64) int64 {
for i := 0; i < n; i++ {
if p <= shape[i].Utilization {
if i == 0 {
return shape[0].Score
}
return shape[i-1].Score + (shape[i].Score-shape[i-1].Score)*(p-shape[i-1].Utilization)/(shape[i].Utilization-shape[i-1].Utilization)
}
}
return shape[n-1].Score
}
}
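// To make the piecewise mapping above concrete, a worked pass through the
// "differently sized machines" case from the new test file further below
// (integer arithmetic as in Go):
//
//   default shape: (utilization 0 -> score 10), (utilization 100 -> score 0)
//   node1: capacity 4000m CPU / 10000 memory, request 3000m / 5000:
//     CPU    utilization = 100 - (4000-3000)*100/4000   = 75 -> 10 + (0-10)*75/100 = 3
//     memory utilization = 100 - (10000-5000)*100/10000 = 50 -> 10 + (0-10)*50/100 = 5
//     score = (3 + 5) / 2 = 4
//   node2: capacity 6000m / 10000, same request:
//     CPU utilization = 50 -> 5, memory utilization = 50 -> 5, score = 5
//
// which matches the expected priorities {node1: 4, node2: 5} in TestRequestedToCapacityRatio.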

View File

@ -0,0 +1,241 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
"github.com/stretchr/testify/assert"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestCreatingFunctionShapeErrorsIfEmptyPoints(t *testing.T) {
var err error
_, err = NewFunctionShape([]FunctionShapePoint{})
assert.Equal(t, "at least one point must be specified", err.Error())
}
func TestCreatingFunctionShapeErrorsIfXIsNotSorted(t *testing.T) {
var err error
_, err = NewFunctionShape([]FunctionShapePoint{{10, 1}, {15, 2}, {20, 3}, {19, 4}, {25, 5}})
assert.Equal(t, "utilization values must be sorted. Utilization[2]==20 >= Utilization[3]==19", err.Error())
_, err = NewFunctionShape([]FunctionShapePoint{{10, 1}, {20, 2}, {20, 3}, {22, 4}, {25, 5}})
assert.Equal(t, "utilization values must be sorted. Utilization[1]==20 >= Utilization[2]==20", err.Error())
}
func TestCreatingFunctionPointNotInAllowedRange(t *testing.T) {
var err error
_, err = NewFunctionShape([]FunctionShapePoint{{-1, 0}, {100, 10}})
assert.Equal(t, "utilization values must not be less than 0. Utilization[0]==-1", err.Error())
_, err = NewFunctionShape([]FunctionShapePoint{{0, 0}, {101, 10}})
assert.Equal(t, "utilization values must not be greater than 100. Utilization[1]==101", err.Error())
_, err = NewFunctionShape([]FunctionShapePoint{{0, -1}, {100, 10}})
assert.Equal(t, "score values must not be less than 0. Score[0]==-1", err.Error())
_, err = NewFunctionShape([]FunctionShapePoint{{0, 0}, {100, 11}})
assert.Equal(t, "score valuses not be greater than 10. Score[1]==11", err.Error())
}
func TestBrokenLinearFunction(t *testing.T) {
type Assertion struct {
p int64
expected int64
}
type Test struct {
points []FunctionShapePoint
assertions []Assertion
}
tests := []Test{
{
points: []FunctionShapePoint{{10, 1}, {90, 9}},
assertions: []Assertion{
{p: -10, expected: 1},
{p: 0, expected: 1},
{p: 9, expected: 1},
{p: 10, expected: 1},
{p: 15, expected: 1},
{p: 19, expected: 1},
{p: 20, expected: 2},
{p: 89, expected: 8},
{p: 90, expected: 9},
{p: 99, expected: 9},
{p: 100, expected: 9},
{p: 110, expected: 9},
},
},
{
points: []FunctionShapePoint{{0, 2}, {40, 10}, {100, 0}},
assertions: []Assertion{
{p: -10, expected: 2},
{p: 0, expected: 2},
{p: 20, expected: 6},
{p: 30, expected: 8},
{p: 40, expected: 10},
{p: 70, expected: 5},
{p: 100, expected: 0},
{p: 110, expected: 0},
},
},
{
points: []FunctionShapePoint{{0, 2}, {40, 2}, {100, 2}},
assertions: []Assertion{
{p: -10, expected: 2},
{p: 0, expected: 2},
{p: 20, expected: 2},
{p: 30, expected: 2},
{p: 40, expected: 2},
{p: 70, expected: 2},
{p: 100, expected: 2},
{p: 110, expected: 2},
},
},
}
for _, test := range tests {
functionShape, err := NewFunctionShape(test.points)
assert.Nil(t, err)
function := buildBrokenLinearFunction(functionShape)
for _, assertion := range test.assertions {
assert.InDelta(t, assertion.expected, function(assertion.p), 0.1, "points=%v, p=%d", test.points, assertion.p)
}
}
}
func TestRequestedToCapacityRatio(t *testing.T) {
type resources struct {
cpu int64
mem int64
}
type nodeResources struct {
capacity resources
used resources
}
type test struct {
test string
requested resources
nodes map[string]nodeResources
expectedPriorities schedulerapi.HostPriorityList
}
tests := []test{
{
test: "nothing scheduled, nothing requested (default - least requested nodes have priority)",
requested: resources{0, 0},
nodes: map[string]nodeResources{
"node1": {
capacity: resources{4000, 10000},
used: resources{0, 0},
},
"node2": {
capacity: resources{4000, 10000},
used: resources{0, 0},
},
},
expectedPriorities: []schedulerapi.HostPriority{{Host: "node1", Score: 10}, {Host: "node2", Score: 10}},
},
{
test: "nothing scheduled, resources requested, differently sized machines (default - least requested nodes have priority)",
requested: resources{3000, 5000},
nodes: map[string]nodeResources{
"node1": {
capacity: resources{4000, 10000},
used: resources{0, 0},
},
"node2": {
capacity: resources{6000, 10000},
used: resources{0, 0},
},
},
expectedPriorities: []schedulerapi.HostPriority{{Host: "node1", Score: 4}, {Host: "node2", Score: 5}},
},
{
test: "no resources requested, pods scheduled with resources (default - least requested nodes have priority)",
requested: resources{0, 0},
nodes: map[string]nodeResources{
"node1": {
capacity: resources{4000, 10000},
used: resources{3000, 5000},
},
"node2": {
capacity: resources{6000, 10000},
used: resources{3000, 5000},
},
},
expectedPriorities: []schedulerapi.HostPriority{{Host: "node1", Score: 4}, {Host: "node2", Score: 5}},
},
}
buildResourcesPod := func(node string, requestedResources resources) *v1.Pod {
return &v1.Pod{Spec: v1.PodSpec{
NodeName: node,
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(requestedResources.cpu, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(requestedResources.mem, resource.DecimalSI),
},
},
},
},
},
}
}
for _, test := range tests {
nodeNames := make([]string, 0)
for nodeName := range test.nodes {
nodeNames = append(nodeNames, nodeName)
}
sort.Strings(nodeNames)
nodes := make([]*v1.Node, 0)
for _, nodeName := range nodeNames {
node := test.nodes[nodeName]
nodes = append(nodes, makeNode(nodeName, node.capacity.cpu, node.capacity.mem))
}
scheduledPods := make([]*v1.Pod, 0)
for name, node := range test.nodes {
scheduledPods = append(scheduledPods,
buildResourcesPod(name, node.used))
}
newPod := buildResourcesPod("", test.requested)
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(scheduledPods, nodes)
list, err := priorityFunction(RequestedToCapacityRatioResourceAllocationPriorityDefault().PriorityMap, nil, nil)(newPod, nodeNameToInfo, nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedPriorities, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedPriorities, list)
}
}
}
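The expected scores in the "differently sized machines" case follow from per-resource utilization. A hedged reconstruction, assuming the default shape awards 10 at 0% utilization and 0 at 100% and that the final score is the integer mean of the CPU and memory scores (both assumptions about code not shown here), reproduces the 4 and 5 above:
package main
import "fmt"
// score maps each resource's utilization (percent) linearly from 10 (empty)
// down to 0 (full) and averages the CPU and memory results.
func score(reqCPU, capCPU, reqMem, capMem int64) int64 {
	resourceScore := func(requested, capacity int64) int64 {
		if capacity == 0 || requested > capacity {
			return 0
		}
		utilization := requested * 100 / capacity
		return 10 - utilization*10/100
	}
	return (resourceScore(reqCPU, capCPU) + resourceScore(reqMem, capMem)) / 2
}
func main() {
	fmt.Println(score(3000, 4000, 5000, 10000)) // node1: (3+5)/2 = 4
	fmt.Println(score(3000, 6000, 5000, 10000)) // node2: (5+5)/2 = 5
}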

View File

@ -23,13 +23,13 @@ import (
"k8s.io/api/core/v1"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// ResourceAllocationPriority contains information to calculate resource allocation priority.
type ResourceAllocationPriority struct {
Name string
scorer func(requested, allocable *schedulercache.Resource) int64
scorer func(requested, allocable *schedulercache.Resource, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64
}
// PriorityMap prioritizes nodes according to the resource allocations on the node.
@ -54,8 +54,13 @@ func (r *ResourceAllocationPriority) PriorityMap(
requested.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
requested.Memory += nodeInfo.NonZeroRequest().Memory
score := r.scorer(&requested, &allocatable)
var score int64
// Check whether the pod has volumes; if so, pass the node's volume counts to the scorer for balanced resource allocation.
if len(pod.Spec.Volumes) >= 0 && nodeInfo.TransientInfo != nil {
score = r.scorer(&requested, &allocatable, true, nodeInfo.TransientInfo.TransNodeInfo.RequestedVolumes, nodeInfo.TransientInfo.TransNodeInfo.AllocatableVolumesCount)
} else {
score = r.scorer(&requested, &allocatable, false, 0, 0)
}
if glog.V(10) {
glog.Infof(

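The widened scorer signature above lets callers fold volume usage into the score. A hypothetical scorer with that shape (an illustration only, not the vendored balanced-resource-allocation logic; the resources struct stands in for the scheduler's Resource type):
package main
import "fmt"
// resources stands in for the scheduler's Resource type in this sketch.
type resources struct {
	MilliCPU int64
	Memory   int64
}
// hypotheticalScorer averages how "full" CPU, memory and (optionally) volumes
// would be, and maps lower average usage to a higher 0..10 score.
func hypotheticalScorer(requested, allocable resources, includeVolumes bool, requestedVolumes, allocatableVolumes int) int64 {
	fraction := func(req, capacity int64) float64 {
		if capacity == 0 {
			return 1 // treat an unreported capacity as fully used
		}
		return float64(req) / float64(capacity)
	}
	sum := fraction(requested.MilliCPU, allocable.MilliCPU) + fraction(requested.Memory, allocable.Memory)
	n := 2.0
	if includeVolumes && allocatableVolumes > 0 {
		sum += float64(requestedVolumes) / float64(allocatableVolumes)
		n++
	}
	return int64((1 - sum/n) * 10)
}
func main() {
	fmt.Println(hypotheticalScorer(resources{2000, 4096}, resources{4000, 8192}, true, 2, 10)) // 6
}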
View File

@ -20,9 +20,8 @@ import (
"fmt"
"k8s.io/api/core/v1"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"github.com/golang/glog"
)
@ -93,35 +92,7 @@ func getResourceLimits(pod *v1.Pod) *schedulercache.Resource {
// take max_resource(sum_pod, any_init_container)
for _, container := range pod.Spec.InitContainers {
for rName, rQuantity := range container.Resources.Limits {
switch rName {
case v1.ResourceMemory:
if mem := rQuantity.Value(); mem > result.Memory {
result.Memory = mem
}
case v1.ResourceCPU:
if cpu := rQuantity.MilliValue(); cpu > result.MilliCPU {
result.MilliCPU = cpu
}
// keeping these resources though score computation in other priority functions and in this
// are only computed based on cpu and memory only.
case v1.ResourceEphemeralStorage:
if ephemeralStorage := rQuantity.Value(); ephemeralStorage > result.EphemeralStorage {
result.EphemeralStorage = ephemeralStorage
}
case v1.ResourceNvidiaGPU:
if gpu := rQuantity.Value(); gpu > result.NvidiaGPU {
result.NvidiaGPU = gpu
}
default:
if v1helper.IsScalarResourceName(rName) {
value := rQuantity.Value()
if value > result.ScalarResources[rName] {
result.SetScalar(rName, value)
}
}
}
}
result.SetMaxResource(container.Resources.Limits)
}
return result
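The deleted switch is equivalent to taking, per resource name, the maximum of the value already accumulated and the init container's limit, which is what the SetMaxResource call now expresses. A stand-alone sketch of that element-wise max idea, using plain maps instead of the scheduler's Resource type:
package main
import "fmt"
// setMax folds limits into acc, keeping the larger value per resource name.
func setMax(acc map[string]int64, limits map[string]int64) {
	for name, v := range limits {
		if v > acc[name] {
			acc[name] = v
		}
	}
}
func main() {
	acc := map[string]int64{"cpu": 200, "memory": 256}
	setMax(acc, map[string]int64{"cpu": 500, "memory": 128, "example.com/foo": 1})
	fmt.Println(acc) // map[cpu:500 example.com/foo:1 memory:256]
}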

View File

@ -24,7 +24,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
//metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestResourceLimistPriority(t *testing.T) {
@ -103,49 +103,50 @@ func TestResourceLimistPriority(t *testing.T) {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 0), makeNode("machine3", 0, 10000), makeNode("machine4", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}, {Host: "machine4", Score: 0}},
test: "pod does not specify its resource limits",
name: "pod does not specify its resource limits",
},
{
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 3000, 10000), makeNode("machine2", 2000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 0}},
test: "pod only specifies cpu limits",
name: "pod only specifies cpu limits",
},
{
pod: &v1.Pod{Spec: memOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 4000), makeNode("machine2", 5000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 1}},
test: "pod only specifies mem limits",
name: "pod only specifies mem limits",
},
{
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 4000), makeNode("machine2", 5000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 1}},
test: "pod specifies both cpu and mem limits",
name: "pod specifies both cpu and mem limits",
},
{
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}},
test: "node does not advertise its allocatables",
name: "node does not advertise its allocatables",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(ResourceLimitsPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(ResourceLimitsPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}
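The loop above now follows Go's table-driven subtest pattern: each case runs under t.Run with its own name, so a failure report identifies the case instead of relying on a hand-rolled index. Reduced to its essentials, the pattern is:
package example
import "testing"
func TestTableDriven(t *testing.T) {
	tests := []struct {
		name string
		in   int
		want int
	}{
		{name: "zero stays zero", in: 0, want: 0},
		{name: "doubles", in: 2, want: 4},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			if got := tc.in * 2; got != tc.want {
				t.Errorf("expected %d, got %d", tc.want, got)
			}
		})
	}
}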

View File

@ -23,7 +23,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
utilnode "k8s.io/kubernetes/pkg/util/node"
"github.com/golang/glog"
@ -211,7 +211,9 @@ func filteredPod(namespace string, selector labels.Selector, nodeInfo *scheduler
return []*v1.Pod{}
}
for _, pod := range nodeInfo.Pods() {
if namespace == pod.Namespace && selector.Matches(labels.Set(pod.Labels)) {
// Ignore pods being deleted for spreading purposes
// Similar to how it is done for SelectorSpreadPriority
if namespace == pod.Namespace && pod.DeletionTimestamp == nil && selector.Matches(labels.Set(pod.Labels)) {
pods = append(pods, pod)
}
}
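With the added condition, a pod only counts toward spreading when it lives in the requested namespace, is not being deleted, and matches the selector. Pulled out on its own (a sketch; the helper name is invented here):
package spreading
import (
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/labels"
)
// countsForSpreading reports whether a pod should be counted by the
// service anti-affinity spreading logic shown above.
func countsForSpreading(pod *v1.Pod, namespace string, selector labels.Selector) bool {
	return pod.Namespace == namespace &&
		pod.DeletionTimestamp == nil && // pods being deleted are ignored
		selector.Matches(labels.Set(pod.Labels))
}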

View File

@ -27,7 +27,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
@ -64,20 +64,20 @@ func TestSelectorSpreadPriority(t *testing.T) {
services []*v1.Service
sss []*apps.StatefulSet
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: new(v1.Pod),
nodes: []string{"machine1", "machine2"},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled",
name: "nothing scheduled",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{{Spec: zone1Spec}},
nodes: []string{"machine1", "machine2"},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "no services",
name: "no services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -85,7 +85,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"key": "value"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "different services",
name: "different services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -96,7 +96,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "two pods, one service pod",
name: "two pods, one service pod",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -110,7 +110,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "five pods, one service pod in no namespace",
name: "five pods, one service pod in no namespace",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
@ -123,7 +123,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}, ObjectMeta: metav1.ObjectMeta{Namespace: metav1.NamespaceDefault}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "four pods, one service pod in default namespace",
name: "four pods, one service pod in default namespace",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: "ns1"}},
@ -137,7 +137,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}, ObjectMeta: metav1.ObjectMeta{Namespace: "ns1"}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "five pods, one service pod in specific namespace",
name: "five pods, one service pod in specific namespace",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -149,7 +149,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "three pods, two service pods on different machines",
name: "three pods, two service pods on different machines",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -162,7 +162,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 0}},
test: "four pods, three service pods",
name: "four pods, three service pods",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -174,7 +174,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches",
name: "service with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
@ -189,7 +189,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
// "baz=blah" matches both labels1 and labels2, and "foo=bar" matches only labels 1. This means that we assume that we want to
// do spreading between all pods. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches with service and replication controller",
name: "service with partial pod label matches with service and replication controller",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
@ -203,7 +203,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches with service and replica set",
name: "service with partial pod label matches with service and replica set",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
@ -216,7 +216,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches with service and replica set",
name: "service with partial pod label matches with service and replica set",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar", "bar": "foo"}, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
@ -230,7 +230,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"bar": "foo"}}}},
// Taken together Service and Replication Controller should match all Pods, hence result should be equal to one above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "disjoined service and replication controller should be treated equally",
name: "disjoined service and replication controller should be treated equally",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar", "bar": "foo"}, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
@ -244,7 +244,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "disjoined service and replica set should be treated equally",
name: "disjoined service and replica set should be treated equally",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar", "bar": "foo"}, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
@ -257,7 +257,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"bar": "foo"}}}},
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "disjoined service and replica set should be treated equally",
name: "disjoined service and replica set should be treated equally",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
@ -270,7 +270,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}},
// Both Nodes have one pod from the given RC, hence both get 0 score.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "Replication controller with partial pod label matches",
name: "Replication controller with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
@ -283,7 +283,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "Replica set with partial pod label matches",
name: "Replica set with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
@ -296,7 +296,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use StatefulSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "StatefulSet with partial pod label matches",
name: "StatefulSet with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
@ -308,7 +308,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"baz": "blah"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "Another replication controller with partial pod label matches",
name: "Another replication controller with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
@ -321,7 +321,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"baz": "blah"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "Another replication set with partial pod label matches",
name: "Another replication set with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
@ -334,34 +334,36 @@ func TestSelectorSpreadPriority(t *testing.T) {
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"baz": "blah"}}}}},
// We use StatefulSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "Another stateful set with partial pod label matches",
name: "Another stateful set with partial pod label matches",
},
}
for i, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeNodeList(test.nodes))
selectorSpread := SelectorSpread{
serviceLister: schedulertesting.FakeServiceLister(test.services),
controllerLister: schedulertesting.FakeControllerLister(test.rcs),
replicaSetLister: schedulertesting.FakeReplicaSetLister(test.rss),
statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss),
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeNodeList(test.nodes))
selectorSpread := SelectorSpread{
serviceLister: schedulertesting.FakeServiceLister(test.services),
controllerLister: schedulertesting.FakeControllerLister(test.rcs),
replicaSetLister: schedulertesting.FakeReplicaSetLister(test.rss),
statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss),
}
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(test.rcs),
schedulertesting.FakeReplicaSetLister(test.rss),
schedulertesting.FakeStatefulSetLister(test.sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(test.rcs),
schedulertesting.FakeReplicaSetLister(test.rss),
schedulertesting.FakeStatefulSetLister(test.sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeNodeList(test.nodes))
if err != nil {
t.Errorf("unexpected error: %v index : %d\n", err, i)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeNodeList(test.nodes))
if err != nil {
t.Errorf("unexpected error: %v \n", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}
@ -407,13 +409,12 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []string
rcs []*v1.ReplicationController
rss []*extensions.ReplicaSet
services []*v1.Service
sss []*apps.StatefulSet
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: new(v1.Pod),
@ -425,7 +426,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "nothing scheduled",
name: "nothing scheduled",
},
{
pod: buildPod("", labels1, nil),
@ -438,7 +439,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "no services",
name: "no services",
},
{
pod: buildPod("", labels1, nil),
@ -452,7 +453,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "different services",
name: "different services",
},
{
pod: buildPod("", labels1, nil),
@ -469,7 +470,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "two pods, 0 matching",
name: "two pods, 0 matching",
},
{
pod: buildPod("", labels1, nil),
@ -486,7 +487,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "two pods, 1 matching (in z2)",
name: "two pods, 1 matching (in z2)",
},
{
pod: buildPod("", labels1, nil),
@ -506,7 +507,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: 3}, // Pod on node
{Host: nodeMachine3Zone3, Score: 6}, // Pod in zone
},
test: "five pods, 3 matching (z2=2, z3=1)",
name: "five pods, 3 matching (z2=2, z3=1)",
},
{
pod: buildPod("", labels1, nil),
@ -525,7 +526,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: 3}, // Pod in zone
{Host: nodeMachine3Zone3, Score: 3}, // Pod in zone
},
test: "four pods, 3 matching (z1=1, z2=1, z3=1)",
name: "four pods, 3 matching (z1=1, z2=1, z3=1)",
},
{
pod: buildPod("", labels1, nil),
@ -544,7 +545,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: 3}, // Pod in zone
{Host: nodeMachine3Zone3, Score: 3}, // Pod in zone
},
test: "four pods, 3 matching (z1=1, z2=1, z3=1)",
name: "four pods, 3 matching (z1=1, z2=1, z3=1)",
},
{
pod: buildPod("", labels1, controllerRef("ReplicationController", "name", "abc123")),
@ -569,36 +570,38 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: 3}, // Pod in zone
{Host: nodeMachine3Zone3, Score: 3}, // Pod in zone
},
test: "Replication controller spreading (z1=0, z2=1, z3=2)",
name: "Replication controller spreading (z1=0, z2=1, z3=2)",
},
}
for i, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeLabeledNodeList(labeledNodes))
selectorSpread := SelectorSpread{
serviceLister: schedulertesting.FakeServiceLister(test.services),
controllerLister: schedulertesting.FakeControllerLister(test.rcs),
replicaSetLister: schedulertesting.FakeReplicaSetLister(test.rss),
statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss),
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeLabeledNodeList(labeledNodes))
selectorSpread := SelectorSpread{
serviceLister: schedulertesting.FakeServiceLister(test.services),
controllerLister: schedulertesting.FakeControllerLister(test.rcs),
replicaSetLister: schedulertesting.FakeReplicaSetLister(test.rss),
statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss),
}
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(test.rcs),
schedulertesting.FakeReplicaSetLister(test.rss),
schedulertesting.FakeStatefulSetLister(test.sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeLabeledNodeList(labeledNodes))
if err != nil {
t.Errorf("unexpected error: %v index : %d", err, i)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(test.rcs),
schedulertesting.FakeReplicaSetLister(test.rss),
schedulertesting.FakeStatefulSetLister(test.sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeLabeledNodeList(labeledNodes))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}
@ -640,7 +643,7 @@ func TestZoneSpreadPriority(t *testing.T) {
nodes map[string]map[string]string
services []*v1.Service
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: new(v1.Pod),
@ -648,7 +651,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "nothing scheduled",
name: "nothing scheduled",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -657,7 +660,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "no services",
name: "no services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -667,7 +670,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "different services",
name: "different services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -681,7 +684,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: 0}, {Host: "machine22", Score: 0},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "three pods, one service pod",
name: "three pods, one service pod",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -695,7 +698,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 5}, {Host: "machine12", Score: 5},
{Host: "machine21", Score: 5}, {Host: "machine22", Score: 5},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "three pods, two service pods on different machines",
name: "three pods, two service pods on different machines",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
@ -710,7 +713,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 0}, {Host: "machine12", Score: 0},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "three service label match pods in different namespaces",
name: "three service label match pods in different namespaces",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -725,7 +728,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 6}, {Host: "machine12", Score: 6},
{Host: "machine21", Score: 3}, {Host: "machine22", Score: 3},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "four pods, three service pods",
name: "four pods, three service pods",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -739,7 +742,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 3}, {Host: "machine12", Score: 3},
{Host: "machine21", Score: 6}, {Host: "machine22", Score: 6},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "service with partial pod label matches",
name: "service with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -754,7 +757,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 7}, {Host: "machine12", Score: 7},
{Host: "machine21", Score: 5}, {Host: "machine22", Score: 5},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "service pod on non-zoned node",
name: "service pod on non-zoned node",
},
}
// these local variables just make sure controllerLister/replicaSetLister/statefulSetLister are not nil
@ -763,28 +766,30 @@ func TestZoneSpreadPriority(t *testing.T) {
rcs := []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}}
rss := []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}}
for i, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeLabeledNodeList(test.nodes))
zoneSpread := ServiceAntiAffinity{podLister: schedulertesting.FakePodLister(test.pods), serviceLister: schedulertesting.FakeServiceLister(test.services), label: "zone"}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeLabeledNodeList(test.nodes))
zoneSpread := ServiceAntiAffinity{podLister: schedulertesting.FakePodLister(test.pods), serviceLister: schedulertesting.FakeServiceLister(test.services), label: "zone"}
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(rcs),
schedulertesting.FakeReplicaSetLister(rss),
schedulertesting.FakeStatefulSetLister(sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(zoneSpread.CalculateAntiAffinityPriorityMap, zoneSpread.CalculateAntiAffinityPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeLabeledNodeList(test.nodes))
if err != nil {
t.Errorf("unexpected error: %v index : %d", err, i)
}
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(rcs),
schedulertesting.FakeReplicaSetLister(rss),
schedulertesting.FakeStatefulSetLister(sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(zoneSpread.CalculateAntiAffinityPriorityMap, zoneSpread.CalculateAntiAffinityPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeLabeledNodeList(test.nodes))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("test index %d (%s): expected %#v, got %#v", i, test.test, test.expectedList, list)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}
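The 6/3 and 7/5 expectations above are consistent with scoring each zone as MaxPriority * (total matching pods - matching pods in that zone) / total matching pods, truncated to an integer. That formula is inferred from the expected values rather than quoted from the implementation:
package main
import "fmt"
// zoneScore infers the anti-affinity spreading score for a zone on the
// scheduler's 0..10 scale.
func zoneScore(totalMatching, matchingInZone int) int {
	if totalMatching == 0 {
		return 10
	}
	return 10 * (totalMatching - matchingInZone) / totalMatching
}
func main() {
	// "four pods, three service pods": zone1 holds 1 match, zone2 holds 2.
	fmt.Println(zoneScore(3, 1), zoneScore(3, 2)) // 6 3
}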

View File

@ -22,7 +22,7 @@ import (
"k8s.io/api/core/v1"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// CountIntolerableTaintsPreferNoSchedule gives the count of intolerable taints of a pod with effect PreferNoSchedule

View File

@ -23,7 +23,7 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func nodeWithTaints(nodeName string, taints []v1.Taint) *v1.Node {
@ -54,11 +54,11 @@ func TestTaintAndToleration(t *testing.T) {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
// basic test case
{
test: "node with taints tolerated by the pod, gets a higher score than those node with intolerable taints",
name: "node with taints tolerated by the pod, gets a higher score than those node with intolerable taints",
pod: podWithTolerations([]v1.Toleration{{
Key: "foo",
Operator: v1.TolerationOpEqual,
@ -84,7 +84,7 @@ func TestTaintAndToleration(t *testing.T) {
},
// the count of taints that are tolerated by the pod does not matter.
{
test: "the nodes that all of their taints are tolerated by the pod, get the same score, no matter how many tolerable taints a node has",
name: "the nodes that all of their taints are tolerated by the pod, get the same score, no matter how many tolerable taints a node has",
pod: podWithTolerations([]v1.Toleration{
{
Key: "cpu-type",
@ -127,7 +127,7 @@ func TestTaintAndToleration(t *testing.T) {
},
// the count of taints on a node that are not tolerated by the pod matters.
{
test: "the more intolerable taints a node has, the lower score it gets.",
name: "the more intolerable taints a node has, the lower score it gets.",
pod: podWithTolerations([]v1.Toleration{{
Key: "foo",
Operator: v1.TolerationOpEqual,
@ -163,7 +163,7 @@ func TestTaintAndToleration(t *testing.T) {
},
// the taints-tolerations priority only considers taints and tolerations that have the PreferNoSchedule effect
{
test: "only taints and tolerations that have effect PreferNoSchedule are checked by taints-tolerations priority function",
name: "only taints and tolerations that have effect PreferNoSchedule are checked by taints-tolerations priority function",
pod: podWithTolerations([]v1.Toleration{
{
Key: "cpu-type",
@ -205,7 +205,7 @@ func TestTaintAndToleration(t *testing.T) {
},
},
{
test: "Default behaviour No taints and tolerations, lands on node with no taints",
name: "Default behaviour No taints and tolerations, lands on node with no taints",
//pod without tolerations
pod: podWithTolerations([]v1.Toleration{}),
nodes: []*v1.Node{
@ -226,16 +226,17 @@ func TestTaintAndToleration(t *testing.T) {
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
ttp := priorityFunction(ComputeTaintTolerationPriorityMap, ComputeTaintTolerationPriorityReduce, nil)
list, err := ttp(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("%s, unexpected error: %v", test.test, err)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
ttp := priorityFunction(ComputeTaintTolerationPriorityMap, ComputeTaintTolerationPriorityReduce, nil)
list, err := ttp(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s,\nexpected:\n\t%+v,\ngot:\n\t%+v", test.test, test.expectedList, list)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected:\n\t%+v,\ngot:\n\t%+v", test.expectedList, list)
}
})
}
}

View File

@ -22,7 +22,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func makeNode(node string, milliCPU, memory int64) *v1.Node {

View File

@ -32,9 +32,8 @@ const DefaultMilliCPURequest int64 = 100 // 0.1 core
// DefaultMemoryRequest defines default memory request size.
const DefaultMemoryRequest int64 = 200 * 1024 * 1024 // 200 MB
// GetNonzeroRequests returns the default resource request if none is found or what is provided on the request
// TODO: Consider setting default as a fixed fraction of machine capacity (take "capacity v1.ResourceList"
// as an additional argument here) rather than using constants
// GetNonzeroRequests returns the default resource request if none is found or
// what is provided on the request.
func GetNonzeroRequests(requests *v1.ResourceList) (int64, int64) {
var outMilliCPU, outMemory int64
// Override if un-set, but not if explicitly set to zero

View File

@ -19,7 +19,7 @@ package algorithm
import (
"k8s.io/api/core/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// SchedulerExtender is an interface for external processes to influence scheduling
@ -29,7 +29,9 @@ type SchedulerExtender interface {
// Filter based on extender-implemented predicate functions. The filtered list is
// expected to be a subset of the supplied list. failedNodesMap optionally contains
// the list of failed nodes and failure reasons.
Filter(pod *v1.Pod, nodes []*v1.Node, nodeNameToInfo map[string]*schedulercache.NodeInfo) (filteredNodes []*v1.Node, failedNodesMap schedulerapi.FailedNodesMap, err error)
Filter(pod *v1.Pod,
nodes []*v1.Node, nodeNameToInfo map[string]*schedulercache.NodeInfo,
) (filteredNodes []*v1.Node, failedNodesMap schedulerapi.FailedNodesMap, err error)
// Prioritize based on extender-implemented priority functions. The returned scores & weight
// are used to compute the weighted score for an extender. The weighted scores are added to
@ -45,6 +47,27 @@ type SchedulerExtender interface {
// IsInterested returns true if at least one extended resource requested by
// this pod is managed by this extender.
IsInterested(pod *v1.Pod) bool
// ProcessPreemption returns nodes with their victim pods processed by the extender, given:
// 1. The pod to schedule.
// 2. Candidate nodes and victim pods (nodeToVictims) generated by the previous scheduling process.
// 3. nodeNameToInfo to restore a v1.Node from its name if the extender cache is enabled.
// The possible changes made by the extender include:
// 1. A subset of the given candidate nodes after the extender's preemption phase.
// 2. A different set of victim pods for each given candidate node after the extender's preemption phase.
ProcessPreemption(
pod *v1.Pod,
nodeToVictims map[*v1.Node]*schedulerapi.Victims,
nodeNameToInfo map[string]*schedulercache.NodeInfo,
) (map[*v1.Node]*schedulerapi.Victims, error)
// SupportsPreemption reports whether the scheduler extender supports preemption.
SupportsPreemption() bool
// IsIgnorable returns true to indicate that scheduling should not fail when this extender
// is unavailable, letting the scheduler fail fast and tolerate non-critical extenders.
IsIgnorable() bool
}
// ScheduleAlgorithm is an interface implemented by things that know how to schedule pods

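An extender that opts out of the new preemption hooks can satisfy them trivially. A partial sketch showing only the three methods added above (the rest of SchedulerExtender is omitted and the type name is invented):
package example
import (
	"k8s.io/api/core/v1"
	schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
	schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// noopPreemptionExtender sketches the preemption-related surface added to
// SchedulerExtender above.
type noopPreemptionExtender struct{}
// SupportsPreemption reports that this extender takes no part in preemption.
func (e *noopPreemptionExtender) SupportsPreemption() bool { return false }
// IsIgnorable lets the scheduler proceed even when this extender is unreachable.
func (e *noopPreemptionExtender) IsIgnorable() bool { return true }
// ProcessPreemption echoes the candidates unchanged, since preemption is not supported.
func (e *noopPreemptionExtender) ProcessPreemption(
	pod *v1.Pod,
	nodeToVictims map[*v1.Node]*schedulerapi.Victims,
	nodeNameToInfo map[string]*schedulercache.NodeInfo,
) (map[*v1.Node]*schedulerapi.Victims, error) {
	return nodeToVictims, nil
}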
View File

@ -22,9 +22,15 @@ import (
extensions "k8s.io/api/extensions/v1beta1"
"k8s.io/apimachinery/pkg/labels"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// NodeFieldSelectorKeys maps node field selector keys to the functions that
// extract the corresponding value from a node.
var NodeFieldSelectorKeys = map[string]func(*v1.Node) string{
NodeFieldSelectorKeyNodeName: func(n *v1.Node) string { return n.Name },
}
// FitPredicate is a function that indicates if a pod fits into an existing node.
// The failure information is given by the error.
type FitPredicate func(pod *v1.Pod, meta PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []PredicateFailureReason, error)
@ -78,9 +84,6 @@ type PredicateFailureReason interface {
GetReason() string
}
// GetEquivalencePodFunc is a function that gets a EquivalencePod from a pod.
type GetEquivalencePodFunc func(pod *v1.Pod) interface{}
// NodeLister interface represents anything that can list nodes for a scheduler.
type NodeLister interface {
// We explicitly return []*v1.Node, instead of v1.NodeList, to avoid

View File

@ -21,7 +21,7 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// EmptyPriorityMetadataProducer returns a no-op PriorityMetadataProducer type.

View File

@ -16,6 +16,10 @@ limitations under the License.
package algorithm
import (
api "k8s.io/kubernetes/pkg/apis/core"
)
const (
// TaintNodeNotReady will be added when node is not ready
// and feature-gate for TaintBasedEvictions flag is enabled,
@ -36,6 +40,11 @@ const (
// It is deprecated since 1.9
DeprecatedTaintNodeUnreachable = "node.alpha.kubernetes.io/unreachable"
// TaintNodeUnschedulable will be added when node becomes unschedulable
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node becomes schedulable.
TaintNodeUnschedulable = "node.kubernetes.io/unschedulable"
// TaintNodeOutOfDisk will be added when node becomes out of disk
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
@ -66,4 +75,11 @@ const (
// from the cloud-controller-manager initializes this node, and then removes
// the taint
TaintExternalCloudProvider = "node.cloudprovider.kubernetes.io/uninitialized"
// TaintNodeShutdown when node is shutdown in external cloud provider
TaintNodeShutdown = "node.cloudprovider.kubernetes.io/shutdown"
// NodeFieldSelectorKeyNodeName ('metadata.name') uses this as node field selector key
// when selecting node by node's name.
NodeFieldSelectorKeyNodeName = api.ObjectNameField
)

View File

@ -31,7 +31,6 @@ go_test(
],
embed = [":go_default_library"],
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/apis/core/install:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/api:go_default_library",
@ -39,6 +38,7 @@ go_test(
"//pkg/scheduler/factory:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/client-go/informers:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",

View File

@ -24,12 +24,12 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
restclient "k8s.io/client-go/rest"
utiltesting "k8s.io/client-go/util/testing"
"k8s.io/kubernetes/pkg/api/legacyscheme"
_ "k8s.io/kubernetes/pkg/apis/core/install"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
latestschedulerapi "k8s.io/kubernetes/pkg/scheduler/api/latest"
@ -349,7 +349,17 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2},
{"name": "MostRequestedPriority", "weight": 2}
]
],"extenders": [{
"urlPrefix": "/prefix",
"filterVerb": "filter",
"prioritizeVerb": "prioritize",
"weight": 1,
"BindVerb": "bind",
"enableHttps": true,
"tlsConfig": {"Insecure":true},
"httpTimeout": 1,
"nodeCacheCapable": true
}]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
@ -382,6 +392,17 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
{Name: "InterPodAffinityPriority", Weight: 2},
{Name: "MostRequestedPriority", Weight: 2},
},
ExtenderConfigs: []schedulerapi.ExtenderConfig{{
URLPrefix: "/prefix",
FilterVerb: "filter",
PrioritizeVerb: "prioritize",
Weight: 1,
BindVerb: "bind", // 1.7 was missing json tags on the BindVerb field and required "BindVerb"
EnableHTTPS: true,
TLSConfig: &restclient.TLSClientConfig{Insecure: true},
HTTPTimeout: 1,
NodeCacheCapable: true,
}},
},
},
// Do not change this JSON after the corresponding release has been tagged.
@ -419,7 +440,17 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2},
{"name": "MostRequestedPriority", "weight": 2}
]
],"extenders": [{
"urlPrefix": "/prefix",
"filterVerb": "filter",
"prioritizeVerb": "prioritize",
"weight": 1,
"bindVerb": "bind",
"enableHttps": true,
"tlsConfig": {"Insecure":true},
"httpTimeout": 1,
"nodeCacheCapable": true
}]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
@ -453,6 +484,17 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
{Name: "InterPodAffinityPriority", Weight: 2},
{Name: "MostRequestedPriority", Weight: 2},
},
ExtenderConfigs: []schedulerapi.ExtenderConfig{{
URLPrefix: "/prefix",
FilterVerb: "filter",
PrioritizeVerb: "prioritize",
Weight: 1,
BindVerb: "bind", // 1.8 became case-insensitive and tolerated "bindVerb"
EnableHTTPS: true,
TLSConfig: &restclient.TLSClientConfig{Insecure: true},
HTTPTimeout: 1,
NodeCacheCapable: true,
}},
},
},
// Do not change this JSON after the corresponding release has been tagged.
@ -480,7 +522,6 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
{"name": "CheckVolumeBinding"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
@ -492,7 +533,17 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2},
{"name": "MostRequestedPriority", "weight": 2}
]
],"extenders": [{
"urlPrefix": "/prefix",
"filterVerb": "filter",
"prioritizeVerb": "prioritize",
"weight": 1,
"bindVerb": "bind",
"enableHttps": true,
"tlsConfig": {"Insecure":true},
"httpTimeout": 1,
"nodeCacheCapable": true
}]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
@ -527,6 +578,240 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
{Name: "InterPodAffinityPriority", Weight: 2},
{Name: "MostRequestedPriority", Weight: 2},
},
ExtenderConfigs: []schedulerapi.ExtenderConfig{{
URLPrefix: "/prefix",
FilterVerb: "filter",
PrioritizeVerb: "prioritize",
Weight: 1,
BindVerb: "bind", // 1.9 was case-insensitive and tolerated "bindVerb"
EnableHTTPS: true,
TLSConfig: &restclient.TLSClientConfig{Insecure: true},
HTTPTimeout: 1,
NodeCacheCapable: true,
}},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.10": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsHostPorts"},
{"name": "HostName"},
{"name": "NoDiskConflict"},
{"name": "NoVolumeZoneConflict"},
{"name": "PodToleratesNodeTaints"},
{"name": "CheckNodeMemoryPressure"},
{"name": "CheckNodeDiskPressure"},
{"name": "CheckNodePIDPressure"},
{"name": "CheckNodeCondition"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MaxAzureDiskVolumeCount"},
{"name": "MatchInterPodAffinity"},
{"name": "GeneralPredicates"},
{"name": "CheckVolumeBinding"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "NodePreferAvoidPodsPriority", "weight": 2},
{"name": "NodeAffinityPriority", "weight": 2},
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2},
{"name": "MostRequestedPriority", "weight": 2}
],"extenders": [{
"urlPrefix": "/prefix",
"filterVerb": "filter",
"prioritizeVerb": "prioritize",
"weight": 1,
"bindVerb": "bind",
"enableHttps": true,
"tlsConfig": {"Insecure":true},
"httpTimeout": 1,
"nodeCacheCapable": true,
"managedResources": [{"name":"example.com/foo","ignoredByScheduler":true}],
"ignorable":true
}]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsHostPorts"},
{Name: "HostName"},
{Name: "NoDiskConflict"},
{Name: "NoVolumeZoneConflict"},
{Name: "PodToleratesNodeTaints"},
{Name: "CheckNodeMemoryPressure"},
{Name: "CheckNodeDiskPressure"},
{Name: "CheckNodePIDPressure"},
{Name: "CheckNodeCondition"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MaxAzureDiskVolumeCount"},
{Name: "MatchInterPodAffinity"},
{Name: "GeneralPredicates"},
{Name: "CheckVolumeBinding"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "ImageLocalityPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "NodePreferAvoidPodsPriority", Weight: 2},
{Name: "NodeAffinityPriority", Weight: 2},
{Name: "TaintTolerationPriority", Weight: 2},
{Name: "InterPodAffinityPriority", Weight: 2},
{Name: "MostRequestedPriority", Weight: 2},
},
ExtenderConfigs: []schedulerapi.ExtenderConfig{{
URLPrefix: "/prefix",
FilterVerb: "filter",
PrioritizeVerb: "prioritize",
Weight: 1,
BindVerb: "bind", // 1.10 was case-insensitive and tolerated "bindVerb"
EnableHTTPS: true,
TLSConfig: &restclient.TLSClientConfig{Insecure: true},
HTTPTimeout: 1,
NodeCacheCapable: true,
ManagedResources: []schedulerapi.ExtenderManagedResource{{Name: v1.ResourceName("example.com/foo"), IgnoredByScheduler: true}},
Ignorable: true,
}},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.11": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsHostPorts"},
{"name": "HostName"},
{"name": "NoDiskConflict"},
{"name": "NoVolumeZoneConflict"},
{"name": "PodToleratesNodeTaints"},
{"name": "CheckNodeMemoryPressure"},
{"name": "CheckNodeDiskPressure"},
{"name": "CheckNodePIDPressure"},
{"name": "CheckNodeCondition"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MaxAzureDiskVolumeCount"},
{"name": "MatchInterPodAffinity"},
{"name": "GeneralPredicates"},
{"name": "CheckVolumeBinding"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "NodePreferAvoidPodsPriority", "weight": 2},
{"name": "NodeAffinityPriority", "weight": 2},
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2},
{"name": "MostRequestedPriority", "weight": 2},
{
"name": "RequestedToCapacityRatioPriority",
"weight": 2,
"argument": {
"requestedToCapacityRatioArguments": {
"shape": [
{"utilization": 0, "score": 0},
{"utilization": 50, "score": 7}
]
}
}}
],"extenders": [{
"urlPrefix": "/prefix",
"filterVerb": "filter",
"prioritizeVerb": "prioritize",
"weight": 1,
"bindVerb": "bind",
"enableHttps": true,
"tlsConfig": {"Insecure":true},
"httpTimeout": 1,
"nodeCacheCapable": true,
"managedResources": [{"name":"example.com/foo","ignoredByScheduler":true}],
"ignorable":true
}]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsHostPorts"},
{Name: "HostName"},
{Name: "NoDiskConflict"},
{Name: "NoVolumeZoneConflict"},
{Name: "PodToleratesNodeTaints"},
{Name: "CheckNodeMemoryPressure"},
{Name: "CheckNodeDiskPressure"},
{Name: "CheckNodePIDPressure"},
{Name: "CheckNodeCondition"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MaxAzureDiskVolumeCount"},
{Name: "MatchInterPodAffinity"},
{Name: "GeneralPredicates"},
{Name: "CheckVolumeBinding"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "ImageLocalityPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "NodePreferAvoidPodsPriority", Weight: 2},
{Name: "NodeAffinityPriority", Weight: 2},
{Name: "TaintTolerationPriority", Weight: 2},
{Name: "InterPodAffinityPriority", Weight: 2},
{Name: "MostRequestedPriority", Weight: 2},
{
Name: "RequestedToCapacityRatioPriority",
Weight: 2,
Argument: &schedulerapi.PriorityArgument{
RequestedToCapacityRatioArguments: &schedulerapi.RequestedToCapacityRatioArguments{
UtilizationShape: []schedulerapi.UtilizationShapePoint{
{Utilization: 0, Score: 0},
{Utilization: 50, Score: 7},
}},
},
},
},
ExtenderConfigs: []schedulerapi.ExtenderConfig{{
URLPrefix: "/prefix",
FilterVerb: "filter",
PrioritizeVerb: "prioritize",
Weight: 1,
BindVerb: "bind", // 1.11 restored case-sensitivity, but allowed either "BindVerb" or "bindVerb"
EnableHTTPS: true,
TLSConfig: &restclient.TLSClientConfig{Insecure: true},
HTTPTimeout: 1,
NodeCacheCapable: true,
ManagedResources: []schedulerapi.ExtenderManagedResource{{Name: v1.ResourceName("example.com/foo"), IgnoredByScheduler: true}},
Ignorable: true,
}},
},
},
}
@ -561,7 +846,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
informerFactory := informers.NewSharedInformerFactory(client, 0)
if _, err := factory.NewConfigFactory(
@ -579,6 +864,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
informerFactory.Storage().V1().StorageClasses(),
v1.DefaultHardPodAffinitySymmetricWeight,
enableEquivalenceCache,
false,
).CreateFromConfig(policy); err != nil {
t.Errorf("%s: Error constructing: %v", v, err)
continue

View File

@ -17,6 +17,8 @@ limitations under the License.
package defaults
import (
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@ -26,8 +28,6 @@ import (
"k8s.io/kubernetes/pkg/scheduler/algorithm/priorities"
"k8s.io/kubernetes/pkg/scheduler/core"
"k8s.io/kubernetes/pkg/scheduler/factory"
"github.com/golang/glog"
)
const (
@ -56,7 +56,7 @@ func init() {
// For example:
// https://github.com/kubernetes/kubernetes/blob/36a218e/plugin/pkg/scheduler/factory/factory.go#L422
// Registers predicates and priorities that are not enabled by default, but user can pick when creating his
// Registers predicates and priorities that are not enabled by default, but user can pick when creating their
// own set of priorities/predicates.
// PodFitsPorts has been replaced by PodFitsHostPorts for better user understanding.
@ -77,13 +77,6 @@ func init() {
// Fit is determined by node selector query.
factory.RegisterFitPredicate(predicates.MatchNodeSelectorPred, predicates.PodMatchNodeSelector)
// Use equivalence class to speed up heavy predicates phase.
factory.RegisterGetEquivalencePodFunction(
func(args factory.PluginFactoryArgs) algorithm.GetEquivalencePodFunc {
return predicates.NewEquivalencePodGenerator(args.PVCInfo)
},
)
// ServiceSpreadingPriority is a priority config factory that spreads pods by minimizing
// the number of pods (belonging to the same service) on the same node.
// Register the factory so that it's available, but do not include it as part of the default priorities
@ -107,6 +100,11 @@ func init() {
factory.RegisterPriorityFunction2("ImageLocalityPriority", priorities.ImageLocalityPriorityMap, nil, 1)
// Optional, cluster-autoscaler friendly priority function - give used nodes higher priority.
factory.RegisterPriorityFunction2("MostRequestedPriority", priorities.MostRequestedPriorityMap, nil, 1)
factory.RegisterPriorityFunction2(
"RequestedToCapacityRatioPriority",
priorities.RequestedToCapacityRatioResourceAllocationPriorityDefault().PriorityMap,
nil,
1)
}
func defaultPredicates() sets.String {
@ -160,6 +158,9 @@ func defaultPredicates() sets.String {
// Fit is determined by node disk pressure condition.
factory.RegisterFitPredicate(predicates.CheckNodeDiskPressurePred, predicates.CheckNodeDiskPressurePredicate),
// Fit is determined by node pid pressure condition.
factory.RegisterFitPredicate(predicates.CheckNodePIDPressurePred, predicates.CheckNodePIDPressurePredicate),
// Fit is determined by node conditions: not ready, network unavailable or out of disk.
factory.RegisterMandatoryFitPredicate(predicates.CheckNodeConditionPred, predicates.CheckNodeConditionPredicate),
@ -179,16 +180,24 @@ func defaultPredicates() sets.String {
// ApplyFeatureGates applies algorithm by feature gates.
func ApplyFeatureGates() {
if utilfeature.DefaultFeatureGate.Enabled(features.TaintNodesByCondition) {
// Remove "CheckNodeCondition" predicate
// Remove "CheckNodeCondition", "CheckNodeMemoryPressure", "CheckNodePIDPressurePred"
// and "CheckNodeDiskPressure" predicates
factory.RemoveFitPredicate(predicates.CheckNodeConditionPred)
// Remove Key "CheckNodeCondition" From All Algorithm Provider
factory.RemoveFitPredicate(predicates.CheckNodeMemoryPressurePred)
factory.RemoveFitPredicate(predicates.CheckNodeDiskPressurePred)
factory.RemoveFitPredicate(predicates.CheckNodePIDPressurePred)
// Remove key "CheckNodeCondition", "CheckNodeMemoryPressure" and "CheckNodeDiskPressure"
// from ALL algorithm provider
// The key will be removed from all providers which in algorithmProviderMap[]
// if you just want remove specific provider, call func RemovePredicateKeyFromAlgoProvider()
factory.RemovePredicateKeyFromAlgorithmProviderMap(predicates.CheckNodeConditionPred)
factory.RemovePredicateKeyFromAlgorithmProviderMap(predicates.CheckNodeMemoryPressurePred)
factory.RemovePredicateKeyFromAlgorithmProviderMap(predicates.CheckNodeDiskPressurePred)
factory.RemovePredicateKeyFromAlgorithmProviderMap(predicates.CheckNodePIDPressurePred)
// Fit is determined based on whether a pod can tolerate all of the node's taints
factory.RegisterMandatoryFitPredicate(predicates.PodToleratesNodeTaintsPred, predicates.PodToleratesNodeTaints)
// Insert Key "PodToleratesNodeTaints" To All Algorithm Provider
// Insert Key "PodToleratesNodeTaints" and "CheckNodeUnschedulable" To All Algorithm Provider
// The key will insert to all providers which in algorithmProviderMap[]
// if you just want insert to specific provider, call func InsertPredicateKeyToAlgoProvider()
factory.InsertPredicateKeyToAlgorithmProviderMap(predicates.PodToleratesNodeTaintsPred)

View File

@ -76,6 +76,7 @@ func TestDefaultPredicates(t *testing.T) {
"GeneralPredicates",
"CheckNodeMemoryPressure",
"CheckNodeDiskPressure",
"CheckNodePIDPressure",
"CheckNodeCondition",
"PodToleratesNodeTaints",
predicates.CheckVolumeBindingPred,

View File

@ -36,14 +36,6 @@ const (
MaxPriority = 10
// MaxWeight defines the max weight value.
MaxWeight = MaxInt / MaxPriority
// HighestUserDefinablePriority is the highest priority for user defined priority classes. Priority values larger than 1 billion are reserved for Kubernetes system use.
HighestUserDefinablePriority = int32(1000000000)
// SystemCriticalPriority is the beginning of the range of priority values for critical system components.
SystemCriticalPriority = 2 * HighestUserDefinablePriority
// NOTE: In order to avoid conflict of names with user-defined priority classes, all the names must
// start with scheduling.SystemPriorityClassPrefix which is by default "system-".
SystemClusterCritical = "system-cluster-critical"
SystemNodeCritical = "system-node-critical"
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@ -117,6 +109,8 @@ type PriorityArgument struct {
// The priority function that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelPreference *LabelPreference
// The RequestedToCapacityRatio priority function is parametrized with function shape.
RequestedToCapacityRatioArguments *RequestedToCapacityRatioArguments
}
// ServiceAffinity holds the parameters that are used to configure the corresponding predicate in scheduler policy configuration.
@ -151,6 +145,20 @@ type LabelPreference struct {
Presence bool
}
// RequestedToCapacityRatioArguments holds arguments specific to RequestedToCapacityRatio priority function
type RequestedToCapacityRatioArguments struct {
// Array of points defining the priority function shape
UtilizationShape []UtilizationShapePoint
}
// UtilizationShapePoint represents a single point of the priority function shape
type UtilizationShapePoint struct {
// Utilization (x axis). Valid values are 0 to 100. Fully utilized node maps to 100.
Utilization int
// Score assigned to given utilization (y axis). Valid values are 0 to 10.
Score int
}
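
The two shape types above only declare the scoring curve; the evaluation itself lives in the priorities package. As a rough, hypothetical sketch (scoreForUtilization is a made-up helper, not the scheduler's actual scoring code), a score for a given utilization could be derived from the configured points by linear interpolation:

// scoreForUtilization illustrates, under the assumption of linear interpolation
// between shape points, how a shape such as {{0, 0}, {50, 7}} maps a node's
// utilization to a score. It is not part of this package.
func scoreForUtilization(shape []UtilizationShapePoint, utilization int) int {
	if len(shape) == 0 {
		return 0
	}
	if utilization <= shape[0].Utilization {
		return shape[0].Score
	}
	for i := 1; i < len(shape); i++ {
		if utilization <= shape[i].Utilization {
			// Linear interpolation between the two surrounding points (integer math).
			dx := shape[i].Utilization - shape[i-1].Utilization
			dy := shape[i].Score - shape[i-1].Score
			return shape[i-1].Score + dy*(utilization-shape[i-1].Utilization)/dx
		}
	}
	// Beyond the last point the score saturates at the final value.
	return shape[len(shape)-1].Score
}

With the shape used in the 1.11 compatibility test above ({0,0}, {50,7}), a node at 25% utilization would score 3 under this sketch.
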
// ExtenderManagedResource describes the arguments of extended resources
// managed by an extender.
type ExtenderManagedResource struct {
@ -168,6 +176,8 @@ type ExtenderConfig struct {
URLPrefix string
// Verb for the filter call, empty if not supported. This verb is appended to the URLPrefix when issuing the filter call to extender.
FilterVerb string
// Verb for the preempt call, empty if not supported. This verb is appended to the URLPrefix when issuing the preempt call to extender.
PreemptVerb string
// Verb for the prioritize call, empty if not supported. This verb is appended to the URLPrefix when issuing the prioritize call to extender.
PrioritizeVerb string
// The numeric multiplier for the node scores that the prioritize call generates.
@ -198,13 +208,53 @@ type ExtenderConfig struct {
// will skip checking the resource in predicates.
// +optional
ManagedResources []ExtenderManagedResource
// Ignorable specifies if the extender is ignorable, i.e. scheduling should not
// fail when the extender returns an error or is not reachable.
Ignorable bool
}
// ExtenderPreemptionResult represents the result returned by preemption phase of extender.
type ExtenderPreemptionResult struct {
NodeNameToMetaVictims map[string]*MetaVictims
}
// ExtenderPreemptionArgs represents the arguments needed by the extender to preempt pods on nodes.
type ExtenderPreemptionArgs struct {
// Pod being scheduled
Pod *v1.Pod
// Victims map generated by scheduler preemption phase
// Only set NodeNameToMetaVictims if ExtenderConfig.NodeCacheCapable == true. Otherwise, only set NodeNameToVictims.
NodeNameToVictims map[string]*Victims
NodeNameToMetaVictims map[string]*MetaVictims
}
// Victims represents:
// pods: a group of pods expected to be preempted.
// numPDBViolations: the count of violations of PodDisruptionBudget
type Victims struct {
Pods []*v1.Pod
NumPDBViolations int
}
// MetaPod represents an identifier for a v1.Pod
type MetaPod struct {
UID string
}
// MetaVictims represents:
// pods: a group of pods expected to be preempted.
// Only Pod identifiers will be sent; users are expected to get the v1.Pod in their own way.
// numPDBViolations: the count of violations of PodDisruptionBudget
type MetaVictims struct {
Pods []*MetaPod
NumPDBViolations int
}
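
The preemption types above define the payload exchanged between the scheduler and an extender. As a hypothetical sketch (buildPreemptionArgs is a made-up helper, not part of this package), a caller could choose between the full-victim and identifier-only forms depending on the extender's NodeCacheCapable setting:

// buildPreemptionArgs sketches how NodeCacheCapable could be honored: cache-capable
// extenders only need pod identifiers (MetaVictims), others receive the full victim pods.
func buildPreemptionArgs(pod *v1.Pod, victims map[string]*Victims, nodeCacheCapable bool) *ExtenderPreemptionArgs {
	args := &ExtenderPreemptionArgs{Pod: pod}
	if !nodeCacheCapable {
		args.NodeNameToVictims = victims
		return args
	}
	args.NodeNameToMetaVictims = map[string]*MetaVictims{}
	for nodeName, v := range victims {
		meta := &MetaVictims{NumPDBViolations: v.NumPDBViolations}
		for _, p := range v.Pods {
			meta.Pods = append(meta.Pods, &MetaPod{UID: string(p.UID)})
		}
		args.NodeNameToMetaVictims[nodeName] = meta
	}
	return args
}
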
// ExtenderArgs represents the arguments needed by the extender to filter/prioritize
// nodes for a pod.
type ExtenderArgs struct {
// Pod being scheduled
Pod v1.Pod
Pod *v1.Pod
// List of candidate nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == false
Nodes *v1.NodeList
@ -259,12 +309,6 @@ type HostPriority struct {
// HostPriorityList declares a []HostPriority type.
type HostPriorityList []HostPriority
// SystemPriorityClasses defines special priority classes which are used by system critical pods that should not be preempted by workload pods.
var SystemPriorityClasses = map[string]int32{
SystemClusterCritical: SystemCriticalPriority,
SystemNodeCritical: SystemCriticalPriority + 1000,
}
func (h HostPriorityList) Len() int {
return len(h)
}

View File

@ -17,6 +17,7 @@ limitations under the License.
package v1
import (
gojson "encoding/json"
"time"
apiv1 "k8s.io/api/core/v1"
@ -91,6 +92,8 @@ type PriorityArgument struct {
// The priority function that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelPreference *LabelPreference `json:"labelPreference"`
// The RequestedToCapacityRatio priority function is parametrized with function shape.
RequestedToCapacityRatioArguments *RequestedToCapacityRatioArguments `json:"requestedToCapacityRatioArguments"`
}
// ServiceAffinity holds the parameters that are used to configure the corresponding predicate in scheduler policy configuration.
@ -125,6 +128,20 @@ type LabelPreference struct {
Presence bool `json:"presence"`
}
// RequestedToCapacityRatioArguments holds arguments specific to RequestedToCapacityRatio priority function
type RequestedToCapacityRatioArguments struct {
// Array of points defining the priority function shape
UtilizationShape []UtilizationShapePoint `json:"shape"`
}
// UtilizationShapePoint represents a single point of the priority function shape
type UtilizationShapePoint struct {
// Utilization (x axis). Valid values are 0 to 100. Fully utilized node maps to 100.
Utilization int `json:"utilization"`
// Score assigned to given utilization (y axis). Valid values are 0 to 10.
Score int `json:"score"`
}
// ExtenderManagedResource describes the arguments of extended resources
// managed by an extender.
type ExtenderManagedResource struct {
@ -142,6 +159,8 @@ type ExtenderConfig struct {
URLPrefix string `json:"urlPrefix"`
// Verb for the filter call, empty if not supported. This verb is appended to the URLPrefix when issuing the filter call to extender.
FilterVerb string `json:"filterVerb,omitempty"`
// Verb for the preempt call, empty if not supported. This verb is appended to the URLPrefix when issuing the preempt call to extender.
PreemptVerb string `json:"preemptVerb,omitempty"`
// Verb for the prioritize call, empty if not supported. This verb is appended to the URLPrefix when issuing the prioritize call to extender.
PrioritizeVerb string `json:"prioritizeVerb,omitempty"`
// The numeric multiplier for the node scores that the prioritize call generates.
@ -172,13 +191,28 @@ type ExtenderConfig struct {
// will skip checking the resource in predicates.
// +optional
ManagedResources []ExtenderManagedResource `json:"managedResources,omitempty"`
// Ignorable specifies if the extender is ignorable, i.e. scheduling should not
// fail when the extender returns an error or is not reachable.
Ignorable bool `json:"ignorable,omitempty"`
}
// caseInsensitiveExtenderConfig is a type alias which lets us use the stdlib case-insensitive decoding
// to preserve compatibility with incorrectly specified scheduler config fields:
// * BindVerb, which originally did not specify a json tag, and required upper-case serialization in 1.7
// * TLSConfig, which uses a struct not intended for serialization, and does not include any json tags
type caseInsensitiveExtenderConfig *ExtenderConfig
// UnmarshalJSON implements the json.Unmarshaler interface.
// This preserves compatibility with incorrect case-insensitive configuration fields.
func (t *ExtenderConfig) UnmarshalJSON(b []byte) error {
return gojson.Unmarshal(b, caseInsensitiveExtenderConfig(t))
}
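
The UnmarshalJSON above avoids infinite recursion because caseInsensitiveExtenderConfig is a distinct pointer type and therefore does not carry ExtenderConfig's UnmarshalJSON method; decoding into it falls back to the default decoder, which matches JSON keys to struct fields case-insensitively when no exact tag matches. A toy version of the same pattern (config and caseInsensitiveConfig are made-up names; gojson is the "encoding/json" import above):

// config stands in for any struct whose fields were historically specified with
// the wrong case (e.g. "BindVerb" vs "bindVerb") in user-provided JSON.
type config struct {
	BindVerb string
}

// caseInsensitiveConfig has no methods, so gojson.Unmarshal will not re-enter
// config's UnmarshalJSON when decoding into it.
type caseInsensitiveConfig *config

func (c *config) UnmarshalJSON(b []byte) error {
	// Both {"BindVerb": "bind"} and {"bindVerb": "bind"} decode into c here.
	return gojson.Unmarshal(b, caseInsensitiveConfig(c))
}
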
// ExtenderArgs represents the arguments needed by the extender to filter/prioritize
// nodes for a pod.
type ExtenderArgs struct {
// Pod being scheduled
Pod apiv1.Pod `json:"pod"`
Pod *apiv1.Pod `json:"pod"`
// List of candidate nodes where the pod can be scheduled; to be populated
// only if ExtenderConfig.NodeCacheCapable == false
Nodes *apiv1.NodeList `json:"nodes,omitempty"`
@ -187,6 +221,43 @@ type ExtenderArgs struct {
NodeNames *[]string `json:"nodenames,omitempty"`
}
// ExtenderPreemptionResult represents the result returned by preemption phase of extender.
type ExtenderPreemptionResult struct {
NodeNameToMetaVictims map[string]*MetaVictims `json:"nodeNameToMetaVictims,omitempty"`
}
// ExtenderPreemptionArgs represents the arguments needed by the extender to preempt pods on nodes.
type ExtenderPreemptionArgs struct {
// Pod being scheduled
Pod *apiv1.Pod `json:"pod"`
// Victims map generated by scheduler preemption phase
// Only set NodeNameToMetaVictims if ExtenderConfig.NodeCacheCapable == true. Otherwise, only set NodeNameToVictims.
NodeNameToVictims map[string]*Victims `json:"nodeToVictims,omitempty"`
NodeNameToMetaVictims map[string]*MetaVictims `json:"nodeNameToMetaVictims,omitempty"`
}
// Victims represents:
// pods: a group of pods expected to be preempted.
// numPDBViolations: the count of violations of PodDisruptionBudget
type Victims struct {
Pods []*apiv1.Pod `json:"pods"`
NumPDBViolations int `json:"numPDBViolations"`
}
// MetaPod represents an identifier for a v1.Pod
type MetaPod struct {
UID string `json:"uid"`
}
// MetaVictims represents:
// pods: a group of pods expected to be preempted.
// Only Pod identifiers will be sent; users are expected to get the v1.Pod in their own way.
// numPDBViolations: the count of violations of PodDisruptionBudget
type MetaVictims struct {
Pods []*MetaPod `json:"pods"`
NumPDBViolations int `json:"numPDBViolations"`
}
// FailedNodesMap represents the filtered out nodes, with node names and failure messages
type FailedNodesMap map[string]string

View File

@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2018 The Kubernetes Authors.
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -29,7 +29,15 @@ import (
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderArgs) DeepCopyInto(out *ExtenderArgs) {
*out = *in
in.Pod.DeepCopyInto(&out.Pod)
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
if *in == nil {
*out = nil
} else {
*out = new(core_v1.Pod)
(*in).DeepCopyInto(*out)
}
}
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
if *in == nil {
@ -188,6 +196,83 @@ func (in *ExtenderManagedResource) DeepCopy() *ExtenderManagedResource {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderPreemptionArgs) DeepCopyInto(out *ExtenderPreemptionArgs) {
*out = *in
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
if *in == nil {
*out = nil
} else {
*out = new(core_v1.Pod)
(*in).DeepCopyInto(*out)
}
}
if in.NodeNameToVictims != nil {
in, out := &in.NodeNameToVictims, &out.NodeNameToVictims
*out = make(map[string]*Victims, len(*in))
for key, val := range *in {
if val == nil {
(*out)[key] = nil
} else {
(*out)[key] = new(Victims)
val.DeepCopyInto((*out)[key])
}
}
}
if in.NodeNameToMetaVictims != nil {
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
if val == nil {
(*out)[key] = nil
} else {
(*out)[key] = new(MetaVictims)
val.DeepCopyInto((*out)[key])
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderPreemptionArgs.
func (in *ExtenderPreemptionArgs) DeepCopy() *ExtenderPreemptionArgs {
if in == nil {
return nil
}
out := new(ExtenderPreemptionArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderPreemptionResult) DeepCopyInto(out *ExtenderPreemptionResult) {
*out = *in
if in.NodeNameToMetaVictims != nil {
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
if val == nil {
(*out)[key] = nil
} else {
(*out)[key] = new(MetaVictims)
val.DeepCopyInto((*out)[key])
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderPreemptionResult.
func (in *ExtenderPreemptionResult) DeepCopy() *ExtenderPreemptionResult {
if in == nil {
return nil
}
out := new(ExtenderPreemptionResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in FailedNodesMap) DeepCopyInto(out *FailedNodesMap) {
{
@ -283,6 +368,50 @@ func (in *LabelsPresence) DeepCopy() *LabelsPresence {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetaPod) DeepCopyInto(out *MetaPod) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaPod.
func (in *MetaPod) DeepCopy() *MetaPod {
if in == nil {
return nil
}
out := new(MetaPod)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetaVictims) DeepCopyInto(out *MetaVictims) {
*out = *in
if in.Pods != nil {
in, out := &in.Pods, &out.Pods
*out = make([]*MetaPod, len(*in))
for i := range *in {
if (*in)[i] == nil {
(*out)[i] = nil
} else {
(*out)[i] = new(MetaPod)
(*in)[i].DeepCopyInto((*out)[i])
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaVictims.
func (in *MetaVictims) DeepCopy() *MetaVictims {
if in == nil {
return nil
}
out := new(MetaVictims)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Policy) DeepCopyInto(out *Policy) {
*out = *in
@ -409,6 +538,15 @@ func (in *PriorityArgument) DeepCopyInto(out *PriorityArgument) {
**out = **in
}
}
if in.RequestedToCapacityRatioArguments != nil {
in, out := &in.RequestedToCapacityRatioArguments, &out.RequestedToCapacityRatioArguments
if *in == nil {
*out = nil
} else {
*out = new(RequestedToCapacityRatioArguments)
(*in).DeepCopyInto(*out)
}
}
return
}
@ -447,6 +585,27 @@ func (in *PriorityPolicy) DeepCopy() *PriorityPolicy {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RequestedToCapacityRatioArguments) DeepCopyInto(out *RequestedToCapacityRatioArguments) {
*out = *in
if in.UtilizationShape != nil {
in, out := &in.UtilizationShape, &out.UtilizationShape
*out = make([]UtilizationShapePoint, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RequestedToCapacityRatioArguments.
func (in *RequestedToCapacityRatioArguments) DeepCopy() *RequestedToCapacityRatioArguments {
if in == nil {
return nil
}
out := new(RequestedToCapacityRatioArguments)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServiceAffinity) DeepCopyInto(out *ServiceAffinity) {
*out = *in
@ -483,3 +642,47 @@ func (in *ServiceAntiAffinity) DeepCopy() *ServiceAntiAffinity {
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *UtilizationShapePoint) DeepCopyInto(out *UtilizationShapePoint) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UtilizationShapePoint.
func (in *UtilizationShapePoint) DeepCopy() *UtilizationShapePoint {
if in == nil {
return nil
}
out := new(UtilizationShapePoint)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Victims) DeepCopyInto(out *Victims) {
*out = *in
if in.Pods != nil {
in, out := &in.Pods, &out.Pods
*out = make([]*core_v1.Pod, len(*in))
for i := range *in {
if (*in)[i] == nil {
(*out)[i] = nil
} else {
(*out)[i] = new(core_v1.Pod)
(*in)[i].DeepCopyInto((*out)[i])
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Victims.
func (in *Victims) DeepCopy() *Victims {
if in == nil {
return nil
}
out := new(Victims)
in.DeepCopyInto(out)
return out
}

View File

@ -61,6 +61,10 @@ func TestValidatePolicy(t *testing.T) {
policy: api.Policy{ExtenderConfigs: []api.ExtenderConfig{{URLPrefix: "http://127.0.0.1:8081/extender", FilterVerb: "filter"}}},
expected: nil,
},
{
policy: api.Policy{ExtenderConfigs: []api.ExtenderConfig{{URLPrefix: "http://127.0.0.1:8081/extender", PreemptVerb: "preempt"}}},
expected: nil,
},
{
policy: api.Policy{
ExtenderConfigs: []api.ExtenderConfig{

View File

@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2018 The Kubernetes Authors.
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -29,7 +29,15 @@ import (
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderArgs) DeepCopyInto(out *ExtenderArgs) {
*out = *in
in.Pod.DeepCopyInto(&out.Pod)
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
if *in == nil {
*out = nil
} else {
*out = new(v1.Pod)
(*in).DeepCopyInto(*out)
}
}
if in.Nodes != nil {
in, out := &in.Nodes, &out.Nodes
if *in == nil {
@ -188,6 +196,83 @@ func (in *ExtenderManagedResource) DeepCopy() *ExtenderManagedResource {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderPreemptionArgs) DeepCopyInto(out *ExtenderPreemptionArgs) {
*out = *in
if in.Pod != nil {
in, out := &in.Pod, &out.Pod
if *in == nil {
*out = nil
} else {
*out = new(v1.Pod)
(*in).DeepCopyInto(*out)
}
}
if in.NodeNameToVictims != nil {
in, out := &in.NodeNameToVictims, &out.NodeNameToVictims
*out = make(map[string]*Victims, len(*in))
for key, val := range *in {
if val == nil {
(*out)[key] = nil
} else {
(*out)[key] = new(Victims)
val.DeepCopyInto((*out)[key])
}
}
}
if in.NodeNameToMetaVictims != nil {
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
if val == nil {
(*out)[key] = nil
} else {
(*out)[key] = new(MetaVictims)
val.DeepCopyInto((*out)[key])
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderPreemptionArgs.
func (in *ExtenderPreemptionArgs) DeepCopy() *ExtenderPreemptionArgs {
if in == nil {
return nil
}
out := new(ExtenderPreemptionArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ExtenderPreemptionResult) DeepCopyInto(out *ExtenderPreemptionResult) {
*out = *in
if in.NodeNameToMetaVictims != nil {
in, out := &in.NodeNameToMetaVictims, &out.NodeNameToMetaVictims
*out = make(map[string]*MetaVictims, len(*in))
for key, val := range *in {
if val == nil {
(*out)[key] = nil
} else {
(*out)[key] = new(MetaVictims)
val.DeepCopyInto((*out)[key])
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtenderPreemptionResult.
func (in *ExtenderPreemptionResult) DeepCopy() *ExtenderPreemptionResult {
if in == nil {
return nil
}
out := new(ExtenderPreemptionResult)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in FailedNodesMap) DeepCopyInto(out *FailedNodesMap) {
{
@ -283,6 +368,50 @@ func (in *LabelsPresence) DeepCopy() *LabelsPresence {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetaPod) DeepCopyInto(out *MetaPod) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaPod.
func (in *MetaPod) DeepCopy() *MetaPod {
if in == nil {
return nil
}
out := new(MetaPod)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetaVictims) DeepCopyInto(out *MetaVictims) {
*out = *in
if in.Pods != nil {
in, out := &in.Pods, &out.Pods
*out = make([]*MetaPod, len(*in))
for i := range *in {
if (*in)[i] == nil {
(*out)[i] = nil
} else {
(*out)[i] = new(MetaPod)
(*in)[i].DeepCopyInto((*out)[i])
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaVictims.
func (in *MetaVictims) DeepCopy() *MetaVictims {
if in == nil {
return nil
}
out := new(MetaVictims)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Policy) DeepCopyInto(out *Policy) {
*out = *in
@ -409,6 +538,15 @@ func (in *PriorityArgument) DeepCopyInto(out *PriorityArgument) {
**out = **in
}
}
if in.RequestedToCapacityRatioArguments != nil {
in, out := &in.RequestedToCapacityRatioArguments, &out.RequestedToCapacityRatioArguments
if *in == nil {
*out = nil
} else {
*out = new(RequestedToCapacityRatioArguments)
(*in).DeepCopyInto(*out)
}
}
return
}
@ -447,6 +585,27 @@ func (in *PriorityPolicy) DeepCopy() *PriorityPolicy {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RequestedToCapacityRatioArguments) DeepCopyInto(out *RequestedToCapacityRatioArguments) {
*out = *in
if in.UtilizationShape != nil {
in, out := &in.UtilizationShape, &out.UtilizationShape
*out = make([]UtilizationShapePoint, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RequestedToCapacityRatioArguments.
func (in *RequestedToCapacityRatioArguments) DeepCopy() *RequestedToCapacityRatioArguments {
if in == nil {
return nil
}
out := new(RequestedToCapacityRatioArguments)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServiceAffinity) DeepCopyInto(out *ServiceAffinity) {
*out = *in
@ -483,3 +642,47 @@ func (in *ServiceAntiAffinity) DeepCopy() *ServiceAntiAffinity {
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *UtilizationShapePoint) DeepCopyInto(out *UtilizationShapePoint) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UtilizationShapePoint.
func (in *UtilizationShapePoint) DeepCopy() *UtilizationShapePoint {
if in == nil {
return nil
}
out := new(UtilizationShapePoint)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Victims) DeepCopyInto(out *Victims) {
*out = *in
if in.Pods != nil {
in, out := &in.Pods, &out.Pods
*out = make([]*v1.Pod, len(*in))
for i := range *in {
if (*in)[i] == nil {
(*out)[i] = nil
} else {
(*out)[i] = new(v1.Pod)
(*in)[i].DeepCopyInto((*out)[i])
}
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Victims.
func (in *Victims) DeepCopy() *Victims {
if in == nil {
return nil
}
out := new(Victims)
in.DeepCopyInto(out)
return out
}

View File

@ -8,10 +8,11 @@ go_library(
"node_info.go",
"util.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/schedulercache",
importpath = "k8s.io/kubernetes/pkg/scheduler/cache",
visibility = ["//visibility:public"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
@ -20,7 +21,7 @@ go_library(
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)
@ -32,14 +33,19 @@ go_test(
],
embed = [":go_default_library"],
deps = [
"//pkg/features:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/util/parsers:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)

View File

@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
package cache
import (
"fmt"
@ -24,6 +24,8 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
"github.com/golang/glog"
policy "k8s.io/api/policy/v1beta1"
@ -80,10 +82,42 @@ func newSchedulerCache(ttl, period time.Duration, stop <-chan struct{}) *schedul
}
}
// Snapshot takes a snapshot of the current schedulerCache. The method has a performance impact,
// and should only be used in non-critical paths.
func (cache *schedulerCache) Snapshot() *Snapshot {
cache.mu.Lock()
defer cache.mu.Unlock()
nodes := make(map[string]*NodeInfo)
for k, v := range cache.nodes {
nodes[k] = v.Clone()
}
assumedPods := make(map[string]bool)
for k, v := range cache.assumedPods {
assumedPods[k] = v
}
pdbs := make(map[string]*policy.PodDisruptionBudget)
for k, v := range cache.pdbs {
pdbs[k] = v.DeepCopy()
}
return &Snapshot{
Nodes: nodes,
AssumedPods: assumedPods,
Pdbs: pdbs,
}
}
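
A hypothetical caller (dumpAssumedPods is not part of this change) showing the intended use: take one Snapshot, then work on the returned copies so the cache lock is held only for the duration of the copy itself.

// dumpAssumedPods lists the keys of all currently assumed pods from a single snapshot.
func dumpAssumedPods(cache Cache) []string {
	snapshot := cache.Snapshot()
	keys := make([]string, 0, len(snapshot.AssumedPods))
	for key := range snapshot.AssumedPods {
		keys = append(keys, key)
	}
	return keys
}
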
func (cache *schedulerCache) UpdateNodeNameToInfoMap(nodeNameToInfo map[string]*NodeInfo) error {
cache.mu.Lock()
defer cache.mu.Unlock()
for name, info := range cache.nodes {
if utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && info.TransientInfo != nil {
// Transient scheduler info is reset here.
info.TransientInfo.resetTransientSchedulerInfo()
}
if current, ok := nodeNameToInfo[name]; !ok || current.generation != info.generation {
nodeNameToInfo[name] = info.Clone()
}
@ -398,7 +432,7 @@ func (cache *schedulerCache) AddPDB(pdb *policy.PodDisruptionBudget) error {
defer cache.mu.Unlock()
// Unconditionally update cache.
cache.pdbs[pdb.Name] = pdb
cache.pdbs[string(pdb.UID)] = pdb
return nil
}
@ -410,7 +444,7 @@ func (cache *schedulerCache) RemovePDB(pdb *policy.PodDisruptionBudget) error {
cache.mu.Lock()
defer cache.mu.Unlock()
delete(cache.pdbs, pdb.Name)
delete(cache.pdbs, string(pdb.UID))
return nil
}
@ -426,6 +460,13 @@ func (cache *schedulerCache) ListPDBs(selector labels.Selector) ([]*policy.PodDi
return pdbs, nil
}
func (cache *schedulerCache) IsUpToDate(n *NodeInfo) bool {
cache.mu.Lock()
defer cache.mu.Unlock()
node, ok := cache.nodes[n.Node().Name]
return ok && n.generation == node.generation
}
func (cache *schedulerCache) run() {
go wait.Until(cache.cleanupExpiredAssumedPods, cache.period, cache.stop)
}

View File

@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
package cache
import (
"fmt"
@ -28,7 +28,11 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
@ -72,6 +76,8 @@ func (b *hostPortInfoBuilder) build() schedutil.HostPortInfo {
// TestAssumePodScheduled tests that after a pod is assumed, its information is aggregated
// on node level.
func TestAssumePodScheduled(t *testing.T) {
// Enable volumesOnNodeForBalancing to do balanced resource allocation
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.BalanceAttachedNodeVolumes))
nodeName := "node"
testPods := []*v1.Pod{
makeBasePod(t, nodeName, "test", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
@ -98,9 +104,11 @@ func TestAssumePodScheduled(t *testing.T) {
MilliCPU: 100,
Memory: 500,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[0]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
},
}, {
pods: []*v1.Pod{testPods[1], testPods[2]},
@ -113,9 +121,11 @@ func TestAssumePodScheduled(t *testing.T) {
MilliCPU: 300,
Memory: 1524,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[1], testPods[2]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).add("TCP", "127.0.0.1", 8080).build(),
imageSizes: map[string]int64{},
},
}, { // test non-zero request
pods: []*v1.Pod{testPods[3]},
@ -128,9 +138,11 @@ func TestAssumePodScheduled(t *testing.T) {
MilliCPU: priorityutil.DefaultMilliCPURequest,
Memory: priorityutil.DefaultMemoryRequest,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[3]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
},
}, {
pods: []*v1.Pod{testPods[4]},
@ -144,9 +156,11 @@ func TestAssumePodScheduled(t *testing.T) {
MilliCPU: 100,
Memory: 500,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[4]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
},
}, {
pods: []*v1.Pod{testPods[4], testPods[5]},
@ -160,9 +174,11 @@ func TestAssumePodScheduled(t *testing.T) {
MilliCPU: 300,
Memory: 1524,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[4], testPods[5]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).add("TCP", "127.0.0.1", 8080).build(),
imageSizes: map[string]int64{},
},
}, {
pods: []*v1.Pod{testPods[6]},
@ -175,9 +191,11 @@ func TestAssumePodScheduled(t *testing.T) {
MilliCPU: 100,
Memory: 500,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[6]},
usedPorts: newHostPortInfoBuilder().build(),
imageSizes: map[string]int64{},
},
},
}
@ -218,6 +236,8 @@ func assumeAndFinishBinding(cache *schedulerCache, pod *v1.Pod, assumedTime time
// TestExpirePod tests that assumed pods will be removed if expired.
// The removal will be reflected in node info.
func TestExpirePod(t *testing.T) {
// Enable volumesOnNodeForBalancing to do balanced resource allocation
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.BalanceAttachedNodeVolumes))
nodeName := "node"
testPods := []*v1.Pod{
makeBasePod(t, nodeName, "test-1", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
@ -251,9 +271,11 @@ func TestExpirePod(t *testing.T) {
MilliCPU: 200,
Memory: 1024,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[1]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 8080).build(),
imageSizes: map[string]int64{},
},
}}
@ -275,6 +297,8 @@ func TestExpirePod(t *testing.T) {
// TestAddPodWillConfirm tests that a pod being Add()ed will be confirmed if assumed.
// The pod info should still exist after manually expiring unconfirmed pods.
func TestAddPodWillConfirm(t *testing.T) {
// Enable volumesOnNodeForBalancing to do balanced resource allocation
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.BalanceAttachedNodeVolumes))
nodeName := "node"
now := time.Now()
ttl := 10 * time.Second
@ -300,9 +324,11 @@ func TestAddPodWillConfirm(t *testing.T) {
MilliCPU: 100,
Memory: 500,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[0]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
},
}}
@ -325,6 +351,46 @@ func TestAddPodWillConfirm(t *testing.T) {
}
}
func TestSnapshot(t *testing.T) {
nodeName := "node"
now := time.Now()
ttl := 10 * time.Second
testPods := []*v1.Pod{
makeBasePod(t, nodeName, "test-1", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
makeBasePod(t, nodeName, "test-2", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}}),
}
tests := []struct {
podsToAssume []*v1.Pod
podsToAdd []*v1.Pod
}{{ // two pods were assumed at the same time. But the first one is Add()ed and gets confirmed.
podsToAssume: []*v1.Pod{testPods[0], testPods[1]},
podsToAdd: []*v1.Pod{testPods[0]},
}}
for _, tt := range tests {
cache := newSchedulerCache(ttl, time.Second, nil)
for _, podToAssume := range tt.podsToAssume {
if err := assumeAndFinishBinding(cache, podToAssume, now); err != nil {
t.Fatalf("assumePod failed: %v", err)
}
}
for _, podToAdd := range tt.podsToAdd {
if err := cache.AddPod(podToAdd); err != nil {
t.Fatalf("AddPod failed: %v", err)
}
}
snapshot := cache.Snapshot()
if !reflect.DeepEqual(snapshot.Nodes, cache.nodes) {
t.Fatalf("expect \n%+v; got \n%+v", cache.nodes, snapshot.Nodes)
}
if !reflect.DeepEqual(snapshot.AssumedPods, cache.assumedPods) {
t.Fatalf("expect \n%+v; got \n%+v", cache.assumedPods, snapshot.AssumedPods)
}
}
}
// TestAddPodWillReplaceAssumed tests that a pod being Add()ed will replace any assumed pod.
func TestAddPodWillReplaceAssumed(t *testing.T) {
now := time.Now()
@ -355,9 +421,11 @@ func TestAddPodWillReplaceAssumed(t *testing.T) {
MilliCPU: 200,
Memory: 500,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{updatedPod.DeepCopy()},
usedPorts: newHostPortInfoBuilder().add("TCP", "0.0.0.0", 90).build(),
imageSizes: map[string]int64{},
},
},
}}
@ -389,6 +457,8 @@ func TestAddPodWillReplaceAssumed(t *testing.T) {
// TestAddPodAfterExpiration tests that a pod being Add()ed will be added back if expired.
func TestAddPodAfterExpiration(t *testing.T) {
// Enable volumesOnNodeForBalancing to do balanced resource allocation
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.BalanceAttachedNodeVolumes))
nodeName := "node"
ttl := 10 * time.Second
basePod := makeBasePod(t, nodeName, "test", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}})
@ -407,9 +477,11 @@ func TestAddPodAfterExpiration(t *testing.T) {
MilliCPU: 100,
Memory: 500,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{basePod},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
},
}}
@ -436,6 +508,8 @@ func TestAddPodAfterExpiration(t *testing.T) {
// TestUpdatePod tests that a pod will be updated if added before.
func TestUpdatePod(t *testing.T) {
// Enable volumesOnNodeForBalancing to do balanced resource allocation
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.BalanceAttachedNodeVolumes))
nodeName := "node"
ttl := 10 * time.Second
testPods := []*v1.Pod{
@ -443,7 +517,6 @@ func TestUpdatePod(t *testing.T) {
makeBasePod(t, nodeName, "test", "200m", "1Ki", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 8080, Protocol: "TCP"}}),
}
tests := []struct {
podsToAssume []*v1.Pod
podsToAdd []*v1.Pod
podsToUpdate []*v1.Pod
@ -460,9 +533,11 @@ func TestUpdatePod(t *testing.T) {
MilliCPU: 200,
Memory: 1024,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[1]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 8080).build(),
imageSizes: map[string]int64{},
}, {
requestedResource: &Resource{
MilliCPU: 100,
@ -472,9 +547,11 @@ func TestUpdatePod(t *testing.T) {
MilliCPU: 100,
Memory: 500,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[0]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
}},
}}
@ -502,6 +579,8 @@ func TestUpdatePod(t *testing.T) {
// TestExpireAddUpdatePod test the sequence that a pod is expired, added, then updated
func TestExpireAddUpdatePod(t *testing.T) {
// Enable volumesOnNodeForBalancing to do balanced resource allocation
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.BalanceAttachedNodeVolumes))
nodeName := "node"
ttl := 10 * time.Second
testPods := []*v1.Pod{
@ -527,9 +606,11 @@ func TestExpireAddUpdatePod(t *testing.T) {
MilliCPU: 200,
Memory: 1024,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[1]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 8080).build(),
imageSizes: map[string]int64{},
}, {
requestedResource: &Resource{
MilliCPU: 100,
@ -539,9 +620,11 @@ func TestExpireAddUpdatePod(t *testing.T) {
MilliCPU: 100,
Memory: 500,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{testPods[0]},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
}},
}}
@ -583,6 +666,7 @@ func makePodWithEphemeralStorage(nodeName, ephemeralStorage string) *v1.Pod {
ObjectMeta: metav1.ObjectMeta{
Namespace: "default-namespace",
Name: "pod-with-ephemeral-storage",
UID: types.UID("pod-with-ephemeral-storage"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{{
@ -596,6 +680,8 @@ func makePodWithEphemeralStorage(nodeName, ephemeralStorage string) *v1.Pod {
}
func TestEphemeralStorageResource(t *testing.T) {
// Enable volumesOnNodeForBalancing to do balanced resource allocation
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.BalanceAttachedNodeVolumes))
nodeName := "node"
podE := makePodWithEphemeralStorage(nodeName, "500")
tests := []struct {
@ -612,9 +698,11 @@ func TestEphemeralStorageResource(t *testing.T) {
MilliCPU: priorityutil.DefaultMilliCPURequest,
Memory: priorityutil.DefaultMemoryRequest,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{podE},
usedPorts: schedutil.HostPortInfo{},
imageSizes: map[string]int64{},
},
},
}
@ -639,6 +727,8 @@ func TestEphemeralStorageResource(t *testing.T) {
// TestRemovePod tests after added pod is removed, its information should also be subtracted.
func TestRemovePod(t *testing.T) {
// Enable volumesOnNodeForBalancing to do balanced resource allocation
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.BalanceAttachedNodeVolumes))
nodeName := "node"
basePod := makeBasePod(t, nodeName, "test", "100m", "500", "", []v1.ContainerPort{{HostIP: "127.0.0.1", HostPort: 80, Protocol: "TCP"}})
tests := []struct {
@ -655,9 +745,11 @@ func TestRemovePod(t *testing.T) {
MilliCPU: 100,
Memory: 500,
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
pods: []*v1.Pod{basePod},
usedPorts: newHostPortInfoBuilder().add("TCP", "127.0.0.1", 80).build(),
imageSizes: map[string]int64{},
},
}}
@ -809,6 +901,7 @@ func TestNodeOperators(t *testing.T) {
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod1",
UID: types.UID("pod1"),
},
Spec: v1.PodSpec{
NodeName: nodeName,
@ -859,6 +952,7 @@ func TestNodeOperators(t *testing.T) {
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod1",
UID: types.UID("pod1"),
},
Spec: v1.PodSpec{
NodeName: nodeName,
@ -877,6 +971,7 @@ func TestNodeOperators(t *testing.T) {
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod2",
UID: types.UID("pod2"),
},
Spec: v1.PodSpec{
NodeName: nodeName,
@ -912,6 +1007,8 @@ func TestNodeOperators(t *testing.T) {
t.Errorf("Failed to find node %v in schedulercache.", node.Name)
}
// Generations are globally unique. We check in our unit tests that they are incremented correctly.
expected.generation = got.generation
if !reflect.DeepEqual(got, expected) {
t.Errorf("Failed to add node into schedulercache:\n got: %+v \nexpected: %+v", got, expected)
}
@ -923,6 +1020,7 @@ func TestNodeOperators(t *testing.T) {
if !found || len(cachedNodes) != 1 {
t.Errorf("failed to dump cached nodes:\n got: %v \nexpected: %v", cachedNodes, cache.nodes)
}
expected.generation = newNode.generation
if !reflect.DeepEqual(newNode, expected) {
t.Errorf("Failed to clone node:\n got: %+v, \n expected: %+v", newNode, expected)
}
@ -930,12 +1028,15 @@ func TestNodeOperators(t *testing.T) {
// Case 3: update node attribute successfully.
node.Status.Allocatable[v1.ResourceMemory] = mem50m
expected.allocatableResource.Memory = mem50m.Value()
expected.generation++
cache.UpdateNode(nil, node)
got, found = cache.nodes[node.Name]
if !found {
t.Errorf("Failed to find node %v in schedulercache after UpdateNode.", node.Name)
}
if got.generation <= expected.generation {
t.Errorf("generation is not incremented. got: %v, expected: %v", got.generation, expected.generation)
}
expected.generation = got.generation
if !reflect.DeepEqual(got, expected) {
t.Errorf("Failed to update node in schedulercache:\n got: %+v \nexpected: %+v", got, expected)
@ -957,6 +1058,17 @@ func BenchmarkList1kNodes30kPods(b *testing.B) {
}
}
func BenchmarkUpdate1kNodes30kPods(b *testing.B) {
// Enable volumesOnNodeForBalancing to do balanced resource allocation
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.BalanceAttachedNodeVolumes))
cache := setupCacheOf1kNodes30kPods(b)
b.ResetTimer()
for n := 0; n < b.N; n++ {
cachedNodes := map[string]*NodeInfo{}
cache.UpdateNodeNameToInfoMap(cachedNodes)
}
}
func BenchmarkExpire100Pods(b *testing.B) {
benchmarkExpire(b, 100)
}
@ -1000,6 +1112,7 @@ func makeBasePod(t testingMode, nodeName, objName, cpu, mem, extended string, po
}
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: types.UID(objName),
Namespace: "node_info_cache_test",
Name: objName,
},
@ -1046,13 +1159,14 @@ func setupCacheWithAssumedPods(b *testing.B, podNum int, assumedTime time.Time)
return cache
}
func makePDB(name, namespace string, labels map[string]string, minAvailable int) *v1beta1.PodDisruptionBudget {
func makePDB(name, namespace string, uid types.UID, labels map[string]string, minAvailable int) *v1beta1.PodDisruptionBudget {
intstrMin := intstr.FromInt(minAvailable)
pdb := &v1beta1.PodDisruptionBudget{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Name: name,
Labels: labels,
UID: uid,
},
Spec: v1beta1.PodDisruptionBudgetSpec{
MinAvailable: &intstrMin,
@ -1067,14 +1181,14 @@ func makePDB(name, namespace string, labels map[string]string, minAvailable int)
func TestPDBOperations(t *testing.T) {
ttl := 10 * time.Second
testPDBs := []*v1beta1.PodDisruptionBudget{
makePDB("pdb0", "ns1", map[string]string{"tkey1": "tval1"}, 3),
makePDB("pdb1", "ns1", map[string]string{"tkey1": "tval1", "tkey2": "tval2"}, 1),
makePDB("pdb2", "ns3", map[string]string{"tkey3": "tval3", "tkey2": "tval2"}, 10),
makePDB("pdb0", "ns1", "uid0", map[string]string{"tkey1": "tval1"}, 3),
makePDB("pdb1", "ns1", "uid1", map[string]string{"tkey1": "tval1", "tkey2": "tval2"}, 1),
makePDB("pdb2", "ns3", "uid2", map[string]string{"tkey3": "tval3", "tkey2": "tval2"}, 10),
}
updatedPDBs := []*v1beta1.PodDisruptionBudget{
makePDB("pdb0", "ns1", map[string]string{"tkey4": "tval4"}, 8),
makePDB("pdb1", "ns1", map[string]string{"tkey1": "tval1"}, 1),
makePDB("pdb2", "ns3", map[string]string{"tkey3": "tval3", "tkey1": "tval1", "tkey2": "tval2"}, 10),
makePDB("pdb0", "ns1", "uid0", map[string]string{"tkey4": "tval4"}, 8),
makePDB("pdb1", "ns1", "uid1", map[string]string{"tkey1": "tval1"}, 1),
makePDB("pdb2", "ns3", "uid2", map[string]string{"tkey3": "tval3", "tkey1": "tval1", "tkey2": "tval2"}, 10),
}
tests := []struct {
pdbsToAdd []*v1beta1.PodDisruptionBudget
@ -1134,7 +1248,7 @@ func TestPDBOperations(t *testing.T) {
found := false
// find it among the cached ones
for _, cpdb := range cachedPDBs {
if pdb.Name == cpdb.Name {
if pdb.UID == cpdb.UID {
found = true
if !reflect.DeepEqual(pdb, cpdb) {
t.Errorf("%v is not equal to %v", pdb, cpdb)
@ -1143,9 +1257,32 @@ func TestPDBOperations(t *testing.T) {
}
}
if !found {
t.Errorf("PDB with name '%v' was not found in the cache.", pdb.Name)
t.Errorf("PDB with uid '%v' was not found in the cache.", pdb.UID)
}
}
}
}
func TestIsUpToDate(t *testing.T) {
cache := New(time.Duration(0), wait.NeverStop)
if err := cache.AddNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "n1"}}); err != nil {
t.Errorf("Could not add node: %v", err)
}
s := cache.Snapshot()
node := s.Nodes["n1"]
if !cache.IsUpToDate(node) {
t.Errorf("Node incorrectly marked as stale")
}
pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", UID: "p1"}, Spec: v1.PodSpec{NodeName: "n1"}}
if err := cache.AddPod(pod); err != nil {
t.Errorf("Could not add pod: %v", err)
}
if cache.IsUpToDate(node) {
t.Errorf("Node incorrectly marked as up to date")
}
badNode := &NodeInfo{node: &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "n2"}}}
if cache.IsUpToDate(badNode) {
t.Errorf("Nonexistant node incorrectly marked as up to date")
}
}

View File

@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
package cache
import (
"k8s.io/api/core/v1"
@ -119,4 +119,17 @@ type Cache interface {
// FilteredList returns all cached pods that pass the filter.
FilteredList(filter PodFilter, selector labels.Selector) ([]*v1.Pod, error)
// Snapshot takes a snapshot of the current cache state
Snapshot() *Snapshot
// IsUpToDate returns true if the given NodeInfo matches the current data in the cache.
IsUpToDate(n *NodeInfo) bool
}
// Snapshot is a snapshot of cache state
type Snapshot struct {
AssumedPods map[string]bool
Nodes map[string]*NodeInfo
Pdbs map[string]*policy.PodDisruptionBudget
}

View File

@ -14,22 +14,27 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
package cache
import (
"errors"
"fmt"
"sync"
"sync/atomic"
"github.com/golang/glog"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
clientcache "k8s.io/client-go/tools/cache"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/pkg/scheduler/util"
)
var emptyResource = Resource{}
var (
emptyResource = Resource{}
generation int64
)
// NodeInfo is node level aggregated information.
type NodeInfo struct {
@ -49,24 +54,83 @@ type NodeInfo struct {
// as int64, to avoid conversions and accessing map.
allocatableResource *Resource
// Cached tains of the node for faster lookup.
// Cached taints of the node for faster lookup.
taints []v1.Taint
taintsErr error
// This is a map from image name to image size, also for checking image existence on the node
// Cache it here to avoid rebuilding the map during scheduling, e.g., in image_locality.go
imageSizes map[string]int64
// TransientInfo holds information pertaining to the current scheduling cycle. It is discarded at the end of
// the cycle.
// TODO: @ravig. Remove this once we have a clear approach for message passing across predicates and priorities.
TransientInfo *transientSchedulerInfo
// Cached conditions of node for faster lookup.
memoryPressureCondition v1.ConditionStatus
diskPressureCondition v1.ConditionStatus
pidPressureCondition v1.ConditionStatus
// Whenever NodeInfo changes, generation is bumped.
// This is used to avoid cloning it if the object didn't change.
generation int64
}
//initializeNodeTransientInfo initializes transient information pertaining to node.
func initializeNodeTransientInfo() nodeTransientInfo {
return nodeTransientInfo{AllocatableVolumesCount: 0, RequestedVolumes: 0}
}
// nextGeneration: Let's make sure history never forgets the name...
// Increments the generation number monotonically ensuring that generation numbers never collide.
// Collision of the generation numbers would be particularly problematic if a node was deleted and
// added back with the same name. See issue#63262.
func nextGeneration() int64 {
return atomic.AddInt64(&generation, 1)
}
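
A short demonstration of the pattern nextGeneration relies on: a single process-wide counter bumped with atomic.AddInt64 never hands out the same value twice, even under concurrency, so a node deleted and re-added under the same name cannot reuse an old generation. This is an illustrative standalone program, not the vendored code.

// --- illustrative sketch, not part of the vendored package ---
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

var generation int64 // shared by all objects, never reset

func nextGeneration() int64 {
	return atomic.AddInt64(&generation, 1)
}

func main() {
	var wg sync.WaitGroup
	var seen sync.Map
	for i := 0; i < 1000; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			g := nextGeneration()
			if _, dup := seen.LoadOrStore(g, true); dup {
				fmt.Println("collision at", g) // never happens: AddInt64 is atomic
			}
		}()
	}
	wg.Wait()
	// The counter is monotonic and global, so generations from a deleted node
	// can never collide with those of a node re-added with the same name.
	fmt.Println("last generation:", atomic.LoadInt64(&generation))
}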
// nodeTransientInfo contains transient node information while scheduling.
type nodeTransientInfo struct {
// AllocatableVolumesCount contains number of volumes that could be attached to node.
AllocatableVolumesCount int
// Requested number of volumes on a particular node.
RequestedVolumes int
}
// transientSchedulerInfo is a transient structure that is discarded at the end of each scheduling cycle.
// It holds items that are only valid within a single scheduling cycle and is used for message passing across
// predicates and priorities. Examples of suitable fields are the number of volumes in use on a node, the node's
// current utilization, etc.
// IMPORTANT NOTE: Make sure that each field in this structure is documented along with usage. Expand this structure
// only when absolutely needed as this data structure will be created and destroyed during every scheduling cycle.
type transientSchedulerInfo struct {
TransientLock sync.Mutex
// TransNodeInfo holds the transient node information for the current scheduling cycle.
TransNodeInfo nodeTransientInfo
}
// newTransientSchedulerInfo returns a new scheduler transient structure with initialized values.
func newTransientSchedulerInfo() *transientSchedulerInfo {
tsi := &transientSchedulerInfo{
TransNodeInfo: initializeNodeTransientInfo(),
}
return tsi
}
// resetTransientSchedulerInfo resets the transientSchedulerInfo.
func (transientSchedInfo *transientSchedulerInfo) resetTransientSchedulerInfo() {
transientSchedInfo.TransientLock.Lock()
defer transientSchedInfo.TransientLock.Unlock()
// Reset TransientNodeInfo.
transientSchedInfo.TransNodeInfo.AllocatableVolumesCount = 0
transientSchedInfo.TransNodeInfo.RequestedVolumes = 0
}
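
A hypothetical illustration of the per-cycle scratchpad idea described above: a "predicate" writes counts under the lock, a "priority" reads them later in the same cycle, and the structure is reset before the next cycle begins. All names here are invented for the sketch.

// --- illustrative sketch, not part of the vendored package ---
package main

import (
	"fmt"
	"sync"
)

type cycleScratch struct {
	mu               sync.Mutex
	requestedVolumes int
}

func (s *cycleScratch) reset() {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.requestedVolumes = 0
}

// fakePredicate records how many volumes the pod would add to the node.
func fakePredicate(s *cycleScratch, podVolumes int) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.requestedVolumes += podVolumes
}

// fakePriority reads what the predicate left behind during the same cycle.
func fakePriority(s *cycleScratch) int {
	s.mu.Lock()
	defer s.mu.Unlock()
	return s.requestedVolumes
}

func main() {
	s := &cycleScratch{}
	fakePredicate(s, 3)
	fmt.Println("score input this cycle:", fakePriority(s)) // 3
	s.reset()                                               // end of scheduling cycle
	fmt.Println("score input next cycle:", fakePriority(s)) // 0
}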
// Resource is a collection of compute resource.
type Resource struct {
MilliCPU int64
Memory int64
NvidiaGPU int64
EphemeralStorage int64
// We store allowedPodNumber (which is Node.Status.Allocatable.Pods().Value())
// explicitly as int, to avoid conversions and improve performance.
@ -94,8 +158,6 @@ func (r *Resource) Add(rl v1.ResourceList) {
r.MilliCPU += rQuant.MilliValue()
case v1.ResourceMemory:
r.Memory += rQuant.Value()
case v1.ResourceNvidiaGPU:
r.NvidiaGPU += rQuant.Value()
case v1.ResourcePods:
r.AllowedPodNumber += int(rQuant.Value())
case v1.ResourceEphemeralStorage:
@ -113,7 +175,6 @@ func (r *Resource) ResourceList() v1.ResourceList {
result := v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(r.MilliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(r.Memory, resource.BinarySI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(r.NvidiaGPU, resource.DecimalSI),
v1.ResourcePods: *resource.NewQuantity(int64(r.AllowedPodNumber), resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(r.EphemeralStorage, resource.BinarySI),
}
@ -132,7 +193,6 @@ func (r *Resource) Clone() *Resource {
res := &Resource{
MilliCPU: r.MilliCPU,
Memory: r.Memory,
NvidiaGPU: r.NvidiaGPU,
AllowedPodNumber: r.AllowedPodNumber,
EphemeralStorage: r.EphemeralStorage,
}
@ -159,6 +219,37 @@ func (r *Resource) SetScalar(name v1.ResourceName, quantity int64) {
r.ScalarResources[name] = quantity
}
// SetMaxResource compares with ResourceList and takes max value for each Resource.
func (r *Resource) SetMaxResource(rl v1.ResourceList) {
if r == nil {
return
}
for rName, rQuantity := range rl {
switch rName {
case v1.ResourceMemory:
if mem := rQuantity.Value(); mem > r.Memory {
r.Memory = mem
}
case v1.ResourceCPU:
if cpu := rQuantity.MilliValue(); cpu > r.MilliCPU {
r.MilliCPU = cpu
}
case v1.ResourceEphemeralStorage:
if ephemeralStorage := rQuantity.Value(); ephemeralStorage > r.EphemeralStorage {
r.EphemeralStorage = ephemeralStorage
}
default:
if v1helper.IsScalarResourceName(rName) {
value := rQuantity.Value()
if value > r.ScalarResources[rName] {
r.SetScalar(rName, value)
}
}
}
}
}
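
The element-wise "take the max" merge that SetMaxResource performs on a v1.ResourceList, reduced to a self-contained sketch with plain int64 milli-values standing in for resource.Quantity (a typical use of this shape of merge is folding one container's limits into a running maximum):

// --- illustrative sketch, not part of the vendored package ---
package main

import "fmt"

// setMax raises each entry of dst to at least the corresponding value in src.
func setMax(dst, src map[string]int64) {
	for name, v := range src {
		if v > dst[name] {
			dst[name] = v
		}
	}
}

func main() {
	total := map[string]int64{"cpu": 500, "memory": 256}
	other := map[string]int64{"cpu": 1000, "memory": 128}
	setMax(total, other)
	fmt.Println(total) // cpu is raised to 1000, memory stays at 256
}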
// NewNodeInfo returns a ready to use empty NodeInfo object.
// If any pods are given in arguments, their information will be aggregated in
// the returned object.
@ -167,8 +258,10 @@ func NewNodeInfo(pods ...*v1.Pod) *NodeInfo {
requestedResource: &Resource{},
nonzeroRequest: &Resource{},
allocatableResource: &Resource{},
generation: 0,
TransientInfo: newTransientSchedulerInfo(),
generation: nextGeneration(),
usedPorts: make(util.HostPortInfo),
imageSizes: make(map[string]int64),
}
for _, pod := range pods {
ni.AddPod(pod)
@ -200,6 +293,14 @@ func (n *NodeInfo) UsedPorts() util.HostPortInfo {
return n.usedPorts
}
// ImageSizes returns the image size information on this node.
func (n *NodeInfo) ImageSizes() map[string]int64 {
if n == nil {
return nil
}
return n.imageSizes
}
// PodsWithAffinity return all pods with (anti)affinity constraints on this node.
func (n *NodeInfo) PodsWithAffinity() []*v1.Pod {
if n == nil {
@ -240,6 +341,14 @@ func (n *NodeInfo) DiskPressureCondition() v1.ConditionStatus {
return n.diskPressureCondition
}
// PIDPressureCondition returns the pid pressure condition status on this node.
func (n *NodeInfo) PIDPressureCondition() v1.ConditionStatus {
if n == nil {
return v1.ConditionUnknown
}
return n.pidPressureCondition
}
// RequestedResource returns aggregated resource request of pods on this node.
func (n *NodeInfo) RequestedResource() Resource {
if n == nil {
@ -267,6 +376,7 @@ func (n *NodeInfo) AllocatableResource() Resource {
// SetAllocatableResource sets the allocatableResource information of given node.
func (n *NodeInfo) SetAllocatableResource(allocatableResource *Resource) {
n.allocatableResource = allocatableResource
n.generation = nextGeneration()
}
// Clone returns a copy of this node.
@ -277,9 +387,12 @@ func (n *NodeInfo) Clone() *NodeInfo {
nonzeroRequest: n.nonzeroRequest.Clone(),
allocatableResource: n.allocatableResource.Clone(),
taintsErr: n.taintsErr,
TransientInfo: n.TransientInfo,
memoryPressureCondition: n.memoryPressureCondition,
diskPressureCondition: n.diskPressureCondition,
pidPressureCondition: n.pidPressureCondition,
usedPorts: make(util.HostPortInfo),
imageSizes: n.imageSizes,
generation: n.generation,
}
if len(n.pods) > 0 {
@ -299,6 +412,17 @@ func (n *NodeInfo) Clone() *NodeInfo {
return clone
}
// VolumeLimits returns volume limits associated with the node
func (n *NodeInfo) VolumeLimits() map[v1.ResourceName]int64 {
volumeLimits := map[v1.ResourceName]int64{}
for k, v := range n.AllocatableResource().ScalarResources {
if v1helper.IsAttachableVolumeResourceName(k) {
volumeLimits[k] = v
}
}
return volumeLimits
}
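
A hypothetical version of the filtering VolumeLimits performs: keep only the scalar resources whose name marks an attachable-volume limit. The real check is v1helper.IsAttachableVolumeResourceName; the prefix used below is an assumption for this sketch, chosen to mirror names such as "attachable-volumes-aws-ebs" that appear in the tests later in this diff.

// --- illustrative sketch, not part of the vendored package ---
package main

import (
	"fmt"
	"strings"
)

const attachableVolumesPrefix = "attachable-volumes-" // assumed prefix for this sketch

// volumeLimits keeps only scalar resources that look like attachable-volume limits.
func volumeLimits(scalars map[string]int64) map[string]int64 {
	out := map[string]int64{}
	for name, v := range scalars {
		if strings.HasPrefix(name, attachableVolumesPrefix) {
			out[name] = v
		}
	}
	return out
}

func main() {
	scalars := map[string]int64{
		"attachable-volumes-aws-ebs": 39,
		"hugepages-2Mi":              4,
	}
	fmt.Println(volumeLimits(scalars)) // only the attachable-volumes entry survives
}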
// String returns representation of human readable format of this NodeInfo.
func (n *NodeInfo) String() string {
podKeys := make([]string, len(n.pods))
@ -319,7 +443,6 @@ func (n *NodeInfo) AddPod(pod *v1.Pod) {
res, non0CPU, non0Mem := calculateResource(pod)
n.requestedResource.MilliCPU += res.MilliCPU
n.requestedResource.Memory += res.Memory
n.requestedResource.NvidiaGPU += res.NvidiaGPU
n.requestedResource.EphemeralStorage += res.EphemeralStorage
if n.requestedResource.ScalarResources == nil && len(res.ScalarResources) > 0 {
n.requestedResource.ScalarResources = map[v1.ResourceName]int64{}
@ -337,7 +460,7 @@ func (n *NodeInfo) AddPod(pod *v1.Pod) {
// Consume ports when pods added.
n.updateUsedPorts(pod, true)
n.generation++
n.generation = nextGeneration()
}
// RemovePod subtracts pod information from this NodeInfo.
@ -375,7 +498,6 @@ func (n *NodeInfo) RemovePod(pod *v1.Pod) error {
n.requestedResource.MilliCPU -= res.MilliCPU
n.requestedResource.Memory -= res.Memory
n.requestedResource.NvidiaGPU -= res.NvidiaGPU
n.requestedResource.EphemeralStorage -= res.EphemeralStorage
if len(res.ScalarResources) > 0 && n.requestedResource.ScalarResources == nil {
n.requestedResource.ScalarResources = map[v1.ResourceName]int64{}
@ -389,7 +511,7 @@ func (n *NodeInfo) RemovePod(pod *v1.Pod) error {
// Release ports when remove Pods.
n.updateUsedPorts(pod, false)
n.generation++
n.generation = nextGeneration()
return nil
}
@ -425,6 +547,17 @@ func (n *NodeInfo) updateUsedPorts(pod *v1.Pod, add bool) {
}
}
func (n *NodeInfo) updateImageSizes() {
node := n.Node()
imageSizes := make(map[string]int64)
for _, image := range node.Status.Images {
for _, name := range image.Names {
imageSizes[name] = image.SizeBytes
}
}
n.imageSizes = imageSizes
}
// SetNode sets the overall node information.
func (n *NodeInfo) SetNode(node *v1.Node) error {
n.node = node
@ -439,11 +572,15 @@ func (n *NodeInfo) SetNode(node *v1.Node) error {
n.memoryPressureCondition = cond.Status
case v1.NodeDiskPressure:
n.diskPressureCondition = cond.Status
case v1.NodePIDPressure:
n.pidPressureCondition = cond.Status
default:
// We ignore other conditions.
}
}
n.generation++
n.TransientInfo = newTransientSchedulerInfo()
n.updateImageSizes()
n.generation = nextGeneration()
return nil
}
@ -458,7 +595,8 @@ func (n *NodeInfo) RemoveNode(node *v1.Node) error {
n.taints, n.taintsErr = nil, nil
n.memoryPressureCondition = v1.ConditionUnknown
n.diskPressureCondition = v1.ConditionUnknown
n.generation++
n.pidPressureCondition = v1.ConditionUnknown
n.generation = nextGeneration()
return nil
}
@ -476,18 +614,18 @@ func (n *NodeInfo) FilterOutPods(pods []*v1.Pod) []*v1.Pod {
}
filtered := make([]*v1.Pod, 0, len(pods))
for _, p := range pods {
if p.Spec.NodeName == node.Name {
// If pod is on the given node, add it to 'filtered' only if it is present in nodeInfo.
podKey, _ := getPodKey(p)
for _, np := range n.Pods() {
npodkey, _ := getPodKey(np)
if npodkey == podKey {
filtered = append(filtered, p)
break
}
}
} else {
if p.Spec.NodeName != node.Name {
filtered = append(filtered, p)
continue
}
// If pod is on the given node, add it to 'filtered' only if it is present in nodeInfo.
podKey, _ := getPodKey(p)
for _, np := range n.Pods() {
npodkey, _ := getPodKey(np)
if npodkey == podKey {
filtered = append(filtered, p)
break
}
}
}
return filtered
@ -495,7 +633,11 @@ func (n *NodeInfo) FilterOutPods(pods []*v1.Pod) []*v1.Pod {
// getPodKey returns the string key of a pod.
func getPodKey(pod *v1.Pod) (string, error) {
return clientcache.MetaNamespaceKeyFunc(pod)
uid := string(pod.UID)
if len(uid) == 0 {
return "", errors.New("Cannot get cache key for pod with empty UID")
}
return uid, nil
}
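
A hypothetical comparison of the two keying schemes relevant to the getPodKey change above: two distinct pod objects can share a namespace/name (delete and recreate), but never a UID, so UID-based keys keep cache entries from aliasing each other. The types and helpers below are invented for the sketch.

// --- illustrative sketch, not part of the vendored package ---
package main

import "fmt"

type podMeta struct {
	Namespace, Name, UID string
}

func nameKey(p podMeta) string { return p.Namespace + "/" + p.Name }

func uidKey(p podMeta) (string, error) {
	if p.UID == "" {
		return "", fmt.Errorf("cannot get cache key for pod with empty UID")
	}
	return p.UID, nil
}

func main() {
	older := podMeta{Namespace: "ns1", Name: "web-0", UID: "uid-a"}
	recreated := podMeta{Namespace: "ns1", Name: "web-0", UID: "uid-b"} // same name, new object

	fmt.Println(nameKey(older) == nameKey(recreated)) // true: namespace/name keys collide
	ka, _ := uidKey(older)
	kb, _ := uidKey(recreated)
	fmt.Println(ka == kb) // false: UID keys keep the two objects distinct
}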
// Filter implements PodFilter interface. It returns false only if the pod node name

View File

@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
package cache
import (
"fmt"
@ -24,7 +24,9 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/util/parsers"
)
func TestNewResource(t *testing.T) {
@ -40,7 +42,6 @@ func TestNewResource(t *testing.T) {
resourceList: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: *resource.NewScaledQuantity(4, -3),
v1.ResourceMemory: *resource.NewQuantity(2000, resource.BinarySI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(1000, resource.DecimalSI),
v1.ResourcePods: *resource.NewQuantity(80, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
"scalar.test/" + "scalar1": *resource.NewQuantity(1, resource.DecimalSI),
@ -49,7 +50,6 @@ func TestNewResource(t *testing.T) {
expected: &Resource{
MilliCPU: 4,
Memory: 2000,
NvidiaGPU: 1000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
@ -75,7 +75,6 @@ func TestResourceList(t *testing.T) {
expected: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: *resource.NewScaledQuantity(0, -3),
v1.ResourceMemory: *resource.NewQuantity(0, resource.BinarySI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourcePods: *resource.NewQuantity(0, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(0, resource.BinarySI),
},
@ -84,18 +83,21 @@ func TestResourceList(t *testing.T) {
resource: &Resource{
MilliCPU: 4,
Memory: 2000,
NvidiaGPU: 1000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
ScalarResources: map[v1.ResourceName]int64{
"scalar.test/scalar1": 1,
"hugepages-test": 2,
"attachable-volumes-aws-ebs": 39,
},
},
expected: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: *resource.NewScaledQuantity(4, -3),
v1.ResourceMemory: *resource.NewQuantity(2000, resource.BinarySI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(1000, resource.DecimalSI),
v1.ResourcePods: *resource.NewQuantity(80, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
"scalar.test/" + "scalar1": *resource.NewQuantity(1, resource.DecimalSI),
"attachable-volumes-aws-ebs": *resource.NewQuantity(39, resource.DecimalSI),
v1.ResourceHugePagesPrefix + "test": *resource.NewQuantity(2, resource.BinarySI),
},
},
@ -122,7 +124,6 @@ func TestResourceClone(t *testing.T) {
resource: &Resource{
MilliCPU: 4,
Memory: 2000,
NvidiaGPU: 1000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
@ -130,7 +131,6 @@ func TestResourceClone(t *testing.T) {
expected: &Resource{
MilliCPU: 4,
Memory: 2000,
NvidiaGPU: 1000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
@ -167,7 +167,6 @@ func TestResourceAddScalar(t *testing.T) {
resource: &Resource{
MilliCPU: 4,
Memory: 2000,
NvidiaGPU: 1000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"hugepages-test": 2},
@ -177,7 +176,6 @@ func TestResourceAddScalar(t *testing.T) {
expected: &Resource{
MilliCPU: 4,
Memory: 2000,
NvidiaGPU: 1000,
EphemeralStorage: 5000,
AllowedPodNumber: 80,
ScalarResources: map[v1.ResourceName]int64{"hugepages-test": 2, "scalar2": 200},
@ -193,6 +191,96 @@ func TestResourceAddScalar(t *testing.T) {
}
}
func TestSetMaxResource(t *testing.T) {
tests := []struct {
resource *Resource
resourceList v1.ResourceList
expected *Resource
}{
{
resource: &Resource{},
resourceList: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: *resource.NewScaledQuantity(4, -3),
v1.ResourceMemory: *resource.NewQuantity(2000, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI),
},
expected: &Resource{
MilliCPU: 4,
Memory: 2000,
EphemeralStorage: 5000,
},
},
{
resource: &Resource{
MilliCPU: 4,
Memory: 4000,
EphemeralStorage: 5000,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 1, "hugepages-test": 2},
},
resourceList: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: *resource.NewScaledQuantity(4, -3),
v1.ResourceMemory: *resource.NewQuantity(2000, resource.BinarySI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(7000, resource.BinarySI),
"scalar.test/scalar1": *resource.NewQuantity(4, resource.DecimalSI),
v1.ResourceHugePagesPrefix + "test": *resource.NewQuantity(5, resource.BinarySI),
},
expected: &Resource{
MilliCPU: 4,
Memory: 4000,
EphemeralStorage: 7000,
ScalarResources: map[v1.ResourceName]int64{"scalar.test/scalar1": 4, "hugepages-test": 5},
},
},
}
for _, test := range tests {
test.resource.SetMaxResource(test.resourceList)
if !reflect.DeepEqual(test.expected, test.resource) {
t.Errorf("expected: %#v, got: %#v", test.expected, test.resource)
}
}
}
func TestImageSizes(t *testing.T) {
ni := fakeNodeInfo()
ni.node = &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test-node",
},
Status: v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/10:" + parsers.DefaultImageTag,
"gcr.io/10:v1",
},
SizeBytes: int64(10 * 1024 * 1024),
},
{
Names: []string{
"gcr.io/50:" + parsers.DefaultImageTag,
"gcr.io/50:v1",
},
SizeBytes: int64(50 * 1024 * 1024),
},
},
},
}
ni.updateImageSizes()
expected := map[string]int64{
"gcr.io/10:" + parsers.DefaultImageTag: 10 * 1024 * 1024,
"gcr.io/10:v1": 10 * 1024 * 1024,
"gcr.io/50:" + parsers.DefaultImageTag: 50 * 1024 * 1024,
"gcr.io/50:v1": 50 * 1024 * 1024,
}
imageSizes := ni.ImageSizes()
if !reflect.DeepEqual(expected, imageSizes) {
t.Errorf("expected: %#v, got: %#v", expected, imageSizes)
}
}
func TestNewNodeInfo(t *testing.T) {
nodeName := "test-node"
pods := []*v1.Pod{
@ -204,7 +292,6 @@ func TestNewNodeInfo(t *testing.T) {
requestedResource: &Resource{
MilliCPU: 300,
Memory: 1524,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
@ -212,11 +299,11 @@ func TestNewNodeInfo(t *testing.T) {
nonzeroRequest: &Resource{
MilliCPU: 300,
Memory: 1524,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
@ -225,11 +312,13 @@ func TestNewNodeInfo(t *testing.T) {
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
UID: types.UID("test-1"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
@ -256,6 +345,7 @@ func TestNewNodeInfo(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
UID: types.UID("test-2"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
@ -281,7 +371,12 @@ func TestNewNodeInfo(t *testing.T) {
},
}
gen := generation
ni := NewNodeInfo(pods...)
if ni.generation <= gen {
t.Errorf("generation is not incremented. previous: %v, current: %v", gen, ni.generation)
}
expected.generation = ni.generation
if !reflect.DeepEqual(expected, ni) {
t.Errorf("expected: %#v, got: %#v", expected, ni)
}
@ -297,6 +392,7 @@ func TestNodeInfoClone(t *testing.T) {
nodeInfo: &NodeInfo{
requestedResource: &Resource{},
nonzeroRequest: &Resource{},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
@ -305,11 +401,15 @@ func TestNodeInfoClone(t *testing.T) {
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{
"gcr.io/10": 10 * 1024 * 1024,
},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
UID: types.UID("test-1"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
@ -336,6 +436,7 @@ func TestNodeInfoClone(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
UID: types.UID("test-2"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
@ -363,6 +464,7 @@ func TestNodeInfoClone(t *testing.T) {
expected: &NodeInfo{
requestedResource: &Resource{},
nonzeroRequest: &Resource{},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
@ -371,11 +473,15 @@ func TestNodeInfoClone(t *testing.T) {
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{
"gcr.io/10": 10 * 1024 * 1024,
},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
UID: types.UID("test-1"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
@ -402,6 +508,7 @@ func TestNodeInfoClone(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
UID: types.UID("test-2"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
@ -446,6 +553,7 @@ func TestNodeInfoAddPod(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
UID: types.UID("test-1"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
@ -472,6 +580,7 @@ func TestNodeInfoAddPod(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
UID: types.UID("test-2"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
@ -504,7 +613,6 @@ func TestNodeInfoAddPod(t *testing.T) {
requestedResource: &Resource{
MilliCPU: 300,
Memory: 1524,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
@ -512,11 +620,11 @@ func TestNodeInfoAddPod(t *testing.T) {
nonzeroRequest: &Resource{
MilliCPU: 300,
Memory: 1524,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
@ -525,11 +633,13 @@ func TestNodeInfoAddPod(t *testing.T) {
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
UID: types.UID("test-1"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
@ -556,6 +666,7 @@ func TestNodeInfoAddPod(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
UID: types.UID("test-2"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
@ -582,10 +693,16 @@ func TestNodeInfoAddPod(t *testing.T) {
}
ni := fakeNodeInfo()
gen := ni.generation
for _, pod := range pods {
ni.AddPod(pod)
if ni.generation <= gen {
t.Errorf("generation is not incremented. Prev: %v, current: %v", gen, ni.generation)
}
gen = ni.generation
}
expected.generation = ni.generation
if !reflect.DeepEqual(expected, ni) {
t.Errorf("expected: %#v, got: %#v", expected, ni)
}
@ -615,7 +732,6 @@ func TestNodeInfoRemovePod(t *testing.T) {
requestedResource: &Resource{
MilliCPU: 300,
Memory: 1524,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
@ -623,11 +739,11 @@ func TestNodeInfoRemovePod(t *testing.T) {
nonzeroRequest: &Resource{
MilliCPU: 300,
Memory: 1524,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
generation: 2,
usedPorts: util.HostPortInfo{
@ -636,11 +752,13 @@ func TestNodeInfoRemovePod(t *testing.T) {
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
UID: types.UID("test-1"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
@ -667,6 +785,7 @@ func TestNodeInfoRemovePod(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
UID: types.UID("test-2"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
@ -697,6 +816,7 @@ func TestNodeInfoRemovePod(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-1",
UID: types.UID("test-1"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
@ -729,7 +849,6 @@ func TestNodeInfoRemovePod(t *testing.T) {
requestedResource: &Resource{
MilliCPU: 200,
Memory: 1024,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
@ -737,11 +856,11 @@ func TestNodeInfoRemovePod(t *testing.T) {
nonzeroRequest: &Resource{
MilliCPU: 200,
Memory: 1024,
NvidiaGPU: 0,
EphemeralStorage: 0,
AllowedPodNumber: 0,
ScalarResources: map[v1.ResourceName]int64(nil),
},
TransientInfo: newTransientSchedulerInfo(),
allocatableResource: &Resource{},
generation: 3,
usedPorts: util.HostPortInfo{
@ -749,11 +868,13 @@ func TestNodeInfoRemovePod(t *testing.T) {
{Protocol: "TCP", Port: 8080}: {},
},
},
imageSizes: map[string]int64{},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "node_info_cache_test",
Name: "test-2",
UID: types.UID("test-2"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
@ -784,6 +905,7 @@ func TestNodeInfoRemovePod(t *testing.T) {
for _, test := range tests {
ni := fakeNodeInfo(pods...)
gen := ni.generation
err := ni.RemovePod(test.pod)
if err != nil {
if test.errExpected {
@ -794,8 +916,13 @@ func TestNodeInfoRemovePod(t *testing.T) {
} else {
t.Errorf("expected no error, got: %v", err)
}
} else {
if ni.generation <= gen {
t.Errorf("generation is not incremented. Prev: %v, current: %v", gen, ni.generation)
}
}
test.expectedNodeInfo.generation = ni.generation
if !reflect.DeepEqual(test.expectedNodeInfo, ni) {
t.Errorf("expected: %#v, got: %#v", test.expectedNodeInfo, ni)
}

View File

@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
package cache
import "k8s.io/api/core/v1"

View File

@ -21,7 +21,7 @@ go_test(
"//pkg/scheduler/algorithm/priorities:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
@ -29,6 +29,7 @@ go_test(
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
],
@ -49,13 +50,12 @@ go_library(
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/metrics:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//pkg/util/hash:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/golang/groupcache/lru:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",

View File

@ -17,35 +17,30 @@ limitations under the License.
package core
import (
"fmt"
"hash/fnv"
"sync"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
hashutil "k8s.io/kubernetes/pkg/util/hash"
"github.com/golang/glog"
"github.com/golang/groupcache/lru"
)
// We use predicate names as cache's key, its count is limited
const maxCacheEntries = 100
// EquivalenceCache holds:
// 1. a map of AlgorithmCache with node name as key
// 2. function to get equivalence pod
type EquivalenceCache struct {
sync.RWMutex
getEquivalencePod algorithm.GetEquivalencePodFunc
algorithmCache map[string]AlgorithmCache
mu sync.RWMutex
algorithmCache map[string]AlgorithmCache
}
// The AlgorithmCache stores PredicateMap with predicate name as key
type AlgorithmCache struct {
// Only consider predicates for now
predicatesCache *lru.Cache
}
// AlgorithmCache is a map from predicate name to the PredicateMap cached for that predicate.
type AlgorithmCache map[string]PredicateMap
// PredicateMap stores HostPredicate with equivalence hash as key
type PredicateMap map[uint64]HostPredicate
@ -56,82 +51,105 @@ type HostPredicate struct {
FailReasons []algorithm.PredicateFailureReason
}
func newAlgorithmCache() AlgorithmCache {
return AlgorithmCache{
predicatesCache: lru.New(maxCacheEntries),
}
}
// NewEquivalenceCache creates a EquivalenceCache object.
func NewEquivalenceCache(getEquivalencePodFunc algorithm.GetEquivalencePodFunc) *EquivalenceCache {
// NewEquivalenceCache returns EquivalenceCache to speed up predicates by caching
// result from previous scheduling.
func NewEquivalenceCache() *EquivalenceCache {
return &EquivalenceCache{
getEquivalencePod: getEquivalencePodFunc,
algorithmCache: make(map[string]AlgorithmCache),
algorithmCache: make(map[string]AlgorithmCache),
}
}
// UpdateCachedPredicateItem updates pod predicate for equivalence class
func (ec *EquivalenceCache) UpdateCachedPredicateItem(
podName, nodeName, predicateKey string,
// RunPredicate will return a cached predicate result. In case of a cache miss, the predicate will
// be run and its results cached for the next call.
//
// NOTE: RunPredicate will not update the equivalence cache if the given NodeInfo is stale.
func (ec *EquivalenceCache) RunPredicate(
pred algorithm.FitPredicate,
predicateKey string,
pod *v1.Pod,
meta algorithm.PredicateMetadata,
nodeInfo *schedulercache.NodeInfo,
equivClassInfo *equivalenceClassInfo,
cache schedulercache.Cache,
) (bool, []algorithm.PredicateFailureReason, error) {
if nodeInfo == nil || nodeInfo.Node() == nil {
// This may happen during tests.
return false, []algorithm.PredicateFailureReason{}, fmt.Errorf("nodeInfo is nil or node is invalid")
}
fit, reasons, invalid := ec.lookupResult(pod.GetName(), nodeInfo.Node().GetName(), predicateKey, equivClassInfo.hash)
if !invalid {
return fit, reasons, nil
}
fit, reasons, err := pred(pod, meta, nodeInfo)
if err != nil {
return fit, reasons, err
}
if cache != nil {
ec.updateResult(pod.GetName(), predicateKey, fit, reasons, equivClassInfo.hash, cache, nodeInfo)
}
return fit, reasons, nil
}
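
A stripped-down sketch of the lookup / run / maybe-cache flow in RunPredicate above: serve a hit from the cache, otherwise evaluate the predicate, and only write the result back when the node data it was computed from is still current. The cache, key and predicate types are toy stand-ins, not the scheduler's.

// --- illustrative sketch, not part of the vendored package ---
package main

import "fmt"

type result struct {
	fit bool
}

type ecache struct {
	results map[string]result // key: node + predicate + equivalence hash
}

func (e *ecache) runPredicate(key string, upToDate bool, pred func() bool) bool {
	if r, ok := e.results[key]; ok {
		return r.fit // cache hit: skip the (possibly expensive) predicate
	}
	fit := pred()
	if upToDate { // mirror of the IsUpToDate guard: never cache results for stale nodes
		e.results[key] = result{fit: fit}
	}
	return fit
}

func main() {
	e := &ecache{results: map[string]result{}}
	calls := 0
	pred := func() bool { calls++; return true }

	e.runPredicate("n1/GeneralPredicates/42", true, pred)
	e.runPredicate("n1/GeneralPredicates/42", true, pred) // served from cache
	fmt.Println("predicate evaluations:", calls)          // 1

	e.runPredicate("n2/GeneralPredicates/42", false, pred) // stale node: run but don't cache
	fmt.Println("cached entries:", len(e.results))         // still 1
}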
// updateResult updates the cached result of a predicate.
func (ec *EquivalenceCache) updateResult(
podName, predicateKey string,
fit bool,
reasons []algorithm.PredicateFailureReason,
equivalenceHash uint64,
needLock bool,
cache schedulercache.Cache,
nodeInfo *schedulercache.NodeInfo,
) {
if needLock {
ec.Lock()
defer ec.Unlock()
ec.mu.Lock()
defer ec.mu.Unlock()
if nodeInfo == nil || nodeInfo.Node() == nil {
// This may happen during tests.
return
}
// Skip update if NodeInfo is stale.
if !cache.IsUpToDate(nodeInfo) {
return
}
nodeName := nodeInfo.Node().GetName()
if _, exist := ec.algorithmCache[nodeName]; !exist {
ec.algorithmCache[nodeName] = newAlgorithmCache()
ec.algorithmCache[nodeName] = AlgorithmCache{}
}
predicateItem := HostPredicate{
Fit: fit,
FailReasons: reasons,
}
// if cached predicate map already exists, just update the predicate by key
if v, ok := ec.algorithmCache[nodeName].predicatesCache.Get(predicateKey); ok {
predicateMap := v.(PredicateMap)
if predicateMap, ok := ec.algorithmCache[nodeName][predicateKey]; ok {
// maps in golang are references, no need to add them back
predicateMap[equivalenceHash] = predicateItem
} else {
ec.algorithmCache[nodeName].predicatesCache.Add(predicateKey,
ec.algorithmCache[nodeName][predicateKey] =
PredicateMap{
equivalenceHash: predicateItem,
})
}
}
glog.V(5).Infof("Updated cached predicate: %v for pod: %v on node: %s, with item %v", predicateKey, podName, nodeName, predicateItem)
}
// PredicateWithECache returns:
// 1. if fit
// 2. reasons if not fit
// 3. if this cache is invalid
// based on cached predicate results
func (ec *EquivalenceCache) PredicateWithECache(
// lookupResult returns cached predicate results:
// 1. if pod fit
// 2. reasons if pod did not fit
// 3. if cache item is not found
func (ec *EquivalenceCache) lookupResult(
podName, nodeName, predicateKey string,
equivalenceHash uint64, needLock bool,
equivalenceHash uint64,
) (bool, []algorithm.PredicateFailureReason, bool) {
if needLock {
ec.RLock()
defer ec.RUnlock()
}
ec.mu.RLock()
defer ec.mu.RUnlock()
glog.V(5).Infof("Begin to calculate predicate: %v for pod: %s on node: %s based on equivalence cache",
predicateKey, podName, nodeName)
if algorithmCache, exist := ec.algorithmCache[nodeName]; exist {
if cachePredicate, exist := algorithmCache.predicatesCache.Get(predicateKey); exist {
predicateMap := cachePredicate.(PredicateMap)
// TODO(resouer) Is it possible a race that cache failed to update immediately?
if hostPredicate, ok := predicateMap[equivalenceHash]; ok {
if hostPredicate.Fit {
return true, []algorithm.PredicateFailureReason{}, false
}
return false, hostPredicate.FailReasons, false
}
// is invalid
return false, []algorithm.PredicateFailureReason{}, true
if hostPredicate, exist := ec.algorithmCache[nodeName][predicateKey][equivalenceHash]; exist {
if hostPredicate.Fit {
return true, []algorithm.PredicateFailureReason{}, false
}
return false, hostPredicate.FailReasons, false
}
// is invalid
return false, []algorithm.PredicateFailureReason{}, true
}
@ -140,12 +158,10 @@ func (ec *EquivalenceCache) InvalidateCachedPredicateItem(nodeName string, predi
if len(predicateKeys) == 0 {
return
}
ec.Lock()
defer ec.Unlock()
if algorithmCache, exist := ec.algorithmCache[nodeName]; exist {
for predicateKey := range predicateKeys {
algorithmCache.predicatesCache.Remove(predicateKey)
}
ec.mu.Lock()
defer ec.mu.Unlock()
for predicateKey := range predicateKeys {
delete(ec.algorithmCache[nodeName], predicateKey)
}
glog.V(5).Infof("Done invalidating cached predicates: %v on node: %s", predicateKeys, nodeName)
}
@ -155,13 +171,12 @@ func (ec *EquivalenceCache) InvalidateCachedPredicateItemOfAllNodes(predicateKey
if len(predicateKeys) == 0 {
return
}
ec.Lock()
defer ec.Unlock()
ec.mu.Lock()
defer ec.mu.Unlock()
// algorithmCache uses nodeName as key, so we just iterate it and invalid given predicates
for _, algorithmCache := range ec.algorithmCache {
for predicateKey := range predicateKeys {
// just use keys is enough
algorithmCache.predicatesCache.Remove(predicateKey)
delete(algorithmCache, predicateKey)
}
}
glog.V(5).Infof("Done invalidating cached predicates: %v on all node", predicateKeys)
@ -169,8 +184,8 @@ func (ec *EquivalenceCache) InvalidateCachedPredicateItemOfAllNodes(predicateKey
// InvalidateAllCachedPredicateItemOfNode marks all cached items on given node as invalid
func (ec *EquivalenceCache) InvalidateAllCachedPredicateItemOfNode(nodeName string) {
ec.Lock()
defer ec.Unlock()
ec.mu.Lock()
defer ec.mu.Unlock()
delete(ec.algorithmCache, nodeName)
glog.V(5).Infof("Done invalidating all cached predicates on node: %s", nodeName)
}
@ -191,21 +206,21 @@ func (ec *EquivalenceCache) InvalidateCachedPredicateItemForPodAdd(pod *v1.Pod,
// it will also fit the equivalence class of existing pods
// GeneralPredicates: will always be affected by adding a new pod
invalidPredicates := sets.NewString("GeneralPredicates")
invalidPredicates := sets.NewString(predicates.GeneralPred)
// MaxPDVolumeCountPredicate: we check the volumes of pod to make decision.
for _, vol := range pod.Spec.Volumes {
if vol.PersistentVolumeClaim != nil {
invalidPredicates.Insert("MaxEBSVolumeCount", "MaxGCEPDVolumeCount", "MaxAzureDiskVolumeCount")
invalidPredicates.Insert(predicates.MaxEBSVolumeCountPred, predicates.MaxGCEPDVolumeCountPred, predicates.MaxAzureDiskVolumeCountPred)
} else {
if vol.AWSElasticBlockStore != nil {
invalidPredicates.Insert("MaxEBSVolumeCount")
invalidPredicates.Insert(predicates.MaxEBSVolumeCountPred)
}
if vol.GCEPersistentDisk != nil {
invalidPredicates.Insert("MaxGCEPDVolumeCount")
invalidPredicates.Insert(predicates.MaxGCEPDVolumeCountPred)
}
if vol.AzureDisk != nil {
invalidPredicates.Insert("MaxAzureDiskVolumeCount")
invalidPredicates.Insert(predicates.MaxAzureDiskVolumeCountPred)
}
}
}
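
A hypothetical helper that mirrors the volume-source branching above: given the volume sources a newly added pod uses, collect the predicate caches worth invalidating. The key strings echo the predicate-name constants in this diff, but the function itself is an illustration, not the vendored code.

// --- illustrative sketch, not part of the vendored package ---
package main

import "fmt"

type volume struct {
	pvc, awsEBS, gcePD, azureDisk bool
}

func invalidatedPredicates(vols []volume) map[string]bool {
	keys := map[string]bool{"GeneralPredicates": true} // always affected by a new pod
	for _, v := range vols {
		if v.pvc {
			// A claim could bind to any of the count-limited volume types.
			keys["MaxEBSVolumeCount"] = true
			keys["MaxGCEPDVolumeCount"] = true
			keys["MaxAzureDiskVolumeCount"] = true
		} else {
			if v.awsEBS {
				keys["MaxEBSVolumeCount"] = true
			}
			if v.gcePD {
				keys["MaxGCEPDVolumeCount"] = true
			}
			if v.azureDisk {
				keys["MaxAzureDiskVolumeCount"] = true
			}
		}
	}
	return keys
}

func main() {
	// An EBS-backed pod invalidates GeneralPredicates and MaxEBSVolumeCount only.
	fmt.Println(invalidatedPredicates([]volume{{awsEBS: true}}))
}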
@ -219,9 +234,11 @@ type equivalenceClassInfo struct {
hash uint64
}
// getEquivalenceClassInfo returns the equivalence class of given pod.
// getEquivalenceClassInfo returns a hash of the given pod.
// The hashing function returns the same value for any two pods that are
// equivalent from the perspective of scheduling.
func (ec *EquivalenceCache) getEquivalenceClassInfo(pod *v1.Pod) *equivalenceClassInfo {
equivalencePod := ec.getEquivalencePod(pod)
equivalencePod := getEquivalenceHash(pod)
if equivalencePod != nil {
hash := fnv.New32a()
hashutil.DeepHashObject(hash, equivalencePod)
@ -231,3 +248,60 @@ func (ec *EquivalenceCache) getEquivalenceClassInfo(pod *v1.Pod) *equivalenceCla
}
return nil
}
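
An approximation of the hashing step in getEquivalenceClassInfo: write a deterministic textual form of the scheduling-relevant fields into an FNV-32a hash, so that equivalent pods land in the same class. The real code delegates to hashutil.DeepHashObject; printing with %#v here is only a stand-in for that, and the struct is a reduced version of equivalencePod.

// --- illustrative sketch, not part of the vendored package ---
package main

import (
	"fmt"
	"hash/fnv"
)

type miniEquivalencePod struct {
	Namespace    string
	Labels       map[string]string
	NodeSelector map[string]string
}

func equivalenceHash(ep miniEquivalencePod) uint64 {
	h := fnv.New32a()
	// NOTE: %#v is only deterministic here because the maps stay nil;
	// DeepHashObject handles richer structures in the real code.
	fmt.Fprintf(h, "%#v", ep)
	return uint64(h.Sum32())
}

func main() {
	a := miniEquivalencePod{Namespace: "test-ns"}
	b := miniEquivalencePod{Namespace: "test-ns"}
	c := miniEquivalencePod{Namespace: "other-ns"}
	fmt.Println(equivalenceHash(a) == equivalenceHash(b)) // true: same equivalence class
	fmt.Println(equivalenceHash(a) == equivalenceHash(c)) // false: different class
}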
// equivalencePod is the set of pod attributes which must match for two pods to
// be considered equivalent for scheduling purposes. For correctness, this must
// include any Pod field which is used by a FitPredicate.
//
// NOTE: For equivalence hash to be formally correct, lists and maps in the
// equivalencePod should be normalized. (e.g. by sorting them) However, the
// vast majority of equivalent pod classes are expected to be created from a
// single pod template, so they will all have the same ordering.
type equivalencePod struct {
Namespace *string
Labels map[string]string
Affinity *v1.Affinity
Containers []v1.Container // See note about ordering
InitContainers []v1.Container // See note about ordering
NodeName *string
NodeSelector map[string]string
Tolerations []v1.Toleration
Volumes []v1.Volume // See note about ordering
}
// getEquivalenceHash returns the equivalencePod for a Pod.
func getEquivalenceHash(pod *v1.Pod) *equivalencePod {
ep := &equivalencePod{
Namespace: &pod.Namespace,
Labels: pod.Labels,
Affinity: pod.Spec.Affinity,
Containers: pod.Spec.Containers,
InitContainers: pod.Spec.InitContainers,
NodeName: &pod.Spec.NodeName,
NodeSelector: pod.Spec.NodeSelector,
Tolerations: pod.Spec.Tolerations,
Volumes: pod.Spec.Volumes,
}
// DeepHashObject considers nil and empty slices to be different. Normalize them.
if len(ep.Containers) == 0 {
ep.Containers = nil
}
if len(ep.InitContainers) == 0 {
ep.InitContainers = nil
}
if len(ep.Tolerations) == 0 {
ep.Tolerations = nil
}
if len(ep.Volumes) == 0 {
ep.Volumes = nil
}
// Normalize empty maps also.
if len(ep.Labels) == 0 {
ep.Labels = nil
}
if len(ep.NodeSelector) == 0 {
ep.NodeSelector = nil
}
// TODO(misterikkit): Also normalize nested maps and slices.
return ep
}
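
Why the normalization above matters, in a self-contained sketch: a representation-based deep hash tells a nil slice apart from an empty one, even though two such pods are equivalent for scheduling purposes. This is exactly what the nil-versus-empty test cases further down exercise; the hash below approximates DeepHashObject with %#v.

// --- illustrative sketch, not part of the vendored package ---
package main

import (
	"fmt"
	"hash/fnv"
)

func hashOf(v interface{}) uint32 {
	h := fnv.New32a()
	fmt.Fprintf(h, "%#v", v) // stand-in for hashutil.DeepHashObject
	return h.Sum32()
}

func main() {
	var nilVolumes []string    // like a pod with Volumes == nil
	emptyVolumes := []string{} // like a pod with Volumes == []v1.Volume{}

	fmt.Println(hashOf(nilVolumes) == hashOf(emptyVolumes)) // false: hashes differ

	// Normalizing the empty slice to nil, as getEquivalenceHash does, restores equality.
	if len(emptyVolumes) == 0 {
		emptyVolumes = nil
	}
	fmt.Println(hashOf(nilVolumes) == hashOf(emptyVolumes)) // true
}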

View File

@ -17,22 +17,291 @@ limitations under the License.
package core
import (
"errors"
"reflect"
"sync"
"testing"
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
// makeBasicPod returns a Pod object with many of the fields populated.
func makeBasicPod(name string) *v1.Pod {
isController := true
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: "test-ns",
Labels: map[string]string{"app": "web", "env": "prod"},
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1",
Kind: "ReplicationController",
Name: "rc",
UID: "123",
Controller: &isController,
},
},
},
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "failure-domain.beta.kubernetes.io/zone",
Operator: "Exists",
},
},
},
},
},
},
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{"app": "db"}},
TopologyKey: "kubernetes.io/hostname",
},
},
},
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{"app": "web"}},
TopologyKey: "kubernetes.io/hostname",
},
},
},
},
InitContainers: []v1.Container{
{
Name: "init-pause",
Image: "gcr.io/google_containers/pause",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
"cpu": resource.MustParse("1"),
"mem": resource.MustParse("100Mi"),
},
},
},
},
Containers: []v1.Container{
{
Name: "pause",
Image: "gcr.io/google_containers/pause",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
"cpu": resource.MustParse("1"),
"mem": resource.MustParse("100Mi"),
},
},
VolumeMounts: []v1.VolumeMount{
{
Name: "nfs",
MountPath: "/srv/data",
},
},
},
},
NodeSelector: map[string]string{"node-type": "awesome"},
Tolerations: []v1.Toleration{
{
Effect: "NoSchedule",
Key: "experimental",
Operator: "Exists",
},
},
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol1",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol2",
},
},
},
{
Name: "nfs",
VolumeSource: v1.VolumeSource{
NFS: &v1.NFSVolumeSource{
Server: "nfs.corp.example.com",
},
},
},
},
},
}
}
type predicateItemType struct {
fit bool
reasons []algorithm.PredicateFailureReason
}
func TestUpdateCachedPredicateItem(t *testing.T) {
// upToDateCache is a fake Cache where IsUpToDate always returns true.
type upToDateCache = schedulertesting.FakeCache
// staleNodeCache is a fake Cache where IsUpToDate always returns false.
type staleNodeCache struct {
schedulertesting.FakeCache
}
func (c *staleNodeCache) IsUpToDate(*schedulercache.NodeInfo) bool { return false }
// mockPredicate provides an algorithm.FitPredicate with pre-set return values.
type mockPredicate struct {
fit bool
reasons []algorithm.PredicateFailureReason
err error
callCount int
}
func (p *mockPredicate) predicate(*v1.Pod, algorithm.PredicateMetadata, *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
p.callCount++
return p.fit, p.reasons, p.err
}
func TestRunPredicate(t *testing.T) {
tests := []struct {
name string
pred mockPredicate
cache schedulercache.Cache
expectFit, expectCacheHit, expectCacheWrite bool
expectedReasons []algorithm.PredicateFailureReason
expectedError string
}{
{
name: "pod fits/cache hit",
pred: mockPredicate{},
cache: &upToDateCache{},
expectFit: true,
expectCacheHit: true,
expectCacheWrite: false,
},
{
name: "pod fits/cache miss",
pred: mockPredicate{fit: true},
cache: &upToDateCache{},
expectFit: true,
expectCacheHit: false,
expectCacheWrite: true,
},
{
name: "pod fits/cache miss/no write",
pred: mockPredicate{fit: true},
cache: &staleNodeCache{},
expectFit: true,
expectCacheHit: false,
expectCacheWrite: false,
},
{
name: "pod doesn't fit/cache miss",
pred: mockPredicate{reasons: []algorithm.PredicateFailureReason{predicates.ErrFakePredicate}},
cache: &upToDateCache{},
expectFit: false,
expectCacheHit: false,
expectCacheWrite: true,
expectedReasons: []algorithm.PredicateFailureReason{predicates.ErrFakePredicate},
},
{
name: "pod doesn't fit/cache hit",
pred: mockPredicate{},
cache: &upToDateCache{},
expectFit: false,
expectCacheHit: true,
expectCacheWrite: false,
expectedReasons: []algorithm.PredicateFailureReason{predicates.ErrFakePredicate},
},
{
name: "predicate error",
pred: mockPredicate{err: errors.New("This is expected")},
cache: &upToDateCache{},
expectFit: false,
expectCacheHit: false,
expectCacheWrite: false,
expectedError: "This is expected",
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
node := schedulercache.NewNodeInfo()
node.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "n1"}})
pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1"}}
meta := algorithm.EmptyPredicateMetadataProducer(nil, nil)
ecache := NewEquivalenceCache()
equivClass := ecache.getEquivalenceClassInfo(pod)
if test.expectCacheHit {
ecache.updateResult(pod.Name, "testPredicate", test.expectFit, test.expectedReasons, equivClass.hash, test.cache, node)
}
fit, reasons, err := ecache.RunPredicate(test.pred.predicate, "testPredicate", pod, meta, node, equivClass, test.cache)
if err != nil {
if err.Error() != test.expectedError {
t.Errorf("Expected error %v but got %v", test.expectedError, err)
}
} else if len(test.expectedError) > 0 {
t.Errorf("Expected error %v but got nil", test.expectedError)
}
if fit && !test.expectFit {
t.Errorf("pod should not fit")
}
if !fit && test.expectFit {
t.Errorf("pod should fit")
}
if len(reasons) != len(test.expectedReasons) {
t.Errorf("Expected failures: %v but got %v", test.expectedReasons, reasons)
} else {
for i, reason := range reasons {
if reason != test.expectedReasons[i] {
t.Errorf("Expected failures: %v but got %v", test.expectedReasons, reasons)
break
}
}
}
if test.expectCacheHit && test.pred.callCount != 0 {
t.Errorf("Predicate should not be called")
}
if !test.expectCacheHit && test.pred.callCount == 0 {
t.Errorf("Predicate should be called")
}
_, _, invalid := ecache.lookupResult(pod.Name, node.Node().Name, "testPredicate", equivClass.hash)
if invalid && test.expectCacheWrite {
t.Errorf("Cache write should happen")
}
if !test.expectCacheHit && test.expectCacheWrite && invalid {
t.Errorf("Cache write should happen")
}
if !test.expectCacheHit && !test.expectCacheWrite && !invalid {
t.Errorf("Cache write should not happen")
}
})
}
}
func TestUpdateResult(t *testing.T) {
tests := []struct {
name string
pod string
@ -43,6 +312,7 @@ func TestUpdateCachedPredicateItem(t *testing.T) {
equivalenceHash uint64
expectPredicateMap bool
expectCacheItem HostPredicate
cache schedulercache.Cache
}{
{
name: "test 1",
@ -55,6 +325,7 @@ func TestUpdateCachedPredicateItem(t *testing.T) {
expectCacheItem: HostPredicate{
Fit: true,
},
cache: &upToDateCache{},
},
{
name: "test 2",
@ -67,38 +338,39 @@ func TestUpdateCachedPredicateItem(t *testing.T) {
expectCacheItem: HostPredicate{
Fit: false,
},
cache: &upToDateCache{},
},
}
for _, test := range tests {
// this case does not need to calculate equivalence hash, just pass an empty function
fakeGetEquivalencePodFunc := func(pod *v1.Pod) interface{} { return nil }
ecache := NewEquivalenceCache(fakeGetEquivalencePodFunc)
ecache := NewEquivalenceCache()
if test.expectPredicateMap {
ecache.algorithmCache[test.nodeName] = newAlgorithmCache()
ecache.algorithmCache[test.nodeName] = AlgorithmCache{}
predicateItem := HostPredicate{
Fit: true,
}
ecache.algorithmCache[test.nodeName].predicatesCache.Add(test.predicateKey,
ecache.algorithmCache[test.nodeName][test.predicateKey] =
PredicateMap{
test.equivalenceHash: predicateItem,
})
}
}
ecache.UpdateCachedPredicateItem(
node := schedulercache.NewNodeInfo()
node.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: test.nodeName}})
ecache.updateResult(
test.pod,
test.nodeName,
test.predicateKey,
test.fit,
test.reasons,
test.equivalenceHash,
true,
test.cache,
node,
)
value, ok := ecache.algorithmCache[test.nodeName].predicatesCache.Get(test.predicateKey)
cachedMapItem, ok := ecache.algorithmCache[test.nodeName][test.predicateKey]
if !ok {
t.Errorf("Failed: %s, can't find expected cache item: %v",
test.name, test.expectCacheItem)
} else {
cachedMapItem := value.(PredicateMap)
if !reflect.DeepEqual(cachedMapItem[test.equivalenceHash], test.expectCacheItem) {
t.Errorf("Failed: %s, expected cached item: %v, but got: %v",
test.name, test.expectCacheItem, cachedMapItem[test.equivalenceHash])
@ -107,7 +379,15 @@ func TestUpdateCachedPredicateItem(t *testing.T) {
}
}
func TestPredicateWithECache(t *testing.T) {
// slicesEqual wraps reflect.DeepEqual, but returns true when comparing nil and empty slice.
func slicesEqual(a, b []algorithm.PredicateFailureReason) bool {
if len(a) == 0 && len(b) == 0 {
return true
}
return reflect.DeepEqual(a, b)
}
func TestLookupResult(t *testing.T) {
tests := []struct {
name string
podName string
@ -119,6 +399,7 @@ func TestPredicateWithECache(t *testing.T) {
expectedInvalidPredicateKey bool
expectedInvalidEquivalenceHash bool
expectedPredicateItem predicateItemType
cache schedulercache.Cache
}{
{
name: "test 1",
@ -136,6 +417,7 @@ func TestPredicateWithECache(t *testing.T) {
fit: false,
reasons: []algorithm.PredicateFailureReason{},
},
cache: &upToDateCache{},
},
{
name: "test 2",
@ -152,6 +434,7 @@ func TestPredicateWithECache(t *testing.T) {
fit: true,
reasons: []algorithm.PredicateFailureReason{},
},
cache: &upToDateCache{},
},
{
name: "test 3",
@ -169,6 +452,7 @@ func TestPredicateWithECache(t *testing.T) {
fit: false,
reasons: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts},
},
cache: &upToDateCache{},
},
{
name: "test 4",
@ -187,22 +471,23 @@ func TestPredicateWithECache(t *testing.T) {
fit: false,
reasons: []algorithm.PredicateFailureReason{},
},
cache: &upToDateCache{},
},
}
for _, test := range tests {
// this case does not need to calculate equivalence hash, just pass an empty function
fakeGetEquivalencePodFunc := func(pod *v1.Pod) interface{} { return nil }
ecache := NewEquivalenceCache(fakeGetEquivalencePodFunc)
ecache := NewEquivalenceCache()
node := schedulercache.NewNodeInfo()
node.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: test.nodeName}})
// set cached item to equivalence cache
ecache.UpdateCachedPredicateItem(
ecache.updateResult(
test.podName,
test.nodeName,
test.predicateKey,
test.cachedItem.fit,
test.cachedItem.reasons,
test.equivalenceHashForUpdatePredicate,
true,
test.cache,
node,
)
// if we want to do invalid, invalid the cached item
if test.expectedInvalidPredicateKey {
@ -211,11 +496,10 @@ func TestPredicateWithECache(t *testing.T) {
ecache.InvalidateCachedPredicateItem(test.nodeName, predicateKeys)
}
// calculate predicate with equivalence cache
fit, reasons, invalid := ecache.PredicateWithECache(test.podName,
fit, reasons, invalid := ecache.lookupResult(test.podName,
test.nodeName,
test.predicateKey,
test.equivalenceHashForCalPredicate,
true,
)
// returned invalid should match expectedInvalidPredicateKey or expectedInvalidEquivalenceHash
if test.equivalenceHashForUpdatePredicate != test.equivalenceHashForCalPredicate {
@ -233,212 +517,53 @@ func TestPredicateWithECache(t *testing.T) {
if fit != test.expectedPredicateItem.fit {
t.Errorf("Failed: %s, expected fit: %v, but got: %v", test.name, test.cachedItem.fit, fit)
}
if !reflect.DeepEqual(reasons, test.expectedPredicateItem.reasons) {
if !slicesEqual(reasons, test.expectedPredicateItem.reasons) {
t.Errorf("Failed: %s, expected reasons: %v, but got: %v",
test.name, test.cachedItem.reasons, reasons)
test.name, test.expectedPredicateItem.reasons, reasons)
}
}
}
func TestGetHashEquivalencePod(t *testing.T) {
func TestGetEquivalenceHash(t *testing.T) {
testNamespace := "test"
ecache := NewEquivalenceCache()
pvcInfo := predicates.FakePersistentVolumeClaimInfo{
pod1 := makeBasicPod("pod1")
pod2 := makeBasicPod("pod2")
pod3 := makeBasicPod("pod3")
pod3.Spec.Volumes = []v1.Volume{
{
ObjectMeta: metav1.ObjectMeta{UID: "someEBSVol1", Name: "someEBSVol1", Namespace: testNamespace},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someEBSVol1"},
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol111",
},
},
},
}
pod4 := makeBasicPod("pod4")
pod4.Spec.Volumes = []v1.Volume{
{
ObjectMeta: metav1.ObjectMeta{UID: "someEBSVol2", Name: "someEBSVol2", Namespace: testNamespace},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someNonEBSVol"},
},
{
ObjectMeta: metav1.ObjectMeta{UID: "someEBSVol3-0", Name: "someEBSVol3-0", Namespace: testNamespace},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "pvcWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{UID: "someEBSVol3-1", Name: "someEBSVol3-1", Namespace: testNamespace},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "anotherPVCWithDeletedPV"},
},
}
// use default equivalence class generator
ecache := NewEquivalenceCache(predicates.NewEquivalencePodGenerator(pvcInfo))
isController := true
pod1 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod1",
Namespace: testNamespace,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1",
Kind: "ReplicationController",
Name: "rc",
UID: "123",
Controller: &isController,
},
},
},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol1",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol2",
},
},
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol222",
},
},
},
}
pod2 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod2",
Namespace: testNamespace,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1",
Kind: "ReplicationController",
Name: "rc",
UID: "123",
Controller: &isController,
},
},
},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol2",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol1",
},
},
},
},
},
}
pod5 := makeBasicPod("pod5")
pod5.Spec.Volumes = []v1.Volume{}
pod3 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod3",
Namespace: testNamespace,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1",
Kind: "ReplicationController",
Name: "rc",
UID: "567",
Controller: &isController,
},
},
},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol3-1",
},
},
},
},
},
}
pod6 := makeBasicPod("pod6")
pod6.Spec.Volumes = nil
pod4 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod4",
Namespace: testNamespace,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1",
Kind: "ReplicationController",
Name: "rc",
UID: "567",
Controller: &isController,
},
},
},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol3-0",
},
},
},
},
},
}
pod7 := makeBasicPod("pod7")
pod7.Spec.NodeSelector = nil
pod5 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod5",
Namespace: testNamespace,
},
}
pod6 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod6",
Namespace: testNamespace,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1",
Kind: "ReplicationController",
Name: "rc",
UID: "567",
Controller: &isController,
},
},
},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "no-exists-pvc",
},
},
},
},
},
}
pod7 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod7",
Namespace: testNamespace,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1",
Kind: "ReplicationController",
Name: "rc",
UID: "567",
Controller: &isController,
},
},
},
}
pod8 := makeBasicPod("pod8")
pod8.Spec.NodeSelector = make(map[string]string)
type podInfo struct {
pod *v1.Pod
@ -446,39 +571,41 @@ func TestGetHashEquivalencePod(t *testing.T) {
}
tests := []struct {
name string
podInfoList []podInfo
isEquivalent bool
}{
// pods with same controllerRef and same pvc claim
{
name: "pods with everything the same except name",
podInfoList: []podInfo{
{pod: pod1, hashIsValid: true},
{pod: pod2, hashIsValid: true},
},
isEquivalent: true,
},
// pods with same controllerRef but different pvc claim
{
name: "pods that only differ in their PVC volume sources",
podInfoList: []podInfo{
{pod: pod3, hashIsValid: true},
{pod: pod4, hashIsValid: true},
},
isEquivalent: false,
},
// pod without controllerRef
{
name: "pods that have no volumes, but one uses nil and one uses an empty slice",
podInfoList: []podInfo{
{pod: pod5, hashIsValid: false},
{pod: pod5, hashIsValid: true},
{pod: pod6, hashIsValid: true},
},
isEquivalent: false,
isEquivalent: true,
},
// pods with same controllerRef but one has non-exists pvc claim
{
name: "pods that have no NodeSelector, but one uses nil and one uses an empty map",
podInfoList: []podInfo{
{pod: pod6, hashIsValid: false},
{pod: pod7, hashIsValid: true},
{pod: pod8, hashIsValid: true},
},
isEquivalent: false,
isEquivalent: true,
},
}
@ -488,28 +615,30 @@ func TestGetHashEquivalencePod(t *testing.T) {
)
for _, test := range tests {
for i, podInfo := range test.podInfoList {
testPod := podInfo.pod
eclassInfo := ecache.getEquivalenceClassInfo(testPod)
if eclassInfo == nil && podInfo.hashIsValid {
t.Errorf("Failed: pod %v is expected to have valid hash", testPod)
}
t.Run(test.name, func(t *testing.T) {
for i, podInfo := range test.podInfoList {
testPod := podInfo.pod
eclassInfo := ecache.getEquivalenceClassInfo(testPod)
if eclassInfo == nil && podInfo.hashIsValid {
t.Errorf("Failed: pod %v is expected to have valid hash", testPod)
}
if eclassInfo != nil {
// NOTE(harry): the first element will be used as target so
// this logic can't verify more than two inequivalent pods
if i == 0 {
targetHash = eclassInfo.hash
targetPodInfo = podInfo
} else {
if targetHash != eclassInfo.hash {
if test.isEquivalent {
t.Errorf("Failed: pod: %v is expected to be equivalent to: %v", testPod, targetPodInfo.pod)
if eclassInfo != nil {
// NOTE(harry): the first element will be used as target so
// this logic can't verify more than two inequivalent pods
if i == 0 {
targetHash = eclassInfo.hash
targetPodInfo = podInfo
} else {
if targetHash != eclassInfo.hash {
if test.isEquivalent {
t.Errorf("Failed: pod: %v is expected to be equivalent to: %v", testPod, targetPodInfo.pod)
}
}
}
}
}
}
})
}
}
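// Illustrative sketch (not part of the original diff): a simplified stand-in for the
// equivalence-class hashing idea the test above exercises. Pods that share an owning
// controller and the same set of PVC claim names land in the same class, and nil and
// empty volume lists are treated identically (the pod5/pod6 case). The real
// getEquivalenceHash hashes far more of the pod spec; the names below (equivalenceHash,
// ownerUID, claimNames) are hypothetical.
package main

import (
	"fmt"
	"hash/fnv"
	"sort"
)

// equivalenceHash hashes only the fields that decide class membership in this sketch.
func equivalenceHash(ownerUID string, claimNames []string) uint64 {
	sorted := append([]string(nil), claimNames...)
	sort.Strings(sorted) // the order of volumes must not change the class
	h := fnv.New64()
	h.Write([]byte(ownerUID))
	for _, c := range sorted {
		h.Write([]byte(c))
	}
	return h.Sum64()
}

func main() {
	// Same controller, same claims in a different order: same class.
	fmt.Println(equivalenceHash("123", []string{"someEBSVol1", "someEBSVol2"}) ==
		equivalenceHash("123", []string{"someEBSVol2", "someEBSVol1"}))
	// nil vs. empty slice: same class, mirroring the pod5/pod6 case above.
	fmt.Println(equivalenceHash("567", nil) == equivalenceHash("567", []string{}))
}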
@ -519,9 +648,9 @@ func TestInvalidateCachedPredicateItemOfAllNodes(t *testing.T) {
tests := []struct {
podName string
nodeName string
predicateKey string
equivalenceHashForUpdatePredicate uint64
cachedItem predicateItemType
cache schedulercache.Cache
}{
{
podName: "testPod",
@ -533,6 +662,7 @@ func TestInvalidateCachedPredicateItemOfAllNodes(t *testing.T) {
predicates.ErrPodNotFitsHostPorts,
},
},
cache: &upToDateCache{},
},
{
podName: "testPod",
@ -544,6 +674,7 @@ func TestInvalidateCachedPredicateItemOfAllNodes(t *testing.T) {
predicates.ErrPodNotFitsHostPorts,
},
},
cache: &upToDateCache{},
},
{
podName: "testPod",
@ -552,22 +683,23 @@ func TestInvalidateCachedPredicateItemOfAllNodes(t *testing.T) {
cachedItem: predicateItemType{
fit: true,
},
cache: &upToDateCache{},
},
}
// this case does not need to calculate equivalence hash, just pass an empty function
fakeGetEquivalencePodFunc := func(pod *v1.Pod) interface{} { return nil }
ecache := NewEquivalenceCache(fakeGetEquivalencePodFunc)
ecache := NewEquivalenceCache()
for _, test := range tests {
node := schedulercache.NewNodeInfo()
node.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: test.nodeName}})
// set cached item to equivalence cache
ecache.UpdateCachedPredicateItem(
ecache.updateResult(
test.podName,
test.nodeName,
testPredicate,
test.cachedItem.fit,
test.cachedItem.reasons,
test.equivalenceHashForUpdatePredicate,
true,
test.cache,
node,
)
}
@ -577,7 +709,7 @@ func TestInvalidateCachedPredicateItemOfAllNodes(t *testing.T) {
// there should be no cached predicate any more
for _, test := range tests {
if algorithmCache, exist := ecache.algorithmCache[test.nodeName]; exist {
if _, exist := algorithmCache.predicatesCache.Get(testPredicate); exist {
if _, exist := algorithmCache[testPredicate]; exist {
t.Errorf("Failed: cached item for predicate key: %v on node: %v should be invalidated",
testPredicate, test.nodeName)
break
@ -592,9 +724,9 @@ func TestInvalidateAllCachedPredicateItemOfNode(t *testing.T) {
tests := []struct {
podName string
nodeName string
predicateKey string
equivalenceHashForUpdatePredicate uint64
cachedItem predicateItemType
cache schedulercache.Cache
}{
{
podName: "testPod",
@ -604,6 +736,7 @@ func TestInvalidateAllCachedPredicateItemOfNode(t *testing.T) {
fit: false,
reasons: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts},
},
cache: &upToDateCache{},
},
{
podName: "testPod",
@ -613,6 +746,7 @@ func TestInvalidateAllCachedPredicateItemOfNode(t *testing.T) {
fit: false,
reasons: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts},
},
cache: &upToDateCache{},
},
{
podName: "testPod",
@ -621,22 +755,23 @@ func TestInvalidateAllCachedPredicateItemOfNode(t *testing.T) {
cachedItem: predicateItemType{
fit: true,
},
cache: &upToDateCache{},
},
}
// this case does not need to calculate equivalence hash, just pass an empty function
fakeGetEquivalencePodFunc := func(pod *v1.Pod) interface{} { return nil }
ecache := NewEquivalenceCache(fakeGetEquivalencePodFunc)
ecache := NewEquivalenceCache()
for _, test := range tests {
node := schedulercache.NewNodeInfo()
node.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: test.nodeName}})
// set cached item to equivalence cache
ecache.UpdateCachedPredicateItem(
ecache.updateResult(
test.podName,
test.nodeName,
testPredicate,
test.cachedItem.fit,
test.cachedItem.reasons,
test.equivalenceHashForUpdatePredicate,
true,
test.cache,
node,
)
}
@ -649,3 +784,107 @@ func TestInvalidateAllCachedPredicateItemOfNode(t *testing.T) {
}
}
}
func BenchmarkEquivalenceHash(b *testing.B) {
pod := makeBasicPod("test")
for i := 0; i < b.N; i++ {
getEquivalenceHash(pod)
}
}
// syncingMockCache delegates method calls to an actual Cache,
// but calls to UpdateNodeNameToInfoMap synchronize with the test.
type syncingMockCache struct {
schedulercache.Cache
cycleStart, cacheInvalidated chan struct{}
once sync.Once
}
// UpdateNodeNameToInfoMap delegates to the real implementation, but on the first call, it
// synchronizes with the test.
//
// Since UpdateNodeNameToInfoMap is one of the first steps of (*genericScheduler).Schedule, we use
// this point to signal to the test that a scheduling cycle has started.
func (c *syncingMockCache) UpdateNodeNameToInfoMap(infoMap map[string]*schedulercache.NodeInfo) error {
err := c.Cache.UpdateNodeNameToInfoMap(infoMap)
c.once.Do(func() {
c.cycleStart <- struct{}{}
<-c.cacheInvalidated
})
return err
}
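// Illustrative sketch (not part of the original diff): the synchronization pattern
// syncingMockCache relies on, reduced to a self-contained example. The first call to
// the wrapped operation signals the test over cycleStart and then blocks until the test
// has done its mid-cycle work and replies on proceed; sync.Once keeps later calls
// unaffected. The type and field names here are hypothetical stand-ins, not scheduler APIs.
package main

import (
	"fmt"
	"sync"
)

type syncingWrapper struct {
	inner      func() error
	cycleStart chan struct{}
	proceed    chan struct{}
	once       sync.Once
}

func (w *syncingWrapper) call() error {
	err := w.inner()
	w.once.Do(func() {
		w.cycleStart <- struct{}{} // tell the test the cycle has started
		<-w.proceed                // wait for the test to finish its injected work
	})
	return err
}

func main() {
	w := &syncingWrapper{
		inner:      func() error { return nil },
		cycleStart: make(chan struct{}),
		proceed:    make(chan struct{}),
	}
	go func() {
		<-w.cycleStart
		fmt.Println("test runs its invalidation here")
		w.proceed <- struct{}{}
	}()
	w.call() // blocks mid-call until the goroutine above replies
	w.call() // later calls are unaffected thanks to sync.Once
}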
// TestEquivalenceCacheInvalidationRace tests that equivalence cache invalidation is correctly
// handled when an invalidation event happens early in a scheduling cycle. Specifically, the event
// occurs after schedulercache is snapshotted and before equivalence cache lock is acquired.
func TestEquivalenceCacheInvalidationRace(t *testing.T) {
// Create a predicate that returns false the first time and true on subsequent calls.
podWillFit := false
var callCount int
testPredicate := func(pod *v1.Pod,
meta algorithm.PredicateMetadata,
nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
callCount++
if !podWillFit {
podWillFit = true
return false, []algorithm.PredicateFailureReason{predicates.ErrFakePredicate}, nil
}
return true, nil, nil
}
// Set up the mock cache.
cache := schedulercache.New(time.Duration(0), wait.NeverStop)
cache.AddNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}})
mockCache := &syncingMockCache{
Cache: cache,
cycleStart: make(chan struct{}),
cacheInvalidated: make(chan struct{}),
}
eCache := NewEquivalenceCache()
// Ensure that equivalence cache invalidation happens after the scheduling cycle starts, but before
// the equivalence cache would be updated.
go func() {
<-mockCache.cycleStart
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "new-pod", UID: "new-pod"},
Spec: v1.PodSpec{NodeName: "machine1"}}
if err := cache.AddPod(pod); err != nil {
t.Errorf("Could not add pod to cache: %v", err)
}
eCache.InvalidateAllCachedPredicateItemOfNode("machine1")
mockCache.cacheInvalidated <- struct{}{}
}()
// Set up the scheduler.
ps := map[string]algorithm.FitPredicate{"testPredicate": testPredicate}
predicates.SetPredicatesOrdering([]string{"testPredicate"})
prioritizers := []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}}
pvcLister := schedulertesting.FakePersistentVolumeClaimLister([]*v1.PersistentVolumeClaim{})
scheduler := NewGenericScheduler(
mockCache,
eCache,
NewSchedulingQueue(),
ps,
algorithm.EmptyPredicateMetadataProducer,
prioritizers,
algorithm.EmptyPriorityMetadataProducer,
nil, nil, pvcLister, true, false)
// First scheduling attempt should fail.
nodeLister := schedulertesting.FakeNodeLister(makeNodeList([]string{"machine1"}))
pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}}
machine, err := scheduler.Schedule(pod, nodeLister)
if machine != "" || err == nil {
t.Error("First scheduling attempt did not fail")
}
// Second scheduling attempt should succeed because cache was invalidated.
_, err = scheduler.Schedule(pod, nodeLister)
if err != nil {
t.Errorf("Second scheduling attempt failed: %v", err)
}
if callCount != 2 {
t.Errorf("Predicate should have been called twice. Was called %d times.", callCount)
}
}

View File

@ -30,7 +30,7 @@ import (
restclient "k8s.io/client-go/rest"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
const (
@ -41,6 +41,7 @@ const (
// HTTPExtender implements the algorithm.SchedulerExtender interface.
type HTTPExtender struct {
extenderURL string
preemptVerb string
filterVerb string
prioritizeVerb string
bindVerb string
@ -48,6 +49,7 @@ type HTTPExtender struct {
client *http.Client
nodeCacheCapable bool
managedResources sets.String
ignorable bool
}
func makeTransport(config *schedulerapi.ExtenderConfig) (http.RoundTripper, error) {
@ -93,6 +95,7 @@ func NewHTTPExtender(config *schedulerapi.ExtenderConfig) (algorithm.SchedulerEx
}
return &HTTPExtender{
extenderURL: config.URLPrefix,
preemptVerb: config.PreemptVerb,
filterVerb: config.FilterVerb,
prioritizeVerb: config.PrioritizeVerb,
bindVerb: config.BindVerb,
@ -100,13 +103,150 @@ func NewHTTPExtender(config *schedulerapi.ExtenderConfig) (algorithm.SchedulerEx
client: client,
nodeCacheCapable: config.NodeCacheCapable,
managedResources: managedResources,
ignorable: config.Ignorable,
}, nil
}
// IsIgnorable returns true to indicate that scheduling should not fail when this extender
// is unavailable.
func (h *HTTPExtender) IsIgnorable() bool {
return h.ignorable
}
// SupportsPreemption returns whether an extender supports preemption.
// An extender should have the preempt verb defined and enable its own node cache.
func (h *HTTPExtender) SupportsPreemption() bool {
return len(h.preemptVerb) > 0
}
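// Illustrative sketch (not part of the original diff): wiring up an HTTP extender with
// the two fields this change introduces, PreemptVerb and Ignorable. The field names are
// the ones NewHTTPExtender reads above; the URL and verb values are hypothetical, and
// the snippet assumes it is built inside the kubernetes tree so the import resolves.
package main

import (
	"fmt"

	schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
)

func main() {
	cfg := &schedulerapi.ExtenderConfig{
		URLPrefix:        "http://127.0.0.1:12346/scheduler", // hypothetical endpoint
		FilterVerb:       "filter",
		PrioritizeVerb:   "prioritize",
		PreemptVerb:      "preemption", // non-empty, so SupportsPreemption() above returns true
		NodeCacheCapable: true,         // extender keeps its own node cache, so only identifiers are sent
		Ignorable:        true,         // scheduling continues if this extender is unreachable
	}
	fmt.Printf("preemption supported: %v\n", len(cfg.PreemptVerb) > 0)
}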
// ProcessPreemption returns filtered candidate nodes and victims after running preemption logic in extender.
func (h *HTTPExtender) ProcessPreemption(
pod *v1.Pod,
nodeToVictims map[*v1.Node]*schedulerapi.Victims,
nodeNameToInfo map[string]*schedulercache.NodeInfo,
) (map[*v1.Node]*schedulerapi.Victims, error) {
var (
result schedulerapi.ExtenderPreemptionResult
args *schedulerapi.ExtenderPreemptionArgs
)
if !h.SupportsPreemption() {
return nil, fmt.Errorf("preempt verb is not defined for extender %v but run into ProcessPreemption", h.extenderURL)
}
if h.nodeCacheCapable {
// If extender has cached node info, pass NodeNameToMetaVictims in args.
nodeNameToMetaVictims := convertToNodeNameToMetaVictims(nodeToVictims)
args = &schedulerapi.ExtenderPreemptionArgs{
Pod: pod,
NodeNameToMetaVictims: nodeNameToMetaVictims,
}
} else {
nodeNameToVictims := convertToNodeNameToVictims(nodeToVictims)
args = &schedulerapi.ExtenderPreemptionArgs{
Pod: pod,
NodeNameToVictims: nodeNameToVictims,
}
}
if err := h.send(h.preemptVerb, args, &result); err != nil {
return nil, err
}
// Extender will always return NodeNameToMetaVictims.
// So let's convert it to NodeToVictims by using NodeNameToInfo.
newNodeToVictims, err := h.convertToNodeToVictims(result.NodeNameToMetaVictims, nodeNameToInfo)
if err != nil {
return nil, err
}
// Do not override nodeToVictims
return newNodeToVictims, nil
}
// convertToNodeToVictims converts "nodeNameToMetaVictims" from object identifiers,
// such as UIDs and names, to object pointers.
func (h *HTTPExtender) convertToNodeToVictims(
nodeNameToMetaVictims map[string]*schedulerapi.MetaVictims,
nodeNameToInfo map[string]*schedulercache.NodeInfo,
) (map[*v1.Node]*schedulerapi.Victims, error) {
nodeToVictims := map[*v1.Node]*schedulerapi.Victims{}
for nodeName, metaVictims := range nodeNameToMetaVictims {
victims := &schedulerapi.Victims{
Pods: []*v1.Pod{},
}
for _, metaPod := range metaVictims.Pods {
pod, err := h.convertPodUIDToPod(metaPod, nodeName, nodeNameToInfo)
if err != nil {
return nil, err
}
victims.Pods = append(victims.Pods, pod)
}
nodeToVictims[nodeNameToInfo[nodeName].Node()] = victims
}
return nodeToVictims, nil
}
// convertPodUIDToPod returns the v1.Pod object for the given MetaPod and node name.
// The v1.Pod object is looked up in nodeInfo.Pods().
// It returns an error if there is a cache inconsistency between the default scheduler and the extender,
// i.e. the pod or node is missing from nodeNameToInfo.
func (h *HTTPExtender) convertPodUIDToPod(
metaPod *schedulerapi.MetaPod,
nodeName string,
nodeNameToInfo map[string]*schedulercache.NodeInfo) (*v1.Pod, error) {
if nodeInfo, ok := nodeNameToInfo[nodeName]; ok {
for _, pod := range nodeInfo.Pods() {
if string(pod.UID) == metaPod.UID {
return pod, nil
}
}
return nil, fmt.Errorf("extender: %v claims to preempt pod (UID: %v) on node: %v, but the pod is not found on that node",
h.extenderURL, metaPod, nodeInfo.Node().Name)
}
return nil, fmt.Errorf("extender: %v claims to preempt on node: %v but the node is not found in nodeNameToInfo map",
h.extenderURL, nodeInfo.Node().Name)
}
// convertToNodeNameToMetaVictims converts from struct type to meta types.
func convertToNodeNameToMetaVictims(
nodeToVictims map[*v1.Node]*schedulerapi.Victims,
) map[string]*schedulerapi.MetaVictims {
nodeNameToVictims := map[string]*schedulerapi.MetaVictims{}
for node, victims := range nodeToVictims {
metaVictims := &schedulerapi.MetaVictims{
Pods: []*schedulerapi.MetaPod{},
}
for _, pod := range victims.Pods {
metaPod := &schedulerapi.MetaPod{
UID: string(pod.UID),
}
metaVictims.Pods = append(metaVictims.Pods, metaPod)
}
nodeNameToVictims[node.GetName()] = metaVictims
}
return nodeNameToVictims
}
// convertToNodeNameToVictims converts from node type to node name as key.
func convertToNodeNameToVictims(
nodeToVictims map[*v1.Node]*schedulerapi.Victims,
) map[string]*schedulerapi.Victims {
nodeNameToVictims := map[string]*schedulerapi.Victims{}
for node, victims := range nodeToVictims {
nodeNameToVictims[node.GetName()] = victims
}
return nodeNameToVictims
}
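// Illustrative sketch (not part of the original diff): what the two converters above do,
// reduced to plain local types. A nodeCacheCapable extender only needs identifiers
// (node name, pod UID), while a cache-incapable one is sent the full victim objects.
// The node/pod/victims types below are simplified stand-ins for the scheduler's own types.
package main

import "fmt"

type node struct{ Name string }
type pod struct {
	Name string
	UID  string
}
type victims struct{ Pods []*pod }

// toNameKeyed mirrors convertToNodeNameToVictims: keep full objects, key by node name.
func toNameKeyed(in map[*node]*victims) map[string]*victims {
	out := map[string]*victims{}
	for n, v := range in {
		out[n.Name] = v
	}
	return out
}

// toMetaKeyed mirrors convertToNodeNameToMetaVictims: strip victims down to pod UIDs.
func toMetaKeyed(in map[*node]*victims) map[string][]string {
	out := map[string][]string{}
	for n, v := range in {
		for _, p := range v.Pods {
			out[n.Name] = append(out[n.Name], p.UID)
		}
	}
	return out
}

func main() {
	m := map[*node]*victims{
		{Name: "machine1"}: {Pods: []*pod{{Name: "victim-a", UID: "uid-a"}}},
	}
	fmt.Println(toNameKeyed(m)["machine1"].Pods[0].Name) // victim-a
	fmt.Println(toMetaKeyed(m)["machine1"])              // [uid-a]
}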
// Filter based on extender implemented predicate functions. The filtered list is
// expected to be a subset of the supplied list. failedNodesMap optionally contains
// the list of failed nodes and failure reasons.
func (h *HTTPExtender) Filter(pod *v1.Pod, nodes []*v1.Node, nodeNameToInfo map[string]*schedulercache.NodeInfo) ([]*v1.Node, schedulerapi.FailedNodesMap, error) {
func (h *HTTPExtender) Filter(
pod *v1.Pod,
nodes []*v1.Node, nodeNameToInfo map[string]*schedulercache.NodeInfo,
) ([]*v1.Node, schedulerapi.FailedNodesMap, error) {
var (
result schedulerapi.ExtenderFilterResult
nodeList *v1.NodeList
@ -133,7 +273,7 @@ func (h *HTTPExtender) Filter(pod *v1.Pod, nodes []*v1.Node, nodeNameToInfo map[
}
args = &schedulerapi.ExtenderArgs{
Pod: *pod,
Pod: pod,
Nodes: nodeList,
NodeNames: nodeNames,
}
@ -193,7 +333,7 @@ func (h *HTTPExtender) Prioritize(pod *v1.Pod, nodes []*v1.Node) (*schedulerapi.
}
args = &schedulerapi.ExtenderArgs{
Pod: *pod,
Pod: pod,
Nodes: nodeList,
NodeNames: nodeNames,
}
@ -254,7 +394,7 @@ func (h *HTTPExtender) send(action string, args interface{}, result interface{})
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("Failed %v with extender at URL %v, code %v", action, h.extenderURL, resp.StatusCode)
return fmt.Errorf("Failed %v with extender at URL %v, code %v", action, url, resp.StatusCode)
}
return json.NewDecoder(resp.Body).Decode(result)

View File

@ -26,8 +26,9 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
"k8s.io/kubernetes/pkg/scheduler/util"
)
type fitPredicate func(pod *v1.Pod, node *v1.Node) (bool, error)
@ -111,22 +112,159 @@ type FakeExtender struct {
nodeCacheCapable bool
filteredNodes []*v1.Node
unInterested bool
ignorable bool
// Cached node information for fake extender
cachedNodeNameToInfo map[string]*schedulercache.NodeInfo
}
func (f *FakeExtender) IsIgnorable() bool {
return f.ignorable
}
func (f *FakeExtender) SupportsPreemption() bool {
// Assume preempt verb is always defined.
return true
}
func (f *FakeExtender) ProcessPreemption(
pod *v1.Pod,
nodeToVictims map[*v1.Node]*schedulerapi.Victims,
nodeNameToInfo map[string]*schedulercache.NodeInfo,
) (map[*v1.Node]*schedulerapi.Victims, error) {
nodeToVictimsCopy := map[*v1.Node]*schedulerapi.Victims{}
// We don't want to change the original nodeToVictims
for k, v := range nodeToVictims {
// In a real-world implementation, the extender's user would have their own way to get the node object
// by name if needed (e.g. by querying kube-apiserver).
//
// For test purposes, we just use the node from the parameters directly.
nodeToVictimsCopy[k] = v
}
for node, victims := range nodeToVictimsCopy {
// Try to do preemption on extender side.
extenderVictimPods, extendernPDBViolations, fits, err := f.selectVictimsOnNodeByExtender(pod, node, nodeNameToInfo)
if err != nil {
return nil, err
}
// If the pod still does not fit after the extender's preemption, this node cannot be resolved by preemption at all,
// so remove it from the potential preemption nodes.
if !fits {
delete(nodeToVictimsCopy, node)
} else {
// Append new victims to original victims
nodeToVictimsCopy[node].Pods = append(victims.Pods, extenderVictimPods...)
nodeToVictimsCopy[node].NumPDBViolations = victims.NumPDBViolations + extendernPDBViolations
}
}
return nodeToVictimsCopy, nil
}
// selectVictimsOnNodeByExtender checks the given node with the extender's predicates.
// Returns:
// 1. More victim pods (if any) added by the extender's preemption phase.
// 2. The number of PDB-violating victims.
// 3. Whether the pod fits after the extender's preemption phase.
func (f *FakeExtender) selectVictimsOnNodeByExtender(
pod *v1.Pod,
node *v1.Node,
nodeNameToInfo map[string]*schedulercache.NodeInfo,
) ([]*v1.Pod, int, bool, error) {
// If an extender supports preemption but has no cached node info, run the filter to make sure
// the default scheduler's decision still stands for the given pod and node.
if !f.nodeCacheCapable {
fits, err := f.runPredicate(pod, node)
if err != nil {
return nil, 0, false, err
}
if !fits {
return nil, 0, false, nil
}
return []*v1.Pod{}, 0, true, nil
}
// Otherwise, since the extender supports preemption and has cached node info, assume cachedNodeNameToInfo is available
// and look up the cached node info by the given node name.
nodeInfoCopy := f.cachedNodeNameToInfo[node.GetName()].Clone()
potentialVictims := util.SortableList{CompFunc: util.HigherPriorityPod}
removePod := func(rp *v1.Pod) {
nodeInfoCopy.RemovePod(rp)
}
addPod := func(ap *v1.Pod) {
nodeInfoCopy.AddPod(ap)
}
// As the first step, remove all the lower priority pods from the node and
// check if the given pod can be scheduled.
podPriority := util.GetPodPriority(pod)
for _, p := range nodeInfoCopy.Pods() {
if util.GetPodPriority(p) < podPriority {
potentialVictims.Items = append(potentialVictims.Items, p)
removePod(p)
}
}
potentialVictims.Sort()
// If the new pod does not fit after removing all the lower priority pods,
// we are almost done and this node is not suitable for preemption.
fits, err := f.runPredicate(pod, nodeInfoCopy.Node())
if err != nil {
return nil, 0, false, err
}
if !fits {
return nil, 0, false, nil
}
var victims []*v1.Pod
// TODO(harry): handle PDBs in the future.
numViolatingVictim := 0
reprievePod := func(p *v1.Pod) bool {
addPod(p)
fits, _ := f.runPredicate(pod, nodeInfoCopy.Node())
if !fits {
removePod(p)
victims = append(victims, p)
}
return fits
}
// For now, assume all potential victims are non-violating.
// Now we try to reprieve non-violating victims.
for _, p := range potentialVictims.Items {
reprievePod(p.(*v1.Pod))
}
return victims, numViolatingVictim, true, nil
}
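// Illustrative sketch (not part of the original diff): the remove-then-reprieve shape of
// selectVictimsOnNodeByExtender above, with integer "CPU" requests standing in for pods
// and a plain capacity check standing in for the predicate. All names are hypothetical.
package main

import (
	"fmt"
	"sort"
)

type fakePod struct {
	Name     string
	Priority int
	CPU      int
}

// selectVictims removes every pod with lower priority than the preemptor, then tries to
// reprieve them (highest priority first); a pod stays a victim only if adding it back
// would make the preemptor no longer fit.
func selectVictims(preemptor fakePod, running []fakePod, capacity int) (victims []fakePod, fits bool) {
	used := 0
	var candidates []fakePod
	for _, p := range running {
		if p.Priority < preemptor.Priority {
			candidates = append(candidates, p) // tentatively removed
		} else {
			used += p.CPU
		}
	}
	if used+preemptor.CPU > capacity {
		return nil, false // even full preemption would not help on this node
	}
	sort.Slice(candidates, func(i, j int) bool { return candidates[i].Priority > candidates[j].Priority })
	for _, p := range candidates {
		if used+p.CPU+preemptor.CPU <= capacity {
			used += p.CPU // reprieved: it can stay
		} else {
			victims = append(victims, p)
		}
	}
	return victims, true
}

func main() {
	running := []fakePod{{"a", 1, 4}, {"b", 2, 4}, {"c", 5, 2}}
	v, ok := selectVictims(fakePod{"new", 4, 4}, running, 10)
	fmt.Println(ok, len(v)) // true 1: only one low-priority pod must go
}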
// runPredicate runs the extender's predicates one by one for the given pod and node.
// Returns whether the pod fits.
func (f *FakeExtender) runPredicate(pod *v1.Pod, node *v1.Node) (bool, error) {
fits := true
var err error
for _, predicate := range f.predicates {
fits, err = predicate(pod, node)
if err != nil {
return false, err
}
if !fits {
break
}
}
return fits, nil
}
func (f *FakeExtender) Filter(pod *v1.Pod, nodes []*v1.Node, nodeNameToInfo map[string]*schedulercache.NodeInfo) ([]*v1.Node, schedulerapi.FailedNodesMap, error) {
filtered := []*v1.Node{}
failedNodesMap := schedulerapi.FailedNodesMap{}
for _, node := range nodes {
fits := true
for _, predicate := range f.predicates {
fit, err := predicate(pod, node)
if err != nil {
return []*v1.Node{}, schedulerapi.FailedNodesMap{}, err
}
if !fit {
fits = false
break
}
fits, err := f.runPredicate(pod, node)
if err != nil {
return []*v1.Node{}, schedulerapi.FailedNodesMap{}, err
}
if fits {
filtered = append(filtered, node)
@ -192,15 +330,13 @@ var _ algorithm.SchedulerExtender = &FakeExtender{}
func TestGenericSchedulerWithExtenders(t *testing.T) {
tests := []struct {
name string
predicates map[string]algorithm.FitPredicate
prioritizers []algorithm.PriorityConfig
extenders []FakeExtender
extenderPredicates []fitPredicate
extenderPrioritizers []priorityConfig
nodes []string
expectedHost string
expectsErr bool
name string
predicates map[string]algorithm.FitPredicate
prioritizers []algorithm.PriorityConfig
extenders []FakeExtender
nodes []string
expectedHost string
expectsErr bool
}{
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
@ -314,7 +450,7 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
// Filter/Prioritize phases if the extender is not interested in
// the pod.
//
// If scheduler sends the pod by mistake, the test will fail
// If scheduler sends the pod by mistake, the test would fail
// because of the errors from errorPredicateExtender and/or
// errorPrioritizerExtender.
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
@ -331,6 +467,28 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
expectedHost: "machine2", // machine2 has higher score
name: "test 8",
},
{
// Scheduling is expected not to fail in the
// Filter/Prioritize phases if the extender is unavailable and ignorable.
//
// If scheduler did not ignore the extender, the test would fail
// because of the errors from errorPredicateExtender.
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{errorPredicateExtender},
ignorable: true,
},
{
predicates: []fitPredicate{machine1PredicateExtender},
},
},
nodes: []string{"machine1", "machine2"},
expectsErr: false,
expectedHost: "machine1",
name: "test 9",
},
}
for _, test := range tests {
@ -340,11 +498,22 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
}
cache := schedulercache.New(time.Duration(0), wait.NeverStop)
for _, name := range test.nodes {
cache.AddNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: name}})
cache.AddNode(createNode(name))
}
queue := NewSchedulingQueue()
scheduler := NewGenericScheduler(
cache, nil, queue, test.predicates, algorithm.EmptyPredicateMetadataProducer, test.prioritizers, algorithm.EmptyPriorityMetadataProducer, extenders, nil, schedulertesting.FakePersistentVolumeClaimLister{}, false)
cache,
nil,
queue,
test.predicates,
algorithm.EmptyPredicateMetadataProducer,
test.prioritizers,
algorithm.EmptyPriorityMetadataProducer,
extenders,
nil,
schedulertesting.FakePersistentVolumeClaimLister{},
false,
false)
podIgnored := &v1.Pod{}
machine, err := scheduler.Schedule(podIgnored, schedulertesting.FakeNodeLister(makeNodeList(test.nodes)))
if test.expectsErr {
@ -362,3 +531,7 @@ func TestGenericSchedulerWithExtenders(t *testing.T) {
}
}
}
func createNode(name string) *v1.Node {
return &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: name}}
}

View File

@ -25,6 +25,8 @@ import (
"sync/atomic"
"time"
"github.com/golang/glog"
"k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -36,12 +38,10 @@ import (
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/scheduler/metrics"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
"k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/scheduler/volumebinder"
"github.com/golang/glog"
)
// FailedPredicateMap declares a map[string][]algorithm.PredicateFailureReason type.
@ -54,13 +54,7 @@ type FitError struct {
FailedPredicates FailedPredicateMap
}
// Victims describes pod victims.
type Victims struct {
pods []*v1.Pod
numPDBViolations int
}
// ErrNoNodesAvailable defines an error of no nodes available.
// ErrNoNodesAvailable is used to describe the error that no nodes available to schedule pods.
var ErrNoNodesAvailable = fmt.Errorf("no nodes available to schedule pods")
const (
@ -104,9 +98,10 @@ type genericScheduler struct {
cachedNodeInfoMap map[string]*schedulercache.NodeInfo
volumeBinder *volumebinder.VolumeBinder
pvcLister corelisters.PersistentVolumeClaimLister
disablePreemption bool
}
// Schedule tries to schedule the given pod to one of node in the node list.
// Schedule tries to schedule the given pod to one of the nodes in the node list.
// If it succeeds, it will return the name of the node.
// If it fails, it will return a FitError error with reasons.
func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister) (string, error) {
@ -133,7 +128,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister
trace.Step("Computing predicates")
startPredicateEvalTime := time.Now()
filteredNodes, failedPredicateMap, err := findNodesThatFit(pod, g.cachedNodeInfoMap, nodes, g.predicates, g.extenders, g.predicateMetaProducer, g.equivalenceCache, g.schedulingQueue, g.alwaysCheckAllPredicates)
filteredNodes, failedPredicateMap, err := g.findNodesThatFit(pod, nodes)
if err != nil {
return "", err
}
@ -146,6 +141,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister
}
}
metrics.SchedulingAlgorithmPredicateEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPredicateEvalTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PredicateEvaluation).Observe(metrics.SinceInSeconds(startPredicateEvalTime))
trace.Step("Prioritizing")
startPriorityEvalTime := time.Now()
@ -161,6 +157,7 @@ func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister
return "", err
}
metrics.SchedulingAlgorithmPriorityEvaluationDuration.Observe(metrics.SinceInMicroseconds(startPriorityEvalTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PriorityEvaluation).Observe(metrics.SinceInSeconds(startPriorityEvalTime))
trace.Step("Selecting host")
return g.selectHost(priorityList)
@ -234,34 +231,77 @@ func (g *genericScheduler) Preempt(pod *v1.Pod, nodeLister algorithm.NodeLister,
if err != nil {
return nil, nil, nil, err
}
nodeToVictims, err := selectNodesForPreemption(pod, g.cachedNodeInfoMap, potentialNodes, g.predicates, g.predicateMetaProducer, g.schedulingQueue, pdbs)
nodeToVictims, err := selectNodesForPreemption(pod, g.cachedNodeInfoMap, potentialNodes, g.predicates,
g.predicateMetaProducer, g.schedulingQueue, pdbs)
if err != nil {
return nil, nil, nil, err
}
for len(nodeToVictims) > 0 {
node := pickOneNodeForPreemption(nodeToVictims)
if node == nil {
return nil, nil, nil, err
}
passes, pErr := nodePassesExtendersForPreemption(pod, node.Name, nodeToVictims[node].pods, g.cachedNodeInfoMap, g.extenders)
if passes && pErr == nil {
// Lower priority pods nominated to run on this node, may no longer fit on
// this node. So, we should remove their nomination. Removing their
// nomination updates these pods and moves them to the active queue. It
// lets scheduler find another place for them.
nominatedPods := g.getLowerPriorityNominatedPods(pod, node.Name)
return node, nodeToVictims[node].pods, nominatedPods, err
}
if pErr != nil {
glog.Errorf("Error occurred while checking extenders for preemption on node %v: %v", node, pErr)
}
// Remove the node from the map and try to pick a different node.
delete(nodeToVictims, node)
// We will only check nodeToVictims with extenders that support preemption.
// Extenders which do not support preemption may later prevent preemptor from being scheduled on the nominated
// node. In that case, scheduler will find a different host for the preemptor in subsequent scheduling cycles.
nodeToVictims, err = g.processPreemptionWithExtenders(pod, nodeToVictims)
if err != nil {
return nil, nil, nil, err
}
return nil, nil, nil, err
candidateNode := pickOneNodeForPreemption(nodeToVictims)
if candidateNode == nil {
return nil, nil, nil, err
}
// Lower priority pods nominated to run on this node, may no longer fit on
// this node. So, we should remove their nomination. Removing their
// nomination updates these pods and moves them to the active queue. It
// lets scheduler find another place for them.
nominatedPods := g.getLowerPriorityNominatedPods(pod, candidateNode.Name)
if nodeInfo, ok := g.cachedNodeInfoMap[candidateNode.Name]; ok {
return nodeInfo.Node(), nodeToVictims[candidateNode].Pods, nominatedPods, err
}
return nil, nil, nil, fmt.Errorf(
"preemption failed: the target node %s has been deleted from scheduler cache",
candidateNode.Name)
}
// GetLowerPriorityNominatedPods returns pods whose priority is smaller than the
// processPreemptionWithExtenders processes preemption with extenders
func (g *genericScheduler) processPreemptionWithExtenders(
pod *v1.Pod,
nodeToVictims map[*v1.Node]*schedulerapi.Victims,
) (map[*v1.Node]*schedulerapi.Victims, error) {
if len(nodeToVictims) > 0 {
for _, extender := range g.extenders {
if extender.SupportsPreemption() {
newNodeToVictims, err := extender.ProcessPreemption(
pod,
nodeToVictims,
g.cachedNodeInfoMap,
)
if err != nil {
if extender.IsIgnorable() {
glog.Warningf("Skipping extender %v as it returned error %v and has ignorable flag set",
extender, err)
continue
}
return nil, err
}
// Replace nodeToVictims with the new result after preemption, so the
// rest of the extenders can continue to use it as a parameter.
nodeToVictims = newNodeToVictims
// If node list becomes empty, no preemption can happen regardless of other extenders.
if len(nodeToVictims) == 0 {
break
}
}
}
}
return nodeToVictims, nil
}
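// Illustrative sketch (not part of the original diff): the control flow of
// processPreemptionWithExtenders above, using a tiny local extender interface. Each
// extender narrows the candidate set; an error from an ignorable extender is skipped,
// any other error aborts, and once nothing is left there is no point calling the rest.
package main

import (
	"errors"
	"fmt"
)

type extender interface {
	SupportsPreemption() bool
	IsIgnorable() bool
	ProcessPreemption(candidates map[string]int) (map[string]int, error)
}

func runExtenders(candidates map[string]int, extenders []extender) (map[string]int, error) {
	for _, ext := range extenders {
		if !ext.SupportsPreemption() {
			continue
		}
		narrowed, err := ext.ProcessPreemption(candidates)
		if err != nil {
			if ext.IsIgnorable() {
				continue // unreachable but ignorable: keep the previous candidates
			}
			return nil, err
		}
		candidates = narrowed
		if len(candidates) == 0 {
			break
		}
	}
	return candidates, nil
}

// brokenExtender always errors; flip ignorable to see both branches.
type brokenExtender struct{ ignorable bool }

func (b brokenExtender) SupportsPreemption() bool { return true }
func (b brokenExtender) IsIgnorable() bool        { return b.ignorable }
func (b brokenExtender) ProcessPreemption(map[string]int) (map[string]int, error) {
	return nil, errors.New("extender unavailable")
}

func main() {
	c := map[string]int{"machine1": 1}
	out, err := runExtenders(c, []extender{brokenExtender{ignorable: true}})
	fmt.Println(len(out), err) // 1 <nil>: the ignorable failure is skipped
	_, err = runExtenders(c, []extender{brokenExtender{ignorable: false}})
	fmt.Println(err) // extender unavailable
}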
// getLowerPriorityNominatedPods returns pods whose priority is smaller than the
// priority of the given "pod" and are nominated to run on the given node.
// Note: We could possibly check if the nominated lower priority pods still fit
// and return those that no longer fit, but that would require lots of
@ -270,6 +310,7 @@ func (g *genericScheduler) Preempt(pod *v1.Pod, nodeLister algorithm.NodeLister,
// small number of nominated pods per node.
func (g *genericScheduler) getLowerPriorityNominatedPods(pod *v1.Pod, nodeName string) []*v1.Pod {
pods := g.schedulingQueue.WaitingPodsForNode(nodeName)
if len(pods) == 0 {
return nil
}
@ -286,21 +327,11 @@ func (g *genericScheduler) getLowerPriorityNominatedPods(pod *v1.Pod, nodeName s
// Filters the nodes to find the ones that fit based on the given predicate functions
// Each node is passed through the predicate functions to determine if it is a fit
func findNodesThatFit(
pod *v1.Pod,
nodeNameToInfo map[string]*schedulercache.NodeInfo,
nodes []*v1.Node,
predicateFuncs map[string]algorithm.FitPredicate,
extenders []algorithm.SchedulerExtender,
metadataProducer algorithm.PredicateMetadataProducer,
ecache *EquivalenceCache,
schedulingQueue SchedulingQueue,
alwaysCheckAllPredicates bool,
) ([]*v1.Node, FailedPredicateMap, error) {
func (g *genericScheduler) findNodesThatFit(pod *v1.Pod, nodes []*v1.Node) ([]*v1.Node, FailedPredicateMap, error) {
var filtered []*v1.Node
failedPredicateMap := FailedPredicateMap{}
if len(predicateFuncs) == 0 {
if len(g.predicates) == 0 {
filtered = nodes
} else {
// Create filtered list with enough space to avoid growing it
@ -311,12 +342,12 @@ func findNodesThatFit(
var filteredLen int32
// We can use the same metadata producer for all nodes.
meta := metadataProducer(pod, nodeNameToInfo)
meta := g.predicateMetaProducer(pod, g.cachedNodeInfoMap)
var equivCacheInfo *equivalenceClassInfo
if ecache != nil {
if g.equivalenceCache != nil {
// getEquivalenceClassInfo will return immediately if no equivalence pod found
equivCacheInfo = ecache.getEquivalenceClassInfo(pod)
equivCacheInfo = g.equivalenceCache.getEquivalenceClassInfo(pod)
}
checkNode := func(i int) {
@ -324,11 +355,12 @@ func findNodesThatFit(
fits, failedPredicates, err := podFitsOnNode(
pod,
meta,
nodeNameToInfo[nodeName],
predicateFuncs,
ecache,
schedulingQueue,
alwaysCheckAllPredicates,
g.cachedNodeInfoMap[nodeName],
g.predicates,
g.cache,
g.equivalenceCache,
g.schedulingQueue,
g.alwaysCheckAllPredicates,
equivCacheInfo,
)
if err != nil {
@ -352,14 +384,20 @@ func findNodesThatFit(
}
}
if len(filtered) > 0 && len(extenders) != 0 {
for _, extender := range extenders {
if len(filtered) > 0 && len(g.extenders) != 0 {
for _, extender := range g.extenders {
if !extender.IsInterested(pod) {
continue
}
filteredList, failedMap, err := extender.Filter(pod, filtered, nodeNameToInfo)
filteredList, failedMap, err := extender.Filter(pod, filtered, g.cachedNodeInfoMap)
if err != nil {
return []*v1.Node{}, FailedPredicateMap{}, err
if extender.IsIgnorable() {
glog.Warningf("Skipping extender %v as it returned error %v and has ignorable flag set",
extender, err)
continue
} else {
return []*v1.Node{}, FailedPredicateMap{}, err
}
}
for failedNodeName, failedMsg := range failedMap {
@ -422,6 +460,7 @@ func podFitsOnNode(
meta algorithm.PredicateMetadata,
info *schedulercache.NodeInfo,
predicateFuncs map[string]algorithm.FitPredicate,
cache schedulercache.Cache,
ecache *EquivalenceCache,
queue SchedulingQueue,
alwaysCheckAllPredicates bool,
@ -430,12 +469,7 @@ func podFitsOnNode(
var (
eCacheAvailable bool
failedPredicates []algorithm.PredicateFailureReason
invalid bool
fit bool
reasons []algorithm.PredicateFailureReason
err error
)
predicateResults := make(map[string]HostPredicate)
podsAdded := false
// We run predicates twice in some cases. If the node has greater or equal priority
@ -469,38 +503,20 @@ func podFitsOnNode(
// when pods are nominated or their nominations change.
eCacheAvailable = equivCacheInfo != nil && !podsAdded
for _, predicateKey := range predicates.Ordering() {
var (
fit bool
reasons []algorithm.PredicateFailureReason
err error
)
//TODO (yastij) : compute average predicate restrictiveness to export it as Prometheus metric
if predicate, exist := predicateFuncs[predicateKey]; exist {
if eCacheAvailable {
// Lock ecache here to avoid a race condition against cache invalidation invoked
// in event handlers. This race has existed despite locks in eCache implementation.
ecache.Lock()
// PredicateWithECache will return its cached predicate results.
fit, reasons, invalid = ecache.PredicateWithECache(pod.GetName(), info.Node().GetName(), predicateKey, equivCacheInfo.hash, false)
}
if !eCacheAvailable || invalid {
// we need to execute predicate functions since equivalence cache does not work
fit, reasons, err = ecache.RunPredicate(predicate, predicateKey, pod, metaToUse, nodeInfoToUse, equivCacheInfo, cache)
} else {
fit, reasons, err = predicate(pod, metaToUse, nodeInfoToUse)
if err != nil {
return false, []algorithm.PredicateFailureReason{}, err
}
if eCacheAvailable {
// Store data to update eCache after this loop.
if res, exists := predicateResults[predicateKey]; exists {
res.Fit = res.Fit && fit
res.FailReasons = append(res.FailReasons, reasons...)
predicateResults[predicateKey] = res
} else {
predicateResults[predicateKey] = HostPredicate{Fit: fit, FailReasons: reasons}
}
result := predicateResults[predicateKey]
ecache.UpdateCachedPredicateItem(pod.GetName(), info.Node().GetName(), predicateKey, result.Fit, result.FailReasons, equivCacheInfo.hash, false)
}
}
if eCacheAvailable {
ecache.Unlock()
if err != nil {
return false, []algorithm.PredicateFailureReason{}, err
}
if !fit {
@ -508,7 +524,9 @@ func podFitsOnNode(
failedPredicates = append(failedPredicates, reasons...)
// if alwaysCheckAllPredicates is false, short circuit all predicates when one predicate fails.
if !alwaysCheckAllPredicates {
glog.V(5).Infoln("since alwaysCheckAllPredicates has not been set, the predicate evaluation is short circuited and there are chances of other predicates failing as well.")
glog.V(5).Infoln("since alwaysCheckAllPredicates has not been set, the predicate" +
"evaluation is short circuited and there are chances" +
"of other predicates failing as well.")
break
}
}
@ -683,7 +701,7 @@ func EqualPriorityMap(_ *v1.Pod, _ interface{}, nodeInfo *schedulercache.NodeInf
// 5. If there are still ties, the first such node is picked (sort of randomly).
// The 'minNodes1' and 'minNodes2' are being reused here to save the memory
// allocation and garbage collection time.
func pickOneNodeForPreemption(nodesToVictims map[*v1.Node]*Victims) *v1.Node {
func pickOneNodeForPreemption(nodesToVictims map[*v1.Node]*schedulerapi.Victims) *v1.Node {
if len(nodesToVictims) == 0 {
return nil
}
@ -691,14 +709,14 @@ func pickOneNodeForPreemption(nodesToVictims map[*v1.Node]*Victims) *v1.Node {
var minNodes1 []*v1.Node
lenNodes1 := 0
for node, victims := range nodesToVictims {
if len(victims.pods) == 0 {
if len(victims.Pods) == 0 {
// We found a node that doesn't need any preemption. Return it!
// This should happen rarely when one or more pods are terminated between
// the time that scheduler tries to schedule the pod and the time that
// preemption logic tries to find nodes for preemption.
return node
}
numPDBViolatingPods := victims.numPDBViolations
numPDBViolatingPods := victims.NumPDBViolations
if numPDBViolatingPods < minNumPDBViolatingPods {
minNumPDBViolatingPods = numPDBViolatingPods
minNodes1 = nil
@ -722,7 +740,7 @@ func pickOneNodeForPreemption(nodesToVictims map[*v1.Node]*Victims) *v1.Node {
node := minNodes1[i]
victims := nodesToVictims[node]
// highestPodPriority is the highest priority among the victims on this node.
highestPodPriority := util.GetPodPriority(victims.pods[0])
highestPodPriority := util.GetPodPriority(victims.Pods[0])
if highestPodPriority < minHighestPriority {
minHighestPriority = highestPodPriority
lenNodes2 = 0
@ -743,7 +761,7 @@ func pickOneNodeForPreemption(nodesToVictims map[*v1.Node]*Victims) *v1.Node {
for i := 0; i < lenNodes2; i++ {
var sumPriorities int64
node := minNodes2[i]
for _, pod := range nodesToVictims[node].pods {
for _, pod := range nodesToVictims[node].Pods {
// We add MaxInt32+1 to all priorities to make all of them >= 0. This is
// needed so that a node with a few pods with negative priority is not
// picked over a node with a smaller number of pods with the same negative
@ -769,7 +787,7 @@ func pickOneNodeForPreemption(nodesToVictims map[*v1.Node]*Victims) *v1.Node {
lenNodes2 = 0
for i := 0; i < lenNodes1; i++ {
node := minNodes1[i]
numPods := len(nodesToVictims[node].pods)
numPods := len(nodesToVictims[node].Pods)
if numPods < minNumPods {
minNumPods = numPods
lenNodes2 = 0
@ -797,9 +815,9 @@ func selectNodesForPreemption(pod *v1.Pod,
metadataProducer algorithm.PredicateMetadataProducer,
queue SchedulingQueue,
pdbs []*policy.PodDisruptionBudget,
) (map[*v1.Node]*Victims, error) {
) (map[*v1.Node]*schedulerapi.Victims, error) {
nodeNameToVictims := map[*v1.Node]*Victims{}
nodeToVictims := map[*v1.Node]*schedulerapi.Victims{}
var resultLock sync.Mutex
// We can use the same metadata producer for all nodes.
@ -813,50 +831,16 @@ func selectNodesForPreemption(pod *v1.Pod,
pods, numPDBViolations, fits := selectVictimsOnNode(pod, metaCopy, nodeNameToInfo[nodeName], predicates, queue, pdbs)
if fits {
resultLock.Lock()
victims := Victims{
pods: pods,
numPDBViolations: numPDBViolations,
victims := schedulerapi.Victims{
Pods: pods,
NumPDBViolations: numPDBViolations,
}
nodeNameToVictims[potentialNodes[i]] = &victims
nodeToVictims[potentialNodes[i]] = &victims
resultLock.Unlock()
}
}
workqueue.Parallelize(16, len(potentialNodes), checkNode)
return nodeNameToVictims, nil
}
func nodePassesExtendersForPreemption(
pod *v1.Pod,
nodeName string,
victims []*v1.Pod,
nodeNameToInfo map[string]*schedulercache.NodeInfo,
extenders []algorithm.SchedulerExtender) (bool, error) {
// If there are any extenders, run them and filter the list of candidate nodes.
if len(extenders) == 0 {
return true, nil
}
// Remove the victims from the corresponding nodeInfo and send nodes to the
// extenders for filtering.
originalNodeInfo := nodeNameToInfo[nodeName]
nodeInfoCopy := nodeNameToInfo[nodeName].Clone()
for _, victim := range victims {
nodeInfoCopy.RemovePod(victim)
}
nodeNameToInfo[nodeName] = nodeInfoCopy
defer func() { nodeNameToInfo[nodeName] = originalNodeInfo }()
filteredNodes := []*v1.Node{nodeInfoCopy.Node()}
for _, extender := range extenders {
var err error
var failedNodesMap map[string]string
filteredNodes, failedNodesMap, err = extender.Filter(pod, filteredNodes, nodeNameToInfo)
if err != nil {
return false, err
}
if _, found := failedNodesMap[nodeName]; found || len(filteredNodes) == 0 {
return false, nil
}
}
return true, nil
return nodeToVictims, nil
}
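// Illustrative sketch (not part of the original diff): the fan-out pattern used by
// selectNodesForPreemption above. Every node is checked concurrently and only the
// nodes that pass contribute to a shared result map, guarded by a mutex. The real code
// delegates the fan-out to workqueue.Parallelize with 16 workers; this sketch uses a
// plain WaitGroup, and checkNode is a hypothetical stand-in for the per-node work.
package main

import (
	"fmt"
	"strings"
	"sync"
)

func main() {
	nodes := []string{"machine1", "machine2", "machine3"}
	// checkNode stands in for "can the pod fit here after preempting some victims?".
	checkNode := func(name string) (victims []string, fits bool) {
		if strings.HasSuffix(name, "3") {
			return nil, false // pretend machine3 cannot host the pod even after preemption
		}
		return []string{"victim-on-" + name}, true
	}

	var (
		mu      sync.Mutex
		results = map[string][]string{}
		wg      sync.WaitGroup
	)
	for _, name := range nodes {
		wg.Add(1)
		go func(name string) {
			defer wg.Done()
			if victims, fits := checkNode(name); fits {
				mu.Lock()
				results[name] = victims // only successful nodes are recorded
				mu.Unlock()
			}
		}(name)
	}
	wg.Wait()
	fmt.Println(len(results)) // 2
}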
// filterPodsWithPDBViolation groups the given "pods" into two groups of "violatingPods"
@ -951,7 +935,7 @@ func selectVictimsOnNode(
// that we should check is if the "pod" is failing to schedule due to pod affinity
// failure.
// TODO(bsalamat): Consider checking affinity to lower priority pods if feasible with reasonable performance.
if fits, _, err := podFitsOnNode(pod, meta, nodeInfoCopy, fitPredicates, nil, queue, false, nil); !fits {
if fits, _, err := podFitsOnNode(pod, meta, nodeInfoCopy, fitPredicates, nil, nil, queue, false, nil); !fits {
if err != nil {
glog.Warningf("Encountered error while selecting victims on node %v: %v", nodeInfo.Node().Name, err)
}
@ -965,7 +949,7 @@ func selectVictimsOnNode(
violatingVictims, nonViolatingVictims := filterPodsWithPDBViolation(potentialVictims.Items, pdbs)
reprievePod := func(p *v1.Pod) bool {
addPod(p)
fits, _, _ := podFitsOnNode(pod, meta, nodeInfoCopy, fitPredicates, nil, queue, false, nil)
fits, _, _ := podFitsOnNode(pod, meta, nodeInfoCopy, fitPredicates, nil, nil, queue, false, nil)
if !fits {
removePod(p)
victims = append(victims, p)
@ -996,6 +980,7 @@ func nodesWherePreemptionMightHelp(pod *v1.Pod, nodes []*v1.Node, failedPredicat
// (which is the case today), the !found case should never happen, but we'd prefer
// to rely less on such assumptions in the code when checking does not impose
// significant overhead.
// Also, we currently assume that all failures returned by the extender are resolvable.
for _, failedPredicate := range failedPredicates {
switch failedPredicate {
case
@ -1082,7 +1067,9 @@ func NewGenericScheduler(
extenders []algorithm.SchedulerExtender,
volumeBinder *volumebinder.VolumeBinder,
pvcLister corelisters.PersistentVolumeClaimLister,
alwaysCheckAllPredicates bool) algorithm.ScheduleAlgorithm {
alwaysCheckAllPredicates bool,
disablePreemption bool,
) algorithm.ScheduleAlgorithm {
return &genericScheduler{
cache: cache,
equivalenceCache: eCache,
@ -1096,5 +1083,6 @@ func NewGenericScheduler(
volumeBinder: volumeBinder,
pvcLister: pvcLister,
alwaysCheckAllPredicates: alwaysCheckAllPredicates,
disablePreemption: disablePreemption,
}
}

View File

@ -30,6 +30,7 @@ import (
extensions "k8s.io/api/extensions/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
@ -37,7 +38,7 @@ import (
algorithmpriorities "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
@ -203,10 +204,10 @@ func TestGenericScheduler(t *testing.T) {
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
nodes: []string{"machine1", "machine2"},
expectsErr: true,
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2"}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}},
name: "test 1",
wErr: &FitError{
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2"}},
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}},
NumAllNodes: 2,
FailedPredicates: FailedPredicateMap{
"machine1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrFakePredicate},
@ -217,7 +218,7 @@ func TestGenericScheduler(t *testing.T) {
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
nodes: []string{"machine1", "machine2"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "ignore"}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "ignore", UID: types.UID("ignore")}},
expectedHosts: sets.NewString("machine1", "machine2"),
name: "test 2",
wErr: nil,
@ -227,7 +228,7 @@ func TestGenericScheduler(t *testing.T) {
predicates: map[string]algorithm.FitPredicate{"matches": matchesPredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
nodes: []string{"machine1", "machine2"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine2"}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine2", UID: types.UID("machine2")}},
expectedHosts: sets.NewString("machine2"),
name: "test 3",
wErr: nil,
@ -236,7 +237,7 @@ func TestGenericScheduler(t *testing.T) {
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}},
nodes: []string{"3", "2", "1"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "ignore"}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "ignore", UID: types.UID("ignore")}},
expectedHosts: sets.NewString("3"),
name: "test 4",
wErr: nil,
@ -245,7 +246,7 @@ func TestGenericScheduler(t *testing.T) {
predicates: map[string]algorithm.FitPredicate{"matches": matchesPredicate},
prioritizers: []algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}},
nodes: []string{"3", "2", "1"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2"}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}},
expectedHosts: sets.NewString("2"),
name: "test 5",
wErr: nil,
@ -254,7 +255,7 @@ func TestGenericScheduler(t *testing.T) {
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}, {Function: reverseNumericPriority, Weight: 2}},
nodes: []string{"3", "2", "1"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2"}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}},
expectedHosts: sets.NewString("1"),
name: "test 6",
wErr: nil,
@ -263,11 +264,11 @@ func TestGenericScheduler(t *testing.T) {
predicates: map[string]algorithm.FitPredicate{"true": truePredicate, "false": falsePredicate},
prioritizers: []algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}},
nodes: []string{"3", "2", "1"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2"}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}},
expectsErr: true,
name: "test 7",
wErr: &FitError{
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2"}},
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}},
NumAllNodes: 3,
FailedPredicates: FailedPredicateMap{
"3": []algorithm.PredicateFailureReason{algorithmpredicates.ErrFakePredicate},
@ -283,7 +284,7 @@ func TestGenericScheduler(t *testing.T) {
},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{Name: "2"},
ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")},
Spec: v1.PodSpec{
NodeName: "2",
},
@ -292,13 +293,13 @@ func TestGenericScheduler(t *testing.T) {
},
},
},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2"}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}},
prioritizers: []algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}},
nodes: []string{"1", "2"},
expectsErr: true,
name: "test 8",
wErr: &FitError{
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2"}},
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}},
NumAllNodes: 2,
FailedPredicates: FailedPredicateMap{
"1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrFakePredicate},
@ -313,7 +314,7 @@ func TestGenericScheduler(t *testing.T) {
nodes: []string{"machine1", "machine2"},
pvcs: []*v1.PersistentVolumeClaim{{ObjectMeta: metav1.ObjectMeta{Name: "existingPVC"}}},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "ignore"},
ObjectMeta: metav1.ObjectMeta{Name: "ignore", UID: types.UID("ignore")},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
@ -336,7 +337,7 @@ func TestGenericScheduler(t *testing.T) {
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
nodes: []string{"machine1", "machine2"},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "ignore"},
ObjectMeta: metav1.ObjectMeta{Name: "ignore", UID: types.UID("ignore")},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
@ -360,7 +361,7 @@ func TestGenericScheduler(t *testing.T) {
nodes: []string{"machine1", "machine2"},
pvcs: []*v1.PersistentVolumeClaim{{ObjectMeta: metav1.ObjectMeta{Name: "existingPVC", DeletionTimestamp: &metav1.Time{}}}},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "ignore"},
ObjectMeta: metav1.ObjectMeta{Name: "ignore", UID: types.UID("ignore")},
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
@ -383,10 +384,10 @@ func TestGenericScheduler(t *testing.T) {
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
alwaysCheckAllPredicates: true,
nodes: []string{"1"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2"}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}},
name: "test alwaysCheckAllPredicates is true",
wErr: &FitError{
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2"}},
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}},
NumAllNodes: 1,
FailedPredicates: FailedPredicateMap{
"1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrFakePredicate, algorithmpredicates.ErrFakePredicate},
@ -403,13 +404,23 @@ func TestGenericScheduler(t *testing.T) {
cache.AddNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: name}})
}
pvcs := []*v1.PersistentVolumeClaim{}
for _, pvc := range test.pvcs {
pvcs = append(pvcs, pvc)
}
pvcs = append(pvcs, test.pvcs...)
pvcLister := schedulertesting.FakePersistentVolumeClaimLister(pvcs)
scheduler := NewGenericScheduler(
cache, nil, NewSchedulingQueue(), test.predicates, algorithm.EmptyPredicateMetadataProducer, test.prioritizers, algorithm.EmptyPriorityMetadataProducer, []algorithm.SchedulerExtender{}, nil, pvcLister, test.alwaysCheckAllPredicates)
cache,
nil,
NewSchedulingQueue(),
test.predicates,
algorithm.EmptyPredicateMetadataProducer,
test.prioritizers,
algorithm.EmptyPriorityMetadataProducer,
[]algorithm.SchedulerExtender{},
nil,
pvcLister,
test.alwaysCheckAllPredicates,
false)
machine, err := scheduler.Schedule(test.pod, schedulertesting.FakeNodeLister(makeNodeList(test.nodes)))
if !reflect.DeepEqual(err, test.wErr) {
@ -421,16 +432,35 @@ func TestGenericScheduler(t *testing.T) {
}
}
func TestFindFitAllError(t *testing.T) {
// makeScheduler makes a simple genericScheduler for testing.
func makeScheduler(predicates map[string]algorithm.FitPredicate, nodes []*v1.Node) *genericScheduler {
algorithmpredicates.SetPredicatesOrdering(order)
nodes := []string{"3", "2", "1"}
predicates := map[string]algorithm.FitPredicate{"true": truePredicate, "false": falsePredicate}
nodeNameToInfo := map[string]*schedulercache.NodeInfo{
"3": schedulercache.NewNodeInfo(),
"2": schedulercache.NewNodeInfo(),
"1": schedulercache.NewNodeInfo(),
cache := schedulercache.New(time.Duration(0), wait.NeverStop)
for _, n := range nodes {
cache.AddNode(n)
}
_, predicateMap, err := findNodesThatFit(&v1.Pod{}, nodeNameToInfo, makeNodeList(nodes), predicates, nil, algorithm.EmptyPredicateMetadataProducer, nil, nil, false)
prioritizers := []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}}
s := NewGenericScheduler(
cache,
nil,
NewSchedulingQueue(),
predicates,
algorithm.EmptyPredicateMetadataProducer,
prioritizers,
algorithm.EmptyPriorityMetadataProducer,
nil, nil, nil, false, false)
cache.UpdateNodeNameToInfoMap(s.(*genericScheduler).cachedNodeInfoMap)
return s.(*genericScheduler)
}
func TestFindFitAllError(t *testing.T) {
predicates := map[string]algorithm.FitPredicate{"true": truePredicate, "matches": matchesPredicate}
nodes := makeNodeList([]string{"3", "2", "1"})
scheduler := makeScheduler(predicates, nodes)
_, predicateMap, err := scheduler.findNodesThatFit(&v1.Pod{}, nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
@ -441,9 +471,9 @@ func TestFindFitAllError(t *testing.T) {
}
for _, node := range nodes {
failures, found := predicateMap[node]
failures, found := predicateMap[node.Name]
if !found {
t.Errorf("failed to find node: %s in %v", node, predicateMap)
t.Errorf("failed to find node: %s in %v", node.Name, predicateMap)
}
if len(failures) != 1 || failures[0] != algorithmpredicates.ErrFakePredicate {
t.Errorf("unexpected failures: %v", failures)
@ -452,20 +482,13 @@ func TestFindFitAllError(t *testing.T) {
}
func TestFindFitSomeError(t *testing.T) {
algorithmpredicates.SetPredicatesOrdering(order)
nodes := []string{"3", "2", "1"}
predicates := map[string]algorithm.FitPredicate{"true": truePredicate, "matches": matchesPredicate}
pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "1"}}
nodeNameToInfo := map[string]*schedulercache.NodeInfo{
"3": schedulercache.NewNodeInfo(),
"2": schedulercache.NewNodeInfo(),
"1": schedulercache.NewNodeInfo(pod),
}
for name := range nodeNameToInfo {
nodeNameToInfo[name].SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: name}})
}
nodes := makeNodeList([]string{"3", "2", "1"})
scheduler := makeScheduler(predicates, nodes)
pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "1", UID: types.UID("1")}}
_, predicateMap, err := scheduler.findNodesThatFit(pod, nodes)
_, predicateMap, err := findNodesThatFit(pod, nodeNameToInfo, makeNodeList(nodes), predicates, nil, algorithm.EmptyPredicateMetadataProducer, nil, nil, false)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
@ -475,12 +498,12 @@ func TestFindFitSomeError(t *testing.T) {
}
for _, node := range nodes {
if node == pod.Name {
if node.Name == pod.Name {
continue
}
failures, found := predicateMap[node]
failures, found := predicateMap[node.Name]
if !found {
t.Errorf("failed to find node: %s in %v", node, predicateMap)
t.Errorf("failed to find node: %s in %v", node.Name, predicateMap)
}
if len(failures) != 1 || failures[0] != algorithmpredicates.ErrFakePredicate {
t.Errorf("unexpected failures: %v", failures)
@ -509,7 +532,7 @@ func makeNode(node string, milliCPU, memory int64) *v1.Node {
func TestHumanReadableFitError(t *testing.T) {
err := &FitError{
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2"}},
Pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "2", UID: types.UID("2")}},
NumAllNodes: 3,
FailedPredicates: FailedPredicateMap{
"1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeUnderMemoryPressure},
@ -660,11 +683,11 @@ func TestZeroRequest(t *testing.T) {
}
}
func printNodeToVictims(nodeToVictims map[*v1.Node]*Victims) string {
func printNodeToVictims(nodeToVictims map[*v1.Node]*schedulerapi.Victims) string {
var output string
for node, victims := range nodeToVictims {
output += node.Name + ": ["
for _, pod := range victims.pods {
for _, pod := range victims.Pods {
output += pod.Name + ", "
}
output += "]"
@ -672,15 +695,15 @@ func printNodeToVictims(nodeToVictims map[*v1.Node]*Victims) string {
return output
}
func checkPreemptionVictims(testName string, expected map[string]map[string]bool, nodeToPods map[*v1.Node]*Victims) error {
func checkPreemptionVictims(testName string, expected map[string]map[string]bool, nodeToPods map[*v1.Node]*schedulerapi.Victims) error {
if len(expected) == len(nodeToPods) {
for k, victims := range nodeToPods {
if expPods, ok := expected[k.Name]; ok {
if len(victims.pods) != len(expPods) {
if len(victims.Pods) != len(expPods) {
return fmt.Errorf("test [%v]: unexpected number of pods. expected: %v, got: %v", testName, expected, printNodeToVictims(nodeToPods))
}
prevPriority := int32(math.MaxInt32)
for _, p := range victims.pods {
for _, p := range victims.Pods {
// Check that pods are sorted by their priority.
if *p.Spec.Priority > prevPriority {
return fmt.Errorf("test [%v]: pod %v of node %v was not sorted by priority", testName, p.Name, k)
@ -778,74 +801,74 @@ func TestSelectNodesForPreemption(t *testing.T) {
name: "a pod that does not fit on any machine",
predicates: map[string]algorithm.FitPredicate{"matches": falsePredicate},
nodes: []string{"machine1", "machine2"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "new"}, Spec: v1.PodSpec{Priority: &highPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "new", UID: types.UID("new")}, Spec: v1.PodSpec{Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "a"}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b"}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine2"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "a", UID: types.UID("a")}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b", UID: types.UID("b")}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine2"}}},
expected: map[string]map[string]bool{},
},
{
name: "a pod that fits with no preemption",
predicates: map[string]algorithm.FitPredicate{"matches": truePredicate},
nodes: []string{"machine1", "machine2"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "new"}, Spec: v1.PodSpec{Priority: &highPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "new", UID: types.UID("new")}, Spec: v1.PodSpec{Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "a"}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b"}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine2"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "a", UID: types.UID("a")}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b", UID: types.UID("b")}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine2"}}},
expected: map[string]map[string]bool{"machine1": {}, "machine2": {}},
},
{
name: "a pod that fits on one machine with no preemption",
predicates: map[string]algorithm.FitPredicate{"matches": matchesPredicate},
nodes: []string{"machine1", "machine2"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Priority: &highPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}, Spec: v1.PodSpec{Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "a"}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b"}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine2"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "a", UID: types.UID("a")}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b", UID: types.UID("b")}, Spec: v1.PodSpec{Priority: &midPriority, NodeName: "machine2"}}},
expected: map[string]map[string]bool{"machine1": {}},
},
{
name: "a pod that fits on both machines when lower priority pods are preempted",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "a"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "a", UID: types.UID("a")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b", UID: types.UID("b")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}}},
expected: map[string]map[string]bool{"machine1": {"a": true}, "machine2": {"b": true}},
},
{
name: "a pod that would fit on the machines, but other pods running are higher priority",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &lowPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &lowPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "a"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "a", UID: types.UID("a")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b", UID: types.UID("b")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}}},
expected: map[string]map[string]bool{},
},
{
name: "medium priority pod is preempted, but lower priority one stays as it is small",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "a"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "c"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "a", UID: types.UID("a")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b", UID: types.UID("b")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "c", UID: types.UID("c")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}}},
expected: map[string]map[string]bool{"machine1": {"b": true}, "machine2": {"c": true}},
},
{
name: "mixed priority pods are preempted",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "a"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "c"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "d"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &highPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "e"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority, NodeName: "machine2"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "a", UID: types.UID("a")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "b", UID: types.UID("b")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "c", UID: types.UID("c")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "d", UID: types.UID("d")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &highPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "e", UID: types.UID("e")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority, NodeName: "machine2"}}},
expected: map[string]map[string]bool{"machine1": {"b": true, "c": true}},
},
{
@ -873,9 +896,9 @@ func TestSelectNodesForPreemption(t *testing.T) {
},
},
}}}},
{ObjectMeta: metav1.ObjectMeta{Name: "b"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "d"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &highPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "e"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority, NodeName: "machine2"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "b", UID: types.UID("b")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "d", UID: types.UID("d")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &highPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "e", UID: types.UID("e")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority, NodeName: "machine2"}}},
expected: map[string]map[string]bool{"machine1": {"a": true}, "machine2": {}},
addAffinityPredicate: true,
},
@ -883,7 +906,7 @@ func TestSelectNodesForPreemption(t *testing.T) {
for _, test := range tests {
nodes := []*v1.Node{}
for _, n := range test.nodes {
node := makeNode(n, priorityutil.DefaultMilliCPURequest*5, priorityutil.DefaultMemoryRequest*5)
node := makeNode(n, 1000*5, priorityutil.DefaultMemoryRequest*5)
node.ObjectMeta.Labels = map[string]string{"hostname": node.Name}
nodes = append(nodes, node)
}
@ -916,47 +939,47 @@ func TestPickOneNodeForPreemption(t *testing.T) {
name: "No node needs preemption",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}}},
expected: []string{"machine1"},
},
{
name: "a pod that fits on both machines when lower priority pods are preempted",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1", UID: types.UID("m2.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}}},
expected: []string{"machine1", "machine2"},
},
{
name: "a pod that fits on a machine with no preemption",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2", "machine3"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1", UID: types.UID("m2.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}}},
expected: []string{"machine3"},
},
{
name: "machine with min highest priority pod is picked",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2", "machine3"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &highPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2", UID: types.UID("m1.2")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.2"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1", UID: types.UID("m2.1")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.2", UID: types.UID("m2.2")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1", UID: types.UID("m3.1")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2", UID: types.UID("m3.2")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine3"}},
},
expected: []string{"machine3"},
},
@ -964,16 +987,16 @@ func TestPickOneNodeForPreemption(t *testing.T) {
name: "when highest priorities are the same, minimum sum of priorities is picked",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2", "machine3"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &highPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2", UID: types.UID("m1.2")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.2"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1", UID: types.UID("m2.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.2", UID: types.UID("m2.2")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1", UID: types.UID("m3.1")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2", UID: types.UID("m3.2")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
},
expected: []string{"machine2"},
},
@ -981,19 +1004,19 @@ func TestPickOneNodeForPreemption(t *testing.T) {
name: "when highest priority and sum are the same, minimum number of pods is picked",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2", "machine3"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &highPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.3"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.4"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2", UID: types.UID("m1.2")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.3", UID: types.UID("m1.3")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.4", UID: types.UID("m1.4")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.2"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &negPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1", UID: types.UID("m2.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.2", UID: types.UID("m2.2")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &negPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.3"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1", UID: types.UID("m3.1")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2", UID: types.UID("m3.2")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.3", UID: types.UID("m3.3")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine3"}},
},
expected: []string{"machine2"},
},
@ -1003,18 +1026,18 @@ func TestPickOneNodeForPreemption(t *testing.T) {
name: "sum of adjusted priorities is considered",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2", "machine3"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &highPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &highPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.3"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2", UID: types.UID("m1.2")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.3", UID: types.UID("m1.3")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.2"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &negPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1", UID: types.UID("m2.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.2", UID: types.UID("m2.2")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &negPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.3"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1", UID: types.UID("m3.1")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2", UID: types.UID("m3.2")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.3", UID: types.UID("m3.3")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine3"}},
},
expected: []string{"machine2"},
},
@ -1022,23 +1045,23 @@ func TestPickOneNodeForPreemption(t *testing.T) {
name: "non-overlapping lowest high priority, sum priorities, and number of pods",
predicates: map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
nodes: []string{"machine1", "machine2", "machine3", "machine4"},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &veryHighPriority}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}, Spec: v1.PodSpec{Containers: veryLargeContainers, Priority: &veryHighPriority}},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.3"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2", UID: types.UID("m1.2")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.3", UID: types.UID("m1.3")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1", UID: types.UID("m2.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority, NodeName: "machine2"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.3"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.4"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1", UID: types.UID("m3.1")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.2", UID: types.UID("m3.2")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.3", UID: types.UID("m3.3")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.4", UID: types.UID("m3.4")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &lowPriority, NodeName: "machine3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m4.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine4"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m4.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine4"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m4.3"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine4"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m4.4"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine4"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m4.1", UID: types.UID("m4.1")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine4"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m4.2", UID: types.UID("m4.2")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine4"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m4.3", UID: types.UID("m4.3")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine4"}},
{ObjectMeta: metav1.ObjectMeta{Name: "m4.4", UID: types.UID("m4.4")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &negPriority, NodeName: "machine4"}},
},
expected: []string{"machine1"},
},
@ -1085,7 +1108,7 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) {
"machine3": []algorithm.PredicateFailureReason{algorithmpredicates.ErrTaintsTolerationsNotMatch},
"machine4": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeLabelPresenceViolated},
},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}},
expected: map[string]bool{},
},
{
@ -1095,7 +1118,7 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) {
"machine2": []algorithm.PredicateFailureReason{algorithmpredicates.ErrPodNotMatchHostName},
"machine3": []algorithm.PredicateFailureReason{algorithmpredicates.ErrNodeUnschedulable},
},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}, Spec: v1.PodSpec{Affinity: &v1.Affinity{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}, Spec: v1.PodSpec{Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
@ -1120,7 +1143,7 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) {
"machine1": []algorithm.PredicateFailureReason{algorithmpredicates.ErrPodAffinityNotMatch},
"machine2": []algorithm.PredicateFailureReason{algorithmpredicates.ErrPodNotMatchHostName},
},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}, Spec: v1.PodSpec{Affinity: &v1.Affinity{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}, Spec: v1.PodSpec{Affinity: &v1.Affinity{
PodAffinity: &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
@ -1164,7 +1187,7 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) {
"machine3": []algorithm.PredicateFailureReason{algorithmpredicates.NewInsufficientResourceError(v1.ResourceMemory, 1000, 600, 400)},
"machine4": []algorithm.PredicateFailureReason{},
},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}},
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}},
expected: map[string]bool{"machine3": true, "machine4": true},
},
}
@ -1203,47 +1226,45 @@ func TestPreempt(t *testing.T) {
}{
{
name: "basic preemption logic",
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}, Spec: v1.PodSpec{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}, Spec: v1.PodSpec{
Containers: veryLargeContainers,
Priority: &highPriority},
},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority, NodeName: "machine2"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1"}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2", UID: types.UID("m1.2")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1", UID: types.UID("m2.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority, NodeName: "machine2"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m3.1", UID: types.UID("m3.1")}, Spec: v1.PodSpec{Containers: mediumContainers, Priority: &midPriority, NodeName: "machine3"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
},
expectedNode: "machine1",
expectedPods: []string{"m1.1", "m1.2"},
},
{
name: "One node doesn't need any preemption",
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}, Spec: v1.PodSpec{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}, Spec: v1.PodSpec{
Containers: veryLargeContainers,
Priority: &highPriority},
},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2", UID: types.UID("m1.2")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority, NodeName: "machine2"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1", UID: types.UID("m2.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &highPriority, NodeName: "machine2"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
},
expectedNode: "machine3",
expectedPods: []string{},
},
{
name: "Scheduler extenders allow only machine1, otherwise machine3 would have been chosen",
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}, Spec: v1.PodSpec{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}, Spec: v1.PodSpec{
Containers: veryLargeContainers,
Priority: &highPriority},
},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2", UID: types.UID("m1.2")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1", UID: types.UID("m2.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
},
extenders: []*FakeExtender{
{
@ -1258,15 +1279,15 @@ func TestPreempt(t *testing.T) {
},
{
name: "Scheduler extenders do not allow any preemption",
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}, Spec: v1.PodSpec{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}, Spec: v1.PodSpec{
Containers: veryLargeContainers,
Priority: &highPriority},
},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2"}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2", UID: types.UID("m1.2")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1"}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1", UID: types.UID("m2.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
},
extenders: []*FakeExtender{
{
@ -1276,6 +1297,30 @@ func TestPreempt(t *testing.T) {
expectedNode: "",
expectedPods: []string{},
},
{
name: "One scheduler extender allows only machine1, the other returns error but ignorable. Only machine1 would be chosen",
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}, Spec: v1.PodSpec{
Containers: veryLargeContainers,
Priority: &highPriority},
},
pods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "m1.1", UID: types.UID("m1.1")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &midPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m1.2", UID: types.UID("m1.2")}, Spec: v1.PodSpec{Containers: smallContainers, Priority: &lowPriority, NodeName: "machine1"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
{ObjectMeta: metav1.ObjectMeta{Name: "m2.1", UID: types.UID("m2.1")}, Spec: v1.PodSpec{Containers: largeContainers, Priority: &midPriority, NodeName: "machine2"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
},
extenders: []*FakeExtender{
{
predicates: []fitPredicate{errorPredicateExtender},
ignorable: true,
},
{
predicates: []fitPredicate{machine1PredicateExtender},
},
},
expectedNode: "machine1",
expectedPods: []string{"m1.1", "m1.2"},
},
}
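// The "ignorable" case above assumes that errorPredicateExtender always returns an
// error and that FakeExtender's ignorable field marks an extender whose failures
// may be skipped; with that, preemption is expected to proceed using only the
// machine1-only extender rather than failing the whole Preempt call.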
for _, test := range tests {
@ -1284,15 +1329,35 @@ func TestPreempt(t *testing.T) {
for _, pod := range test.pods {
cache.AddPod(pod)
}
cachedNodeInfoMap := map[string]*schedulercache.NodeInfo{}
for _, name := range nodeNames {
cache.AddNode(makeNode(name, priorityutil.DefaultMilliCPURequest*5, priorityutil.DefaultMemoryRequest*5))
node := makeNode(name, 1000*5, priorityutil.DefaultMemoryRequest*5)
cache.AddNode(node)
// Set nodeInfo on the extenders to mock the extenders' cache for preemption.
cachedNodeInfo := schedulercache.NewNodeInfo()
cachedNodeInfo.SetNode(node)
cachedNodeInfoMap[name] = cachedNodeInfo
}
extenders := []algorithm.SchedulerExtender{}
for _, extender := range test.extenders {
// Set nodeInfoMap as the extenders' cached node information.
extender.cachedNodeNameToInfo = cachedNodeInfoMap
extenders = append(extenders, extender)
}
scheduler := NewGenericScheduler(
cache, nil, NewSchedulingQueue(), map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources}, algorithm.EmptyPredicateMetadataProducer, []algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}}, algorithm.EmptyPriorityMetadataProducer, extenders, nil, schedulertesting.FakePersistentVolumeClaimLister{}, false)
cache,
nil,
NewSchedulingQueue(),
map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
algorithm.EmptyPredicateMetadataProducer,
[]algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}},
algorithm.EmptyPriorityMetadataProducer,
extenders,
nil,
schedulertesting.FakePersistentVolumeClaimLister{},
false,
false)
// Call Preempt and check the expected results.
node, victims, _, err := scheduler.Preempt(test.pod, schedulertesting.FakeNodeLister(makeNodeList(nodeNames)), error(&FitError{Pod: test.pod, FailedPredicates: failedPredMap}))
if err != nil {

View File

@ -29,8 +29,11 @@ package core
import (
"container/heap"
"fmt"
"reflect"
"sync"
"github.com/golang/glog"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/cache"
@ -38,9 +41,6 @@ import (
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/pkg/scheduler/util"
"github.com/golang/glog"
"reflect"
)
// SchedulingQueue is an interface for a queue to store pods waiting to be scheduled.
@ -57,6 +57,7 @@ type SchedulingQueue interface {
AssignedPodAdded(pod *v1.Pod)
AssignedPodUpdated(pod *v1.Pod)
WaitingPodsForNode(nodeName string) []*v1.Pod
WaitingPods() []*v1.Pod
}
// NewSchedulingQueue initializes a new scheduling queue. If pod priority is
@ -116,6 +117,15 @@ func (f *FIFO) Pop() (*v1.Pod, error) {
return result.(*v1.Pod), nil
}
// WaitingPods returns all the waiting pods in the queue.
func (f *FIFO) WaitingPods() []*v1.Pod {
result := []*v1.Pod{}
for _, pod := range f.FIFO.List() {
result = append(result, pod.(*v1.Pod))
}
return result
}
// FIFO does not need to react to events, as all pods are always in the active
// scheduling queue anyway.
@ -460,6 +470,21 @@ func (p *PriorityQueue) WaitingPodsForNode(nodeName string) []*v1.Pod {
return nil
}
// WaitingPods returns all the waiting pods in the queue.
func (p *PriorityQueue) WaitingPods() []*v1.Pod {
p.lock.Lock()
defer p.lock.Unlock()
result := []*v1.Pod{}
for _, pod := range p.activeQ.List() {
result = append(result, pod.(*v1.Pod))
}
for _, pod := range p.unschedulableQ.pods {
result = append(result, pod)
}
return result
}
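// WaitingPods gives callers a read-only snapshot of everything the queue currently
// holds (active plus unschedulable pods), which is what the cache comparer added in
// this commit relies on. A minimal, hypothetical debug helper could look like:
//
//	func dumpWaitingPods(q SchedulingQueue) {
//		for _, p := range q.WaitingPods() {
//			glog.V(4).Infof("waiting pod: %s/%s", p.Namespace, p.Name)
//		}
//	}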
// UnschedulablePodsMap holds pods that cannot be scheduled. This data structure
// is used to implement unschedulableQ.
type UnschedulablePodsMap struct {

View File

@ -9,14 +9,49 @@ load(
go_library(
name = "go_default_library",
srcs = [
"cache_comparer.go",
"factory.go",
"plugins.go",
],
] + select({
"@io_bazel_rules_go//go/platform:android": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:darwin": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:dragonfly": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:freebsd": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:linux": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:nacl": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:netbsd": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:openbsd": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:plan9": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:solaris": [
"signal.go",
],
"@io_bazel_rules_go//go/platform:windows": [
"signal_windows.go",
],
"//conditions:default": [],
}),
importpath = "k8s.io/kubernetes/pkg/scheduler/factory",
deps = [
"//pkg/api/v1/pod:go_default_library",
"//pkg/apis/core/helper:go_default_library",
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/scheduler:go_default_library",
@ -25,8 +60,8 @@ go_library(
"//pkg/scheduler/algorithm/priorities:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/api/validation:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/core:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
@ -59,24 +94,29 @@ go_library(
go_test(
name = "go_default_test",
srcs = [
"cache_comparer_test.go",
"factory_test.go",
"plugins_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/api/testing:go_default_library",
"//pkg/scheduler:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/priorities:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/api/latest:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/core:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/client-go/informers:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",

View File

@ -0,0 +1,161 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package factory
import (
"sort"
"strings"
"github.com/golang/glog"
"k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/labels"
corelisters "k8s.io/client-go/listers/core/v1"
v1beta1 "k8s.io/client-go/listers/policy/v1beta1"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/scheduler/core"
)
type cacheComparer struct {
nodeLister corelisters.NodeLister
podLister corelisters.PodLister
pdbLister v1beta1.PodDisruptionBudgetLister
cache schedulercache.Cache
podQueue core.SchedulingQueue
compareStrategy
}
func (c *cacheComparer) Compare() error {
glog.V(3).Info("cache comparer started")
defer glog.V(3).Info("cache comparer finished")
nodes, err := c.nodeLister.List(labels.Everything())
if err != nil {
return err
}
pods, err := c.podLister.List(labels.Everything())
if err != nil {
return err
}
pdbs, err := c.pdbLister.List(labels.Everything())
if err != nil {
return err
}
snapshot := c.cache.Snapshot()
waitingPods := c.podQueue.WaitingPods()
if missed, redundant := c.CompareNodes(nodes, snapshot.Nodes); len(missed)+len(redundant) != 0 {
glog.Warningf("cache mismatch: missed nodes: %s; redundant nodes: %s", missed, redundant)
}
if missed, redundant := c.ComparePods(pods, waitingPods, snapshot.Nodes); len(missed)+len(redundant) != 0 {
glog.Warningf("cache mismatch: missed pods: %s; redundant pods: %s", missed, redundant)
}
if missed, redundant := c.ComparePdbs(pdbs, snapshot.Pdbs); len(missed)+len(redundant) != 0 {
glog.Warningf("cache mismatch: missed pdbs: %s; redundant pdbs: %s", missed, redundant)
}
return nil
}
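// Compare is purely diagnostic: it reconciles nothing and only logs warnings when
// the API server's view (listers) and the scheduler's view (cache snapshot plus
// the scheduling queue) disagree about nodes, pods, or PDBs.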
type compareStrategy struct {
}
func (c compareStrategy) CompareNodes(nodes []*v1.Node, nodeinfos map[string]*schedulercache.NodeInfo) (missed, redundant []string) {
actual := []string{}
for _, node := range nodes {
actual = append(actual, node.Name)
}
cached := []string{}
for nodeName := range nodeinfos {
cached = append(cached, nodeName)
}
return compareStrings(actual, cached)
}
func (c compareStrategy) ComparePods(pods, waitingPods []*v1.Pod, nodeinfos map[string]*schedulercache.NodeInfo) (missed, redundant []string) {
actual := []string{}
for _, pod := range pods {
actual = append(actual, string(pod.UID))
}
cached := []string{}
for _, nodeinfo := range nodeinfos {
for _, pod := range nodeinfo.Pods() {
cached = append(cached, string(pod.UID))
}
}
for _, pod := range waitingPods {
cached = append(cached, string(pod.UID))
}
return compareStrings(actual, cached)
}
func (c compareStrategy) ComparePdbs(pdbs []*policy.PodDisruptionBudget, pdbCache map[string]*policy.PodDisruptionBudget) (missed, redundant []string) {
actual := []string{}
for _, pdb := range pdbs {
actual = append(actual, string(pdb.UID))
}
cached := []string{}
for pdbUID := range pdbCache {
cached = append(cached, pdbUID)
}
return compareStrings(actual, cached)
}
func compareStrings(actual, cached []string) (missed, redundant []string) {
missed, redundant = []string{}, []string{}
sort.Strings(actual)
sort.Strings(cached)
compare := func(i, j int) int {
if i == len(actual) {
return 1
} else if j == len(cached) {
return -1
}
return strings.Compare(actual[i], cached[j])
}
for i, j := 0, 0; i < len(actual) || j < len(cached); {
switch compare(i, j) {
case 0:
i++
j++
case -1:
missed = append(missed, actual[i])
i++
case 1:
redundant = append(redundant, cached[j])
j++
}
}
return
}
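compareStrings sorts both inputs and then walks them with two cursors, so "missed" collects names the cluster reports but the cache lacks, while "redundant" collects names the cache holds but the cluster no longer reports. A minimal, hypothetical illustration in the same package (assuming "fmt" is imported) would behave as follows:

func exampleCompareStrings() {
	missed, redundant := compareStrings(
		[]string{"a", "b", "d"}, // names reported by the API server
		[]string{"b", "c", "d"}, // names held in the scheduler cache
	)
	fmt.Println(missed)    // [a]: in the cluster, missing from the cache
	fmt.Println(redundant) // [c]: in the cache, missing from the cluster
}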

View File

@ -0,0 +1,228 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package factory
import (
"reflect"
"testing"
"k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/types"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestCompareNodes(t *testing.T) {
compare := compareStrategy{}
tests := []struct {
actual []string
cached []string
missing []string
redundant []string
}{
{
actual: []string{"foo", "bar"},
cached: []string{"bar", "foo", "foobar"},
missing: []string{},
redundant: []string{"foobar"},
},
{
actual: []string{"foo", "bar", "foobar"},
cached: []string{"bar", "foo"},
missing: []string{"foobar"},
redundant: []string{},
},
{
actual: []string{"foo", "bar", "foobar"},
cached: []string{"bar", "foobar", "foo"},
missing: []string{},
redundant: []string{},
},
}
for _, test := range tests {
nodes := []*v1.Node{}
for _, nodeName := range test.actual {
node := &v1.Node{}
node.Name = nodeName
nodes = append(nodes, node)
}
nodeInfo := make(map[string]*schedulercache.NodeInfo)
for _, nodeName := range test.cached {
nodeInfo[nodeName] = &schedulercache.NodeInfo{}
}
m, r := compare.CompareNodes(nodes, nodeInfo)
if !reflect.DeepEqual(m, test.missing) {
t.Errorf("missing expected to be %s; got %s", test.missing, m)
}
if !reflect.DeepEqual(r, test.redundant) {
t.Errorf("redundant expected to be %s; got %s", test.redundant, r)
}
}
}
func TestComparePods(t *testing.T) {
compare := compareStrategy{}
tests := []struct {
actual []string
cached []string
queued []string
missing []string
redundant []string
}{
{
actual: []string{"foo", "bar"},
cached: []string{"bar", "foo", "foobar"},
queued: []string{},
missing: []string{},
redundant: []string{"foobar"},
},
{
actual: []string{"foo", "bar"},
cached: []string{"foo", "foobar"},
queued: []string{"bar"},
missing: []string{},
redundant: []string{"foobar"},
},
{
actual: []string{"foo", "bar", "foobar"},
cached: []string{"bar", "foo"},
queued: []string{},
missing: []string{"foobar"},
redundant: []string{},
},
{
actual: []string{"foo", "bar", "foobar"},
cached: []string{"foo"},
queued: []string{"bar"},
missing: []string{"foobar"},
redundant: []string{},
},
{
actual: []string{"foo", "bar", "foobar"},
cached: []string{"bar", "foobar", "foo"},
queued: []string{},
missing: []string{},
redundant: []string{},
},
{
actual: []string{"foo", "bar", "foobar"},
cached: []string{"foobar", "foo"},
queued: []string{"bar"},
missing: []string{},
redundant: []string{},
},
}
for _, test := range tests {
pods := []*v1.Pod{}
for _, uid := range test.actual {
pod := &v1.Pod{}
pod.UID = types.UID(uid)
pods = append(pods, pod)
}
queuedPods := []*v1.Pod{}
for _, uid := range test.queued {
pod := &v1.Pod{}
pod.UID = types.UID(uid)
queuedPods = append(queuedPods, pod)
}
nodeInfo := make(map[string]*schedulercache.NodeInfo)
for _, uid := range test.cached {
pod := &v1.Pod{}
pod.UID = types.UID(uid)
pod.Namespace = "ns"
pod.Name = uid
nodeInfo[uid] = schedulercache.NewNodeInfo(pod)
}
m, r := compare.ComparePods(pods, queuedPods, nodeInfo)
if !reflect.DeepEqual(m, test.missing) {
t.Errorf("missing expected to be %s; got %s", test.missing, m)
}
if !reflect.DeepEqual(r, test.redundant) {
t.Errorf("redundant expected to be %s; got %s", test.redundant, r)
}
}
}
func TestComparePdbs(t *testing.T) {
compare := compareStrategy{}
tests := []struct {
actual []string
cached []string
missing []string
redundant []string
}{
{
actual: []string{"foo", "bar"},
cached: []string{"bar", "foo", "foobar"},
missing: []string{},
redundant: []string{"foobar"},
},
{
actual: []string{"foo", "bar", "foobar"},
cached: []string{"bar", "foo"},
missing: []string{"foobar"},
redundant: []string{},
},
{
actual: []string{"foo", "bar", "foobar"},
cached: []string{"bar", "foobar", "foo"},
missing: []string{},
redundant: []string{},
},
}
for _, test := range tests {
pdbs := []*policy.PodDisruptionBudget{}
for _, uid := range test.actual {
pdb := &policy.PodDisruptionBudget{}
pdb.UID = types.UID(uid)
pdbs = append(pdbs, pdb)
}
cache := make(map[string]*policy.PodDisruptionBudget)
for _, uid := range test.cached {
pdb := &policy.PodDisruptionBudget{}
pdb.UID = types.UID(uid)
cache[uid] = pdb
}
m, r := compare.ComparePdbs(pdbs, cache)
if !reflect.DeepEqual(m, test.missing) {
t.Errorf("missing expected to be %s; got %s", test.missing, m)
}
if !reflect.DeepEqual(r, test.redundant) {
t.Errorf("redundant expected to be %s; got %s", test.redundant, r)
}
}
}
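These comparer tests are plain table-driven Go tests; assuming the standard Kubernetes repository layout, they can be run in isolation with "go test ./pkg/scheduler/factory -run 'TestCompare(Nodes|Pods|Pdbs)'".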

View File

@ -20,6 +20,8 @@ package factory
import (
"fmt"
"os"
"os/signal"
"reflect"
"time"
@ -50,7 +52,6 @@ import (
"k8s.io/client-go/tools/cache"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/apis/core/helper"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/features"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
"k8s.io/kubernetes/pkg/scheduler"
@ -58,8 +59,8 @@ import (
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/api/validation"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/scheduler/core"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
"k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/scheduler/volumebinder"
)
@ -113,7 +114,7 @@ type configFactory struct {
schedulerCache schedulercache.Cache
// SchedulerName of a scheduler is used to select which pods will be
// processed by this scheduler, based on the pod's "spec.SchedulerName".
// processed by this scheduler, based on the pod's "spec.schedulerName".
schedulerName string
// RequiredDuringScheduling affinity is not symmetric, but there is an implicit PreferredDuringScheduling affinity rule
@ -132,6 +133,9 @@ type configFactory struct {
// always check all predicates even if the middle of one predicate fails.
alwaysCheckAllPredicates bool
// Disable pod preemption or not.
disablePreemption bool
}
// NewConfigFactory initializes the default implementation of a Configurator. To encourage eventual privatization of the struct type, we only
@ -151,6 +155,7 @@ func NewConfigFactory(
storageClassInformer storageinformers.StorageClassInformer,
hardPodAffinitySymmetricWeight int32,
enableEquivalenceClassCache bool,
disablePreemption bool,
) scheduler.Configurator {
stopEverything := make(chan struct{})
schedulerCache := schedulercache.New(30*time.Second, stopEverything)
@ -178,6 +183,7 @@ func NewConfigFactory(
schedulerName: schedulerName,
hardPodAffinitySymmetricWeight: hardPodAffinitySymmetricWeight,
enableEquivalenceClassCache: enableEquivalenceClassCache,
disablePreemption: disablePreemption,
}
c.scheduledPodsHasSynced = podInformer.Informer().HasSynced
@ -212,10 +218,10 @@ func NewConfigFactory(
FilterFunc: func(obj interface{}) bool {
switch t := obj.(type) {
case *v1.Pod:
return unassignedNonTerminatedPod(t)
return unassignedNonTerminatedPod(t) && responsibleForPod(t, schedulerName)
case cache.DeletedFinalStateUnknown:
if pod, ok := t.Obj.(*v1.Pod); ok {
return unassignedNonTerminatedPod(pod)
return unassignedNonTerminatedPod(pod) && responsibleForPod(pod, schedulerName)
}
runtime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, c))
return false
@ -292,9 +298,32 @@ func NewConfigFactory(
if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
// Setup volume binder
c.volumeBinder = volumebinder.NewVolumeBinder(client, pvcInformer, pvInformer, nodeInformer, storageClassInformer)
c.volumeBinder = volumebinder.NewVolumeBinder(client, pvcInformer, pvInformer, storageClassInformer)
}
// Setup cache comparer
comparer := &cacheComparer{
podLister: podInformer.Lister(),
nodeLister: nodeInformer.Lister(),
pdbLister: pdbInformer.Lister(),
cache: c.schedulerCache,
podQueue: c.podQueue,
}
ch := make(chan os.Signal, 1)
signal.Notify(ch, compareSignal)
go func() {
for {
select {
case <-c.StopEverything:
return
case <-ch:
comparer.Compare()
}
}
}()
return c
}
@ -381,23 +410,6 @@ func (c *configFactory) invalidatePredicatesForPvUpdate(oldPV, newPV *v1.Persist
break
}
}
if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
oldAffinity, err := v1helper.GetStorageNodeAffinityFromAnnotation(oldPV.Annotations)
if err != nil {
glog.Errorf("cannot get node affinity fo *v1.PersistentVolume: %v", oldPV)
return
}
newAffinity, err := v1helper.GetStorageNodeAffinityFromAnnotation(newPV.Annotations)
if err != nil {
glog.Errorf("cannot get node affinity fo *v1.PersistentVolume: %v", newPV)
return
}
// If node affinity of PV is changed.
if !reflect.DeepEqual(oldAffinity, newAffinity) {
invalidPredicates.Insert(predicates.CheckVolumeBindingPred)
}
}
c.equivalencePodCache.InvalidateCachedPredicateItemOfAllNodes(invalidPredicates)
}
@ -622,6 +634,9 @@ func (c *configFactory) updatePodInCache(oldObj, newObj interface{}) {
return
}
// NOTE: Because the scheduler uses snapshots of schedulerCache and the live
// version of equivalencePodCache, updates must be written to schedulerCache
// before invalidating equivalencePodCache.
if err := c.schedulerCache.UpdatePod(oldPod, newPod); err != nil {
glog.Errorf("scheduler cache UpdatePod failed: %v", err)
}
@ -708,6 +723,9 @@ func (c *configFactory) deletePodFromCache(obj interface{}) {
glog.Errorf("cannot convert to *v1.Pod: %v", t)
return
}
// NOTE: Because the scheduler uses snapshots of schedulerCache and the live
// version of equivalencePodCache, updates must be written to schedulerCache
// before invalidating equivalencePodCache.
if err := c.schedulerCache.RemovePod(pod); err != nil {
glog.Errorf("scheduler cache RemovePod failed: %v", err)
}
@ -764,6 +782,9 @@ func (c *configFactory) updateNodeInCache(oldObj, newObj interface{}) {
return
}
// NOTE: Because the scheduler uses snapshots of schedulerCache and the live
// version of equivalencePodCache, updates must be written to schedulerCache
// before invalidating equivalencePodCache.
if err := c.schedulerCache.UpdateNode(oldNode, newNode); err != nil {
glog.Errorf("scheduler cache UpdateNode failed: %v", err)
}
@ -825,6 +846,9 @@ func (c *configFactory) invalidateCachedPredicatesOnNodeUpdate(newNode *v1.Node,
if oldConditions[v1.NodeDiskPressure] != newConditions[v1.NodeDiskPressure] {
invalidPredicates.Insert(predicates.CheckNodeDiskPressurePred)
}
if oldConditions[v1.NodePIDPressure] != newConditions[v1.NodePIDPressure] {
invalidPredicates.Insert(predicates.CheckNodePIDPressurePred)
}
if oldConditions[v1.NodeReady] != newConditions[v1.NodeReady] ||
oldConditions[v1.NodeOutOfDisk] != newConditions[v1.NodeOutOfDisk] ||
oldConditions[v1.NodeNetworkUnavailable] != newConditions[v1.NodeNetworkUnavailable] {
@ -854,6 +878,9 @@ func (c *configFactory) deleteNodeFromCache(obj interface{}) {
glog.Errorf("cannot convert to *v1.Node: %v", t)
return
}
// NOTE: Because the scheduler uses snapshots of schedulerCache and the live
// version of equivalencePodCache, updates must be written to schedulerCache
// before invalidating equivalencePodCache.
if err := c.schedulerCache.RemoveNode(node); err != nil {
glog.Errorf("scheduler cache RemoveNode failed: %v", err)
}
@ -1046,18 +1073,25 @@ func (c *configFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String,
}
// Init equivalence class cache
if c.enableEquivalenceClassCache && getEquivalencePodFuncFactory != nil {
pluginArgs, err := c.getPluginArgs()
if err != nil {
return nil, err
}
c.equivalencePodCache = core.NewEquivalenceCache(
getEquivalencePodFuncFactory(*pluginArgs),
)
if c.enableEquivalenceClassCache {
c.equivalencePodCache = core.NewEquivalenceCache()
glog.Info("Created equivalence class cache")
}
algo := core.NewGenericScheduler(c.schedulerCache, c.equivalencePodCache, c.podQueue, predicateFuncs, predicateMetaProducer, priorityConfigs, priorityMetaProducer, extenders, c.volumeBinder, c.pVCLister, c.alwaysCheckAllPredicates)
algo := core.NewGenericScheduler(
c.schedulerCache,
c.equivalencePodCache,
c.podQueue,
predicateFuncs,
predicateMetaProducer,
priorityConfigs,
priorityMetaProducer,
extenders,
c.volumeBinder,
c.pVCLister,
c.alwaysCheckAllPredicates,
c.disablePreemption,
)
podBackoff := util.CreateDefaultPodBackoff()
return &scheduler.Config{
@ -1173,6 +1207,11 @@ func assignedNonTerminatedPod(pod *v1.Pod) bool {
return true
}
// responsibleForPod returns true if the pod has asked to be scheduled by the given scheduler.
func responsibleForPod(pod *v1.Pod, schedulerName string) bool {
return schedulerName == pod.Spec.SchedulerName
}
// assignedPodLister filters the pods returned from a PodLister to
// only include those that have a node name set.
type assignedPodLister struct {
@ -1245,10 +1284,9 @@ func (i *podInformer) Lister() corelisters.PodLister {
}
// NewPodInformer creates a shared index informer that returns only non-terminal pods.
func NewPodInformer(client clientset.Interface, resyncPeriod time.Duration, schedulerName string) coreinformers.PodInformer {
func NewPodInformer(client clientset.Interface, resyncPeriod time.Duration) coreinformers.PodInformer {
selector := fields.ParseSelectorOrDie(
"spec.schedulerName=" + schedulerName +
",status.phase!=" + string(v1.PodSucceeded) +
"status.phase!=" + string(v1.PodSucceeded) +
",status.phase!=" + string(v1.PodFailed))
lw := cache.NewListWatchFromClient(client.CoreV1().RESTClient(), string(v1.ResourcePods), metav1.NamespaceAll, selector)
return &podInformer{
@ -1271,6 +1309,9 @@ func (c *configFactory) MakeDefaultErrorFunc(backoff *util.PodBackoff, podQueue
_, err := c.client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
if err != nil && errors.IsNotFound(err) {
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}
// NOTE: Because the scheduler uses snapshots of schedulerCache and the live
// version of equivalencePodCache, updates must be written to schedulerCache
// before invalidating equivalencePodCache.
c.schedulerCache.RemoveNode(&node)
// invalidate cached predicate for the node
if c.enableEquivalenceClassCache {

View File

@ -28,25 +28,28 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
utiltesting "k8s.io/client-go/util/testing"
"k8s.io/kubernetes/pkg/api/legacyscheme"
apitesting "k8s.io/kubernetes/pkg/api/testing"
"k8s.io/kubernetes/pkg/scheduler"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
latestschedulerapi "k8s.io/kubernetes/pkg/scheduler/api/latest"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/scheduler/core"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
"k8s.io/kubernetes/pkg/scheduler/util"
)
const enableEquivalenceCache = true
const (
enableEquivalenceCache = true
disablePodPreemption = false
)
func TestCreate(t *testing.T) {
handler := utiltesting.FakeHandler{
@ -56,7 +59,7 @@ func TestCreate(t *testing.T) {
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
factory := newConfigFactory(client, v1.DefaultHardPodAffinitySymmetricWeight)
factory.Create()
}
@ -74,7 +77,7 @@ func TestCreateFromConfig(t *testing.T) {
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
factory := newConfigFactory(client, v1.DefaultHardPodAffinitySymmetricWeight)
// Pre-register some predicate and priority functions
@ -119,7 +122,7 @@ func TestCreateFromConfigWithHardPodAffinitySymmetricWeight(t *testing.T) {
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
factory := newConfigFactory(client, v1.DefaultHardPodAffinitySymmetricWeight)
// Pre-register some predicate and priority functions
@ -165,7 +168,7 @@ func TestCreateFromEmptyConfig(t *testing.T) {
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
factory := newConfigFactory(client, v1.DefaultHardPodAffinitySymmetricWeight)
configData = []byte(`{}`)
@ -187,7 +190,7 @@ func TestCreateFromConfigWithUnspecifiedPredicatesOrPriorities(t *testing.T) {
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
factory := newConfigFactory(client, v1.DefaultHardPodAffinitySymmetricWeight)
RegisterFitPredicate("PredicateOne", PredicateOne)
@ -227,7 +230,7 @@ func TestCreateFromConfigWithEmptyPredicatesOrPriorities(t *testing.T) {
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
factory := newConfigFactory(client, v1.DefaultHardPodAffinitySymmetricWeight)
RegisterFitPredicate("PredicateOne", PredicateOne)
@ -281,16 +284,16 @@ func TestDefaultErrorFunc(t *testing.T) {
}
handler := utiltesting.FakeHandler{
StatusCode: 200,
ResponseBody: runtime.EncodeOrDie(util.Test.Codec(), testPod),
ResponseBody: runtime.EncodeOrDie(schedulertesting.Test.Codec(), testPod),
T: t,
}
mux := http.NewServeMux()
// FakeHandler mustn't be sent requests other than the one you want to test.
mux.Handle(util.Test.ResourcePath(string(v1.ResourcePods), "bar", "foo"), &handler)
mux.Handle(schedulertesting.Test.ResourcePath(string(v1.ResourcePods), "bar", "foo"), &handler)
server := httptest.NewServer(mux)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
factory := newConfigFactory(client, v1.DefaultHardPodAffinitySymmetricWeight)
queue := &core.FIFO{FIFO: cache.NewFIFO(cache.MetaNamespaceKeyFunc)}
podBackoff := util.CreatePodBackoff(1*time.Millisecond, 1*time.Second)
@ -306,7 +309,7 @@ func TestDefaultErrorFunc(t *testing.T) {
if !exists {
continue
}
handler.ValidateRequest(t, util.Test.ResourcePath(string(v1.ResourcePods), "bar", "foo"), "GET", nil)
handler.ValidateRequest(t, schedulertesting.Test.ResourcePath(string(v1.ResourcePods), "bar", "foo"), "GET", nil)
if e, a := testPod, got; !reflect.DeepEqual(e, a) {
t.Errorf("Expected %v, got %v", e, a)
}
@ -361,16 +364,16 @@ func TestBind(t *testing.T) {
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
b := binder{client}
if err := b.Bind(item.binding); err != nil {
t.Errorf("Unexpected error: %v", err)
continue
}
expectedBody := runtime.EncodeOrDie(util.Test.Codec(), item.binding)
expectedBody := runtime.EncodeOrDie(schedulertesting.Test.Codec(), item.binding)
handler.ValidateRequest(t,
util.Test.SubResourcePath(string(v1.ResourcePods), metav1.NamespaceDefault, "foo", "binding"),
schedulertesting.Test.SubResourcePath(string(v1.ResourcePods), metav1.NamespaceDefault, "foo", "binding"),
"POST", &expectedBody)
}
}
@ -383,7 +386,7 @@ func TestInvalidHardPodAffinitySymmetricWeight(t *testing.T) {
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
// factory of "default-scheduler"
factory := newConfigFactory(client, -1)
_, err := factory.Create()
@ -400,7 +403,7 @@ func TestInvalidFactoryArgs(t *testing.T) {
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &legacyscheme.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
testCases := []struct {
hardPodAffinitySymmetricWeight int32
@ -533,19 +536,44 @@ func newConfigFactory(client *clientset.Clientset, hardPodAffinitySymmetricWeigh
informerFactory.Storage().V1().StorageClasses(),
hardPodAffinitySymmetricWeight,
enableEquivalenceCache,
disablePodPreemption,
)
}
type fakeExtender struct {
isBinder bool
interestedPodName string
ignorable bool
}
func (f *fakeExtender) Filter(pod *v1.Pod, nodes []*v1.Node, nodeNameToInfo map[string]*schedulercache.NodeInfo) (filteredNodes []*v1.Node, failedNodesMap schedulerapi.FailedNodesMap, err error) {
func (f *fakeExtender) IsIgnorable() bool {
return f.ignorable
}
func (f *fakeExtender) ProcessPreemption(
pod *v1.Pod,
nodeToVictims map[*v1.Node]*schedulerapi.Victims,
nodeNameToInfo map[string]*schedulercache.NodeInfo,
) (map[*v1.Node]*schedulerapi.Victims, error) {
return nil, nil
}
func (f *fakeExtender) SupportsPreemption() bool {
return false
}
func (f *fakeExtender) Filter(
pod *v1.Pod,
nodes []*v1.Node,
nodeNameToInfo map[string]*schedulercache.NodeInfo,
) (filteredNodes []*v1.Node, failedNodesMap schedulerapi.FailedNodesMap, err error) {
return nil, nil, nil
}
func (f *fakeExtender) Prioritize(pod *v1.Pod, nodes []*v1.Node) (hostPriorities *schedulerapi.HostPriorityList, weight int, err error) {
func (f *fakeExtender) Prioritize(
pod *v1.Pod,
nodes []*v1.Node,
) (hostPriorities *schedulerapi.HostPriorityList, weight int, err error) {
return nil, 0, nil
}
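The extra methods added to fakeExtender above suggest the extender contract grew preemption support in this release. Purely as a reading aid, here is a partial sketch of that contract, assuming only the signatures visible in this diff; the interface name and comments are illustrative, and the real algorithm.SchedulerExtender very likely declares further methods (binding, pod-interest checks) that this test file does not show:

package sketch

import (
	"k8s.io/api/core/v1"
	schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
	schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)

// extenderShape is an illustrative, partial view of the extender contract,
// reconstructed only from the fakeExtender methods above.
type extenderShape interface {
	// Filter narrows the candidate node set for a pod.
	Filter(pod *v1.Pod, nodes []*v1.Node, nodeNameToInfo map[string]*schedulercache.NodeInfo) ([]*v1.Node, schedulerapi.FailedNodesMap, error)
	// Prioritize scores the nodes that survived filtering.
	Prioritize(pod *v1.Pod, nodes []*v1.Node) (*schedulerapi.HostPriorityList, int, error)
	// ProcessPreemption lets the extender adjust or reject the proposed victims per node.
	ProcessPreemption(pod *v1.Pod, nodeToVictims map[*v1.Node]*schedulerapi.Victims, nodeNameToInfo map[string]*schedulercache.NodeInfo) (map[*v1.Node]*schedulerapi.Victims, error)
	// SupportsPreemption reports whether ProcessPreemption should be consulted at all.
	SupportsPreemption() bool
	// IsIgnorable reports whether scheduling may proceed when this extender is unreachable.
	IsIgnorable() bool
}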

View File

@ -75,9 +75,6 @@ type PriorityConfigFactory struct {
Weight int
}
// EquivalencePodFuncFactory produces a function to get equivalence class for given pod.
type EquivalencePodFuncFactory func(PluginFactoryArgs) algorithm.GetEquivalencePodFunc
var (
schedulerFactoryMutex sync.Mutex
@ -90,9 +87,6 @@ var (
// Registered metadata producers
priorityMetadataProducer PriorityMetadataProducerFactory
predicateMetadataProducer PredicateMetadataProducerFactory
// get equivalence pod function
getEquivalencePodFuncFactory EquivalencePodFuncFactory
)
const (
@ -328,6 +322,15 @@ func RegisterCustomPriorityFunction(policy schedulerapi.PriorityPolicy) string {
},
Weight: policy.Weight,
}
} else if policy.Argument.RequestedToCapacityRatioArguments != nil {
pcf = &PriorityConfigFactory{
MapReduceFunction: func(args PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
scoringFunctionShape := buildScoringFunctionShapeFromRequestedToCapacityRatioArguments(policy.Argument.RequestedToCapacityRatioArguments)
p := priorities.RequestedToCapacityRatioResourceAllocationPriority(scoringFunctionShape)
return p.PriorityMap, nil
},
Weight: policy.Weight,
}
}
} else if existingPcf, ok := priorityFunctionMap[policy.Name]; ok {
glog.V(2).Infof("Priority type %s already registered, reusing.", policy.Name)
@ -346,9 +349,17 @@ func RegisterCustomPriorityFunction(policy schedulerapi.PriorityPolicy) string {
return RegisterPriorityConfigFactory(policy.Name, *pcf)
}
// RegisterGetEquivalencePodFunction registers equivalenceFuncFactory to produce equivalence class for given pod.
func RegisterGetEquivalencePodFunction(equivalenceFuncFactory EquivalencePodFuncFactory) {
getEquivalencePodFuncFactory = equivalenceFuncFactory
func buildScoringFunctionShapeFromRequestedToCapacityRatioArguments(arguments *schedulerapi.RequestedToCapacityRatioArguments) priorities.FunctionShape {
n := len(arguments.UtilizationShape)
points := make([]priorities.FunctionShapePoint, 0, n)
for _, point := range arguments.UtilizationShape {
points = append(points, priorities.FunctionShapePoint{Utilization: int64(point.Utilization), Score: int64(point.Score)})
}
shape, err := priorities.NewFunctionShape(points)
if err != nil {
glog.Fatalf("invalid RequestedToCapacityRatioPriority arguments: %s", err.Error())
}
return shape
}
// IsPriorityFunctionRegistered is useful for testing providers.
@ -505,6 +516,9 @@ func validatePriorityOrDie(priority schedulerapi.PriorityPolicy) {
if priority.Argument.LabelPreference != nil {
numArgs++
}
if priority.Argument.RequestedToCapacityRatioArguments != nil {
numArgs++
}
if numArgs != 1 {
glog.Fatalf("Exactly 1 priority argument is required, numArgs: %v, Priority: %s", numArgs, priority.Name)
}

View File

@ -19,7 +19,9 @@ package factory
import (
"testing"
"github.com/stretchr/testify/assert"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/priorities"
"k8s.io/kubernetes/pkg/scheduler/api"
)
@ -80,3 +82,19 @@ func TestValidatePriorityConfigOverFlow(t *testing.T) {
}
}
}
func TestBuildScoringFunctionShapeFromRequestedToCapacityRatioArguments(t *testing.T) {
arguments := api.RequestedToCapacityRatioArguments{
UtilizationShape: []api.UtilizationShapePoint{
{Utilization: 10, Score: 1},
{Utilization: 30, Score: 5},
{Utilization: 70, Score: 2},
}}
builtShape := buildScoringFunctionShapeFromRequestedToCapacityRatioArguments(&arguments)
expectedShape, _ := priorities.NewFunctionShape([]priorities.FunctionShapePoint{
{Utilization: 10, Score: 1},
{Utilization: 30, Score: 5},
{Utilization: 70, Score: 2},
})
assert.Equal(t, expectedShape, builtShape)
}

View File

@ -0,0 +1,25 @@
// +build !windows
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package factory
import "syscall"
// compareSignal is the signal used to trigger a cache comparison. For non-Windows
// environments it's SIGUSR2.
var compareSignal = syscall.SIGUSR2
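This signal is the debugging hook wired into NewConfigFactory earlier in this commit: delivering it to the scheduler process makes the cacheComparer diff the scheduler cache and pod queue against the informer listers. A minimal, self-contained sketch of the same signal.Notify pattern, using only the standard library (the real handler calls comparer.Compare() rather than printing):

package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"
)

func main() {
	ch := make(chan os.Signal, 1)
	// Same registration the config factory performs with compareSignal
	// (SIGUSR2 on non-Windows platforms).
	signal.Notify(ch, syscall.SIGUSR2)

	stop := make(chan struct{})
	go func() {
		for {
			select {
			case <-stop:
				return
			case <-ch:
				// The scheduler would call comparer.Compare() here.
				fmt.Println("compare signal received")
			}
		}
	}()

	// Trigger with: kill -USR2 <pid>
	select {}
}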

View File

@ -0,0 +1,23 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package factory
import "os"
// compareSignal is the signal used to trigger a cache comparison. For Windows,
// it's SIGINT.
var compareSignal = os.Interrupt

View File

@ -23,13 +23,43 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
const schedulerSubsystem = "scheduler"
const (
// SchedulerSubsystem - subsystem name used by scheduler
SchedulerSubsystem = "scheduler"
// SchedulingLatencyName - scheduler latency metric name
SchedulingLatencyName = "scheduling_latency_seconds"
// OperationLabel - operation label name
OperationLabel = "operation"
// Below are possible values for the operation label. Each represents a substep of e2e scheduling:
// PredicateEvaluation - predicate evaluation operation label value
PredicateEvaluation = "predicate_evaluation"
// PriorityEvaluation - priority evaluation operation label value
PriorityEvaluation = "priority_evaluation"
// PreemptionEvaluation - preemption evaluation operation label value (occurs in case of scheduling fitError).
PreemptionEvaluation = "preemption_evaluation"
// Binding - binding operation label value
Binding = "binding"
// E2eScheduling - e2e scheduling operation label value
E2eScheduling = "e2e_scheduling"
)
// All the histogram-based metrics use 1ms as the size of the smallest bucket.
var (
SchedulingLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: SchedulerSubsystem,
Name: SchedulingLatencyName,
Help: "Scheduling latency in seconds split by sub-parts of the scheduling operation",
// Make the sliding window of 5h.
// TODO: The value for this should be based on some SLI definition (long term).
MaxAge: 5 * time.Hour,
},
[]string{OperationLabel},
)
E2eSchedulingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "e2e_scheduling_latency_microseconds",
Help: "E2e scheduling latency (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
@ -37,7 +67,7 @@ var (
)
SchedulingAlgorithmLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_latency_microseconds",
Help: "Scheduling algorithm latency",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
@ -45,7 +75,7 @@ var (
)
SchedulingAlgorithmPredicateEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_predicate_evaluation",
Help: "Scheduling algorithm predicate evaluation duration",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
@ -53,7 +83,7 @@ var (
)
SchedulingAlgorithmPriorityEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_priority_evaluation",
Help: "Scheduling algorithm priority evaluation duration",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
@ -61,7 +91,7 @@ var (
)
SchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "scheduling_algorithm_preemption_evaluation",
Help: "Scheduling algorithm preemption evaluation duration",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
@ -69,7 +99,7 @@ var (
)
BindingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "binding_latency_microseconds",
Help: "Binding latency",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
@ -77,16 +107,27 @@ var (
)
PreemptionVictims = prometheus.NewGauge(
prometheus.GaugeOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "pod_preemption_victims",
Help: "Number of selected preemption victims",
})
PreemptionAttempts = prometheus.NewCounter(
prometheus.CounterOpts{
Subsystem: schedulerSubsystem,
Subsystem: SchedulerSubsystem,
Name: "total_preemption_attempts",
Help: "Total preemption attempts in the cluster till now",
})
metricsList = []prometheus.Collector{
SchedulingLatency,
E2eSchedulingLatency,
SchedulingAlgorithmLatency,
BindingLatency,
SchedulingAlgorithmPredicateEvaluationDuration,
SchedulingAlgorithmPriorityEvaluationDuration,
SchedulingAlgorithmPremptionEvaluationDuration,
PreemptionVictims,
PreemptionAttempts,
}
)
var registerMetrics sync.Once
@ -95,19 +136,23 @@ var registerMetrics sync.Once
func Register() {
// Register the metrics.
registerMetrics.Do(func() {
prometheus.MustRegister(E2eSchedulingLatency)
prometheus.MustRegister(SchedulingAlgorithmLatency)
prometheus.MustRegister(BindingLatency)
prometheus.MustRegister(SchedulingAlgorithmPredicateEvaluationDuration)
prometheus.MustRegister(SchedulingAlgorithmPriorityEvaluationDuration)
prometheus.MustRegister(SchedulingAlgorithmPremptionEvaluationDuration)
prometheus.MustRegister(PreemptionVictims)
prometheus.MustRegister(PreemptionAttempts)
for _, metric := range metricsList {
prometheus.MustRegister(metric)
}
})
}
// Reset resets metrics
func Reset() {
SchedulingLatency.Reset()
}
// SinceInMicroseconds gets the time since the specified start in microseconds.
func SinceInMicroseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
}
// SinceInSeconds gets the time since the specified start in seconds.
func SinceInSeconds(start time.Time) float64 {
return time.Since(start).Seconds()
}
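The new SchedulingLatency summary vector is keyed by the operation label defined at the top of this file; the scheduler.go hunks later in this commit record binding and preemption durations against it. A short sketch of recording one observation, assuming only the exported names from this file (the recordBindingLatency wrapper is hypothetical):

package main

import (
	"time"

	"k8s.io/kubernetes/pkg/scheduler/metrics"
)

// recordBindingLatency is a hypothetical helper mirroring what scheduler.bind()
// does further down in this commit: time an operation and record it under the
// "binding" operation label of the SchedulingLatency summary.
func recordBindingLatency(bind func() error) error {
	start := time.Now()
	err := bind()
	metrics.SchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(start))
	return err
}

func main() {
	metrics.Register() // guarded by sync.Once, so repeated calls are safe
	_ = recordBindingLatency(func() error { return nil })
}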

View File

@ -32,9 +32,9 @@ import (
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/scheduler/core"
"k8s.io/kubernetes/pkg/scheduler/metrics"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
"k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/scheduler/volumebinder"
@ -137,6 +137,9 @@ type Config struct {
// VolumeBinder handles PVC/PV binding for the pod.
VolumeBinder *volumebinder.VolumeBinder
// Disable pod preemption or not.
DisablePreemption bool
}
// NewFromConfigurator returns a new scheduler that is created entirely by the Configurator. Assumes Create() is implemented.
@ -207,8 +210,9 @@ func (sched *Scheduler) schedule(pod *v1.Pod) (string, error) {
// If it succeeds, it adds the name of the node where preemption has happened to the pod annotations.
// It returns the node name and an error if any.
func (sched *Scheduler) preempt(preemptor *v1.Pod, scheduleErr error) (string, error) {
if !util.PodPriorityEnabled() {
glog.V(3).Infof("Pod priority feature is not enabled. No preemption is performed.")
if !util.PodPriorityEnabled() || sched.config.DisablePreemption {
glog.V(3).Infof("Pod priority feature is not enabled or preemption is disabled by scheduler configuration." +
" No preemption is performed.")
return "", nil
}
preemptor, err := sched.config.PodPreemptor.GetUpdatedPod(preemptor)
@ -328,7 +332,7 @@ func (sched *Scheduler) bindVolumesWorker() {
// The Pod is always sent back to the scheduler afterwards.
err := sched.config.VolumeBinder.Binder.BindPodVolumes(assumed)
if err != nil {
glog.V(1).Infof("Failed to bind volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name, err)
glog.V(1).Infof("Failed to bind volumes for pod \"%v/%v\": %v", assumed.Namespace, assumed.Name, err)
reason = "VolumeBindingFailed"
eventType = v1.EventTypeWarning
} else {
@ -369,6 +373,9 @@ func (sched *Scheduler) assume(assumed *v1.Pod, host string) error {
// If the binding fails, scheduler will release resources allocated to assumed pod
// immediately.
assumed.Spec.NodeName = host
// NOTE: Because the scheduler uses snapshots of SchedulerCache and the live
// version of Ecache, updates must be written to SchedulerCache before
// invalidating Ecache.
if err := sched.config.SchedulerCache.AssumePod(assumed); err != nil {
glog.Errorf("scheduler cache AssumePod failed: %v", err)
@ -423,7 +430,8 @@ func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
}
metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", assumed.Name, b.Target.Name)
metrics.SchedulingLatency.WithLabelValues(metrics.Binding).Observe(metrics.SinceInSeconds(bindingStart))
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", assumed.Namespace, assumed.Name, b.Target.Name)
return nil
}
@ -451,6 +459,7 @@ func (sched *Scheduler) scheduleOne() {
sched.preempt(pod, fitError)
metrics.PreemptionAttempts.Inc()
metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime))
metrics.SchedulingLatency.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime))
}
return
}

View File

@ -27,6 +27,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/diff"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@ -36,10 +37,9 @@ import (
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/scheduler/core"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
"k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/scheduler/volumebinder"
)
@ -75,7 +75,11 @@ func (fp fakePodPreemptor) RemoveNominatedNodeName(pod *v1.Pod) error {
func podWithID(id, desiredHost string) *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: id, SelfLink: util.Test.SelfLink(string(v1.ResourcePods), id)},
ObjectMeta: metav1.ObjectMeta{
Name: id,
UID: types.UID(id),
SelfLink: schedulertesting.Test.SelfLink(string(v1.ResourcePods), id),
},
Spec: v1.PodSpec{
NodeName: desiredHost,
},
@ -85,7 +89,12 @@ func podWithID(id, desiredHost string) *v1.Pod {
func deletingPod(id string) *v1.Pod {
deletionTimestamp := metav1.Now()
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: id, SelfLink: util.Test.SelfLink(string(v1.ResourcePods), id), DeletionTimestamp: &deletionTimestamp},
ObjectMeta: metav1.ObjectMeta{
Name: id,
UID: types.UID(id),
SelfLink: schedulertesting.Test.SelfLink(string(v1.ResourcePods), id),
DeletionTimestamp: &deletionTimestamp,
},
Spec: v1.PodSpec{
NodeName: "",
},
@ -133,7 +142,7 @@ func TestScheduler(t *testing.T) {
eventBroadcaster.StartLogging(t.Logf).Stop()
errS := errors.New("scheduler")
errB := errors.New("binder")
testNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
testNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}}
table := []struct {
injectBindError error
@ -149,7 +158,7 @@ func TestScheduler(t *testing.T) {
{
sendPod: podWithID("foo", ""),
algo: mockScheduler{testNode.Name, nil},
expectBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
expectBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
expectAssumedPod: podWithID("foo", testNode.Name),
eventReason: "Scheduled",
}, {
@ -161,7 +170,7 @@ func TestScheduler(t *testing.T) {
}, {
sendPod: podWithID("foo", ""),
algo: mockScheduler{testNode.Name, nil},
expectBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
expectBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
expectAssumedPod: podWithID("foo", testNode.Name),
injectBindError: errB,
expectError: errB,
@ -249,7 +258,7 @@ func TestSchedulerNoPhantomPodAfterExpire(t *testing.T) {
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(100*time.Millisecond, stop)
pod := podWithPort("pod.Name", "", 8080)
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}}
scache.AddNode(&node)
nodeLister := schedulertesting.FakeNodeLister([]*v1.Node{&node})
predicateMap := map[string]algorithm.FitPredicate{"PodFitsHostPorts": predicates.PodFitsHostPorts}
@ -290,7 +299,7 @@ func TestSchedulerNoPhantomPodAfterExpire(t *testing.T) {
select {
case b := <-bindingChan:
expectBinding := &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
ObjectMeta: metav1.ObjectMeta{Name: "bar", UID: types.UID("bar")},
Target: v1.ObjectReference{Kind: "Node", Name: node.Name},
}
if !reflect.DeepEqual(expectBinding, b) {
@ -307,7 +316,7 @@ func TestSchedulerNoPhantomPodAfterDelete(t *testing.T) {
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(10*time.Minute, stop)
firstPod := podWithPort("pod.Name", "", 8080)
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}}
scache.AddNode(&node)
nodeLister := schedulertesting.FakeNodeLister([]*v1.Node{&node})
predicateMap := map[string]algorithm.FitPredicate{"PodFitsHostPorts": predicates.PodFitsHostPorts}
@ -351,7 +360,7 @@ func TestSchedulerNoPhantomPodAfterDelete(t *testing.T) {
select {
case b := <-bindingChan:
expectBinding := &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Name: "bar"},
ObjectMeta: metav1.ObjectMeta{Name: "bar", UID: types.UID("bar")},
Target: v1.ObjectReference{Kind: "Node", Name: node.Name},
}
if !reflect.DeepEqual(expectBinding, b) {
@ -390,7 +399,7 @@ func TestSchedulerErrorWithLongBinding(t *testing.T) {
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(test.CacheTTL, stop)
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}}
scache.AddNode(&node)
nodeLister := schedulertesting.FakeNodeLister([]*v1.Node{&node})
@ -439,7 +448,7 @@ func setupTestSchedulerWithOnePodOnNode(t *testing.T, queuedPodStore *clientcach
select {
case b := <-bindingChan:
expectBinding := &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Name: pod.Name},
ObjectMeta: metav1.ObjectMeta{Name: pod.Name, UID: types.UID(pod.Name)},
Target: v1.ObjectReference{Kind: "Node", Name: node.Name},
}
if !reflect.DeepEqual(expectBinding, b) {
@ -471,8 +480,9 @@ func TestSchedulerFailedSchedulingReasons(t *testing.T) {
// create several nodes which cannot schedule the above pod
nodes := []*v1.Node{}
for i := 0; i < 100; i++ {
uid := fmt.Sprintf("machine%v", i)
node := v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("machine%v", i)},
ObjectMeta: metav1.ObjectMeta{Name: uid, UID: types.UID(uid)},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *(resource.NewQuantity(cpu/2, resource.DecimalSI)),
@ -537,6 +547,7 @@ func setupTestScheduler(queuedPodStore *clientcache.FIFO, scache schedulercache.
[]algorithm.SchedulerExtender{},
nil,
schedulertesting.FakePersistentVolumeClaimLister{},
false,
false)
bindingChan := make(chan *v1.Binding, 1)
errChan := make(chan error, 1)
@ -585,6 +596,7 @@ func setupTestSchedulerLongBindingWithRetry(queuedPodStore *clientcache.FIFO, sc
[]algorithm.SchedulerExtender{},
nil,
schedulertesting.FakePersistentVolumeClaimLister{},
false,
false)
bindingChan := make(chan *v1.Binding, 2)
configurator := &FakeConfigurator{
@ -622,7 +634,7 @@ func setupTestSchedulerLongBindingWithRetry(queuedPodStore *clientcache.FIFO, sc
}
func setupTestSchedulerWithVolumeBinding(fakeVolumeBinder *volumebinder.VolumeBinder, stop <-chan struct{}, broadcaster record.EventBroadcaster) (*Scheduler, chan *v1.Binding, chan error) {
testNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1"}}
testNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "machine1", UID: types.UID("machine1")}}
nodeLister := schedulertesting.FakeNodeLister([]*v1.Node{&testNode})
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
queuedPodStore.Add(podWithID("foo", ""))
@ -648,8 +660,6 @@ func makePredicateError(failReason string) error {
}
func TestSchedulerWithVolumeBinding(t *testing.T) {
order := []string{predicates.CheckVolumeBindingPred, predicates.GeneralPred}
predicates.SetPredicatesOrdering(order)
findErr := fmt.Errorf("find err")
assumeErr := fmt.Errorf("assume err")
bindErr := fmt.Errorf("bind err")
@ -678,8 +688,9 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
FindBoundSatsified: true,
},
expectAssumeCalled: true,
expectPodBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo"}, Target: v1.ObjectReference{Kind: "Node", Name: "machine1"}},
eventReason: "Scheduled",
expectPodBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: "machine1"}},
eventReason: "Scheduled",
},
"bound,invalid-pv-affinity": {
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{

View File

@ -3,6 +3,7 @@ package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
@ -11,17 +12,23 @@ go_library(
"fake_cache.go",
"fake_lister.go",
"pods_to_cache.go",
"util.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/testing",
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/apis/core:go_default_library",
"//pkg/apis/core/install:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/api/policy/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
],
)
@ -38,3 +45,13 @@ filegroup(
srcs = [":package-srcs"],
tags = ["automanaged"],
)
go_test(
name = "go_default_test",
srcs = ["util_test.go"],
embed = [":go_default_library"],
deps = [
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
],
)

View File

@ -20,7 +20,7 @@ import (
"k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// FakeCache is used for testing
@ -100,3 +100,11 @@ func (f *FakeCache) List(s labels.Selector) ([]*v1.Pod, error) { return nil, nil
func (f *FakeCache) FilteredList(filter schedulercache.PodFilter, selector labels.Selector) ([]*v1.Pod, error) {
return nil, nil
}
// Snapshot is a fake method for testing
func (f *FakeCache) Snapshot() *schedulercache.Snapshot {
return &schedulercache.Snapshot{}
}
// IsUpToDate is a fake method for testing
func (f *FakeCache) IsUpToDate(*schedulercache.NodeInfo) bool { return true }

View File

@ -26,7 +26,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
var _ algorithm.NodeLister = &FakeNodeLister{}

View File

@ -19,7 +19,7 @@ package testing
import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// PodsToCache is used for testing

View File

@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package util
package testing
import (
"fmt"
@ -84,7 +84,7 @@ func init() {
}
if _, ok := Groups[api.GroupName]; !ok {
externalGroupVersion := schema.GroupVersion{Group: api.GroupName, Version: legacyscheme.Registry.GroupOrDie(api.GroupName).GroupVersion.Version}
externalGroupVersion := schema.GroupVersion{Group: api.GroupName, Version: "v1"}
Groups[api.GroupName] = TestGroup{
externalGroupVersion: externalGroupVersion,
internalGroupVersion: api.SchemeGroupVersion,

View File

@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package util
package testing
import (
"encoding/json"

View File

@ -10,15 +10,12 @@ go_test(
name = "go_default_test",
srcs = [
"backoff_utils_test.go",
"testutil_test.go",
"utils_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/apis/scheduling:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
],
)
@ -27,20 +24,14 @@ go_library(
name = "go_default_library",
srcs = [
"backoff_utils.go",
"testutil.go",
"utils.go",
],
importpath = "k8s.io/kubernetes/pkg/scheduler/util",
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/apis/core:go_default_library",
"//pkg/apis/core/install:go_default_library",
"//pkg/apis/scheduling:go_default_library",
"//pkg/features:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],

View File

@ -40,11 +40,10 @@ func NewVolumeBinder(
client clientset.Interface,
pvcInformer coreinformers.PersistentVolumeClaimInformer,
pvInformer coreinformers.PersistentVolumeInformer,
nodeInformer coreinformers.NodeInformer,
storageClassInformer storageinformers.StorageClassInformer) *VolumeBinder {
return &VolumeBinder{
Binder: persistentvolume.NewVolumeBinder(client, pvcInformer, pvInformer, nodeInformer, storageClassInformer),
Binder: persistentvolume.NewVolumeBinder(client, pvcInformer, pvInformer, storageClassInformer),
BindQueue: workqueue.NewNamed("podsToBind"),
}
}