vendor update for CSI 0.3.0

gman
2018-07-18 16:47:22 +02:00
parent 6f484f92fc
commit 8ea659f0d5
6810 changed files with 438061 additions and 193861 deletions

View File

@ -16,8 +16,9 @@ go_library(
],
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm",
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
@ -33,7 +34,7 @@ go_test(
],
embed = [":go_default_library"],
deps = [
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
],

View File

@ -23,7 +23,7 @@ go_library(
"//pkg/kubelet/apis:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/util:go_default_library",
"//pkg/scheduler/volumebinder:go_default_library",
"//pkg/volume/util:go_default_library",
@ -32,6 +32,7 @@ go_library(
"//vendor/k8s.io/api/storage/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/fields:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/rand:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
@ -45,6 +46,7 @@ go_library(
go_test(
name = "go_default_test",
srcs = [
"max_attachable_volume_predicate_test.go",
"metadata_test.go",
"predicates_test.go",
"utils_test.go",
@ -52,10 +54,12 @@ go_test(
embed = [":go_default_library"],
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/volume/util:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/storage/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
@ -63,6 +67,7 @@ go_test(
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature/testing:go_default_library",
],
)

View File

@ -61,6 +61,8 @@ var (
ErrNodeUnderMemoryPressure = newPredicateFailureError("NodeUnderMemoryPressure", "node(s) had memory pressure")
// ErrNodeUnderDiskPressure is used for NodeUnderDiskPressure predicate error.
ErrNodeUnderDiskPressure = newPredicateFailureError("NodeUnderDiskPressure", "node(s) had disk pressure")
// ErrNodeUnderPIDPressure is used for NodeUnderPIDPressure predicate error.
ErrNodeUnderPIDPressure = newPredicateFailureError("NodeUnderPIDPressure", "node(s) had pid pressure")
// ErrNodeOutOfDisk is used for NodeOutOfDisk predicate error.
ErrNodeOutOfDisk = newPredicateFailureError("NodeOutOfDisk", "node(s) were out of disk space")
// ErrNodeNotReady is used for NodeNotReady predicate error.

View File

@ -0,0 +1,854 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"os"
"reflect"
"strconv"
"strings"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
)
func onePVCPod(filterName string) *v1.Pod {
return &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "some" + filterName + "Vol",
},
},
},
},
},
}
}
func splitPVCPod(filterName string) *v1.Pod {
return &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someNon" + filterName + "Vol",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "some" + filterName + "Vol",
},
},
},
},
},
}
}
func TestVolumeCountConflicts(t *testing.T) {
oneVolPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "ovp"},
},
},
},
},
}
twoVolPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "tvp1"},
},
},
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "tvp2"},
},
},
},
},
}
splitVolsPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{},
},
},
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "svp"},
},
},
},
},
}
nonApplicablePod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{},
},
},
},
},
}
deletedPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "deletedPVC",
},
},
},
},
},
}
twoDeletedPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "deletedPVC",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "anotherDeletedPVC",
},
},
},
},
},
}
deletedPVPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "pvcWithDeletedPV",
},
},
},
},
},
}
// deletedPVPod2 is a different pod than deletedPVPod but using the same PVC
deletedPVPod2 := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "pvcWithDeletedPV",
},
},
},
},
},
}
// anotherDeletedPVPod is a different pod than deletedPVPod and uses another PVC
anotherDeletedPVPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "anotherPVCWithDeletedPV",
},
},
},
},
},
}
emptyPod := &v1.Pod{
Spec: v1.PodSpec{},
}
unboundPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "unboundPVC",
},
},
},
},
},
}
// Different pod than unboundPVCPod, but using the same unbound PVC
unboundPVCPod2 := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "unboundPVC",
},
},
},
},
},
}
// pod with unbound PVC that's different to unboundPVC
anotherUnboundPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "anotherUnboundPVC",
},
},
},
},
},
}
tests := []struct {
newPod *v1.Pod
existingPods []*v1.Pod
filterName string
maxVols int
fits bool
test string
}{
// filterName:EBSVolumeFilterType
{
newPod: oneVolPod,
existingPods: []*v1.Pod{twoVolPod, oneVolPod},
filterName: EBSVolumeFilterType,
maxVols: 4,
fits: true,
test: "fits when node capacity >= new pod's EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: false,
test: "doesn't fit when node capacity < new pod's EBS volumes",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{twoVolPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count ignores non-EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "existing pods' counts ignore non-EBS volumes",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count considers PVCs backed by EBS volumes",
},
{
newPod: splitPVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{splitVolsPod, oneVolPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count ignores PVCs not backed by EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, onePVCPod(EBSVolumeFilterType)},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: false,
test: "existing pods' counts considers PVCs backed by EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, twoVolPod, onePVCPod(EBSVolumeFilterType)},
filterName: EBSVolumeFilterType,
maxVols: 4,
fits: true,
test: "already-mounted EBS volumes are always ok to allow",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{oneVolPod, oneVolPod, onePVCPod(EBSVolumeFilterType)},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "the same EBS volumes are not counted multiple times",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: false,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, twoDeletedPVCPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: false,
test: "pod with missing two PVCs is counted towards the PV limit twice",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: false,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: deletedPVPod2,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: true,
test: "two pods missing the same PV are counted towards the PV limit only once",
},
{
newPod: anotherDeletedPVPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: false,
test: "two pods missing different PVs are counted towards the PV limit twice",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: false,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(EBSVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: EBSVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: unboundPVCPod2,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: true,
test: "the same unbound PVC in multiple pods is counted towards the PV limit only once",
},
{
newPod: anotherUnboundPVCPod,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: EBSVolumeFilterType,
maxVols: 2,
fits: false,
test: "two different unbound PVCs are counted towards the PV limit as two volumes",
},
// filterName:GCEPDVolumeFilterType
{
newPod: oneVolPod,
existingPods: []*v1.Pod{twoVolPod, oneVolPod},
filterName: GCEPDVolumeFilterType,
maxVols: 4,
fits: true,
test: "fits when node capacity >= new pod's GCE volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "fit when node capacity < new pod's GCE volumes",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{twoVolPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count ignores non-GCE volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "existing pods' counts ignore non-GCE volumes",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count considers PVCs backed by GCE volumes",
},
{
newPod: splitPVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{splitVolsPod, oneVolPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count ignores PVCs not backed by GCE volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, onePVCPod(GCEPDVolumeFilterType)},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "existing pods' counts considers PVCs backed by GCE volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, twoVolPod, onePVCPod(GCEPDVolumeFilterType)},
filterName: GCEPDVolumeFilterType,
maxVols: 4,
fits: true,
test: "already-mounted EBS volumes are always ok to allow",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{oneVolPod, oneVolPod, onePVCPod(GCEPDVolumeFilterType)},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "the same GCE volumes are not counted multiple times",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, twoDeletedPVCPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing two PVCs is counted towards the PV limit twice",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: deletedPVPod2,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "two pods missing the same PV are counted towards the PV limit only once",
},
{
newPod: anotherDeletedPVPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "two pods missing different PVs are counted towards the PV limit twice",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(GCEPDVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: GCEPDVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: unboundPVCPod2,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "the same unbound PVC in multiple pods is counted towards the PV limit only once",
},
{
newPod: anotherUnboundPVCPod,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: GCEPDVolumeFilterType,
maxVols: 2,
fits: true,
test: "two different unbound PVCs are counted towards the PV limit as two volumes",
},
// filterName:AzureDiskVolumeFilterType
{
newPod: oneVolPod,
existingPods: []*v1.Pod{twoVolPod, oneVolPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 4,
fits: true,
test: "fits when node capacity >= new pod's AzureDisk volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "fit when node capacity < new pod's AzureDisk volumes",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{twoVolPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count ignores non-AzureDisk volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "existing pods' counts ignore non-AzureDisk volumes",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count considers PVCs backed by AzureDisk volumes",
},
{
newPod: splitPVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{splitVolsPod, oneVolPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "new pod's count ignores PVCs not backed by AzureDisk volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, onePVCPod(AzureDiskVolumeFilterType)},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "existing pods' counts considers PVCs backed by AzureDisk volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, twoVolPod, onePVCPod(AzureDiskVolumeFilterType)},
filterName: AzureDiskVolumeFilterType,
maxVols: 4,
fits: true,
test: "already-mounted AzureDisk volumes are always ok to allow",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{oneVolPod, oneVolPod, onePVCPod(AzureDiskVolumeFilterType)},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "the same AzureDisk volumes are not counted multiple times",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, twoDeletedPVCPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing two PVCs is counted towards the PV limit twice",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: deletedPVPod2,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "two pods missing the same PV are counted towards the PV limit only once",
},
{
newPod: anotherDeletedPVPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "two pods missing different PVs are counted towards the PV limit twice",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: onePVCPod(AzureDiskVolumeFilterType),
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 3,
fits: true,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: unboundPVCPod2,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "the same unbound PVC in multiple pods is counted towards the PV limit only once",
},
{
newPod: anotherUnboundPVCPod,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
filterName: AzureDiskVolumeFilterType,
maxVols: 2,
fits: true,
test: "two different unbound PVCs are counted towards the PV limit as two volumes",
},
}
pvInfo := func(filterName string) FakePersistentVolumeInfo {
return FakePersistentVolumeInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "some" + filterName + "Vol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: strings.ToLower(filterName) + "Vol"},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNon" + filterName + "Vol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{},
},
},
}
}
pvcInfo := func(filterName string) FakePersistentVolumeClaimInfo {
return FakePersistentVolumeClaimInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "some" + filterName + "Vol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "some" + filterName + "Vol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNon" + filterName + "Vol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someNon" + filterName + "Vol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pvcWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "pvcWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherPVCWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "anotherPVCWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "unboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherUnboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
}
}
expectedFailureReasons := []algorithm.PredicateFailureReason{ErrMaxVolumeCountExceeded}
// running attachable predicate tests without feature gate and no limit present on nodes
for _, test := range tests {
os.Setenv(KubeMaxPDVols, strconv.Itoa(test.maxVols))
pred := NewMaxPDVolumeCountPredicate(test.filterName, pvInfo(test.filterName), pvcInfo(test.filterName))
fits, reasons, err := pred(test.newPod, PredicateMetadata(test.newPod, nil), schedulercache.NewNodeInfo(test.existingPods...))
if err != nil {
t.Errorf("[%s]%s: unexpected error: %v", test.filterName, test.test, err)
}
if !fits && !reflect.DeepEqual(reasons, expectedFailureReasons) {
t.Errorf("[%s]%s: unexpected failure reasons: %v, want: %v", test.filterName, test.test, reasons, expectedFailureReasons)
}
if fits != test.fits {
t.Errorf("[%s]%s: expected %v, got %v", test.filterName, test.test, test.fits, fits)
}
}
defer utilfeaturetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.AttachVolumeLimit, true)()
// running attachable predicate tests with feature gate and limit present on nodes
for _, test := range tests {
node := getNodeWithPodAndVolumeLimits(test.existingPods, int64(test.maxVols), test.filterName)
pred := NewMaxPDVolumeCountPredicate(test.filterName, pvInfo(test.filterName), pvcInfo(test.filterName))
fits, reasons, err := pred(test.newPod, PredicateMetadata(test.newPod, nil), node)
if err != nil {
t.Errorf("Using allocatable [%s]%s: unexpected error: %v", test.filterName, test.test, err)
}
if !fits && !reflect.DeepEqual(reasons, expectedFailureReasons) {
t.Errorf("Using allocatable [%s]%s: unexpected failure reasons: %v, want: %v", test.filterName, test.test, reasons, expectedFailureReasons)
}
if fits != test.fits {
t.Errorf("Using allocatable [%s]%s: expected %v, got %v", test.filterName, test.test, test.fits, fits)
}
}
}
func getNodeWithPodAndVolumeLimits(pods []*v1.Pod, limit int64, filter string) *schedulercache.NodeInfo {
nodeInfo := schedulercache.NewNodeInfo(pods...)
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "node-for-max-pd-test-1"},
Status: v1.NodeStatus{
Allocatable: v1.ResourceList{
getVolumeLimitKey(filter): *resource.NewQuantity(limit, resource.DecimalSI),
},
},
}
nodeInfo.SetNode(node)
return nodeInfo
}
func getVolumeLimitKey(filterType string) v1.ResourceName {
switch filterType {
case EBSVolumeFilterType:
return v1.ResourceName(volumeutil.EBSVolumeLimitKey)
case GCEPDVolumeFilterType:
return v1.ResourceName(volumeutil.GCEVolumeLimitKey)
case AzureDiskVolumeFilterType:
return v1.ResourceName(volumeutil.AzureVolumeLimitKey)
default:
return ""
}
}
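
For orientation, the table-driven cases above can be reduced to one direct call. The following is a minimal, illustrative sketch (not part of the vendored change) that reuses the package helpers visible in this file, namely NewMaxPDVolumeCountPredicate, PredicateMetadata, getNodeWithPodAndVolumeLimits and the fake PV/PVC infos; the pod shapes, volume IDs and the limit of 1 are made-up values:

// Sketch: one EBS case evaluated directly. With the AttachVolumeLimit feature
// gate enabled, the limit comes from the node's allocatable volume count (1 here),
// so a second EBS volume should not fit.
func TestSingleEBSAttachLimitSketch(t *testing.T) {
	ebsPod := func(volumeID string) *v1.Pod {
		return &v1.Pod{
			Spec: v1.PodSpec{
				Volumes: []v1.Volume{
					{
						VolumeSource: v1.VolumeSource{
							AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: volumeID},
						},
					},
				},
			},
		}
	}
	existingPod := ebsPod("vol-a") // already scheduled onto the node
	newPod := ebsPod("vol-b")      // the pod being scheduled

	defer utilfeaturetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.AttachVolumeLimit, true)()

	// No PVC-backed volumes are involved, so empty fakes are enough.
	pred := NewMaxPDVolumeCountPredicate(EBSVolumeFilterType, FakePersistentVolumeInfo{}, FakePersistentVolumeClaimInfo{})
	node := getNodeWithPodAndVolumeLimits([]*v1.Pod{existingPod}, 1, EBSVolumeFilterType)

	fits, reasons, err := pred(newPod, PredicateMetadata(newPod, nil), node)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if fits {
		t.Errorf("expected pod not to fit with an attach limit of 1, got fits=%v, reasons=%v", fits, reasons)
	}
}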

View File

@ -20,14 +20,17 @@ import (
"fmt"
"sync"
"github.com/golang/glog"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/util/workqueue"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
"github.com/golang/glog"
)
// PredicateMetadataFactory defines a factory of predicate metadata.
@ -50,7 +53,13 @@ type predicateMetadata struct {
podRequest *schedulercache.Resource
podPorts []*v1.ContainerPort
//key is a pod full name with the anti-affinity rules.
matchingAntiAffinityTerms map[string][]matchingPodAntiAffinityTerm
matchingAntiAffinityTerms map[string][]matchingPodAntiAffinityTerm
// A map of node name to a list of Pods on the node that can potentially match
// the affinity rules of the "pod".
nodeNameToMatchingAffinityPods map[string][]*v1.Pod
// A map of node name to a list of Pods on the node that can potentially match
// the anti-affinity rules of the "pod".
nodeNameToMatchingAntiAffinityPods map[string][]*v1.Pod
serviceAffinityInUse bool
serviceAffinityMatchingPodList []*v1.Pod
serviceAffinityMatchingPodServices []*v1.Service
@ -108,12 +117,19 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf
if err != nil {
return nil
}
affinityPods, antiAffinityPods, err := getPodsMatchingAffinity(pod, nodeNameToInfoMap)
if err != nil {
glog.Errorf("[predicate meta data generation] error finding pods that match affinity terms: %v", err)
return nil
}
predicateMetadata := &predicateMetadata{
pod: pod,
podBestEffort: isPodBestEffort(pod),
podRequest: GetResourceRequest(pod),
podPorts: schedutil.GetContainerPorts(pod),
matchingAntiAffinityTerms: matchingTerms,
pod: pod,
podBestEffort: isPodBestEffort(pod),
podRequest: GetResourceRequest(pod),
podPorts: schedutil.GetContainerPorts(pod),
matchingAntiAffinityTerms: matchingTerms,
nodeNameToMatchingAffinityPods: affinityPods,
nodeNameToMatchingAntiAffinityPods: antiAffinityPods,
}
for predicateName, precomputeFunc := range predicateMetadataProducers {
glog.V(10).Infof("Precompute: %v", predicateName)
@ -131,6 +147,33 @@ func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error {
}
// Delete any anti-affinity rule from the deletedPod.
delete(meta.matchingAntiAffinityTerms, deletedPodFullName)
// Remove the deleted pod from the matching affinity and anti-affinity pod maps, if present.
affinity := meta.pod.Spec.Affinity
podNodeName := deletedPod.Spec.NodeName
if affinity != nil && len(podNodeName) > 0 {
if affinity.PodAffinity != nil {
for i, p := range meta.nodeNameToMatchingAffinityPods[podNodeName] {
if p == deletedPod {
s := meta.nodeNameToMatchingAffinityPods[podNodeName]
s[i] = s[len(s)-1]
s = s[:len(s)-1]
meta.nodeNameToMatchingAffinityPods[podNodeName] = s
break
}
}
}
if affinity.PodAntiAffinity != nil {
for i, p := range meta.nodeNameToMatchingAntiAffinityPods[podNodeName] {
if p == deletedPod {
s := meta.nodeNameToMatchingAntiAffinityPods[podNodeName]
s[i] = s[len(s)-1]
s = s[:len(s)-1]
meta.nodeNameToMatchingAntiAffinityPods[podNodeName] = s
break
}
}
}
}
// All pods in the serviceAffinityMatchingPodList are in the same namespace.
// So, if the namespace of the first one is not the same as the namespace of the
// deletedPod, we don't need to check the list, as deletedPod isn't in the list.
@ -173,6 +216,35 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulercache
meta.matchingAntiAffinityTerms[addedPodFullName] = podMatchingTerms
}
}
// Add the pod to nodeNameToMatchingAffinityPods and nodeNameToMatchingAntiAffinityPods if needed.
affinity := meta.pod.Spec.Affinity
podNodeName := addedPod.Spec.NodeName
if affinity != nil && len(podNodeName) > 0 {
if targetPodMatchesAffinityOfPod(meta.pod, addedPod) {
found := false
for _, p := range meta.nodeNameToMatchingAffinityPods[podNodeName] {
if p == addedPod {
found = true
break
}
}
if !found {
meta.nodeNameToMatchingAffinityPods[podNodeName] = append(meta.nodeNameToMatchingAffinityPods[podNodeName], addedPod)
}
}
if targetPodMatchesAntiAffinityOfPod(meta.pod, addedPod) {
found := false
for _, p := range meta.nodeNameToMatchingAntiAffinityPods[podNodeName] {
if p == addedPod {
found = true
break
}
}
if !found {
meta.nodeNameToMatchingAntiAffinityPods[podNodeName] = append(meta.nodeNameToMatchingAntiAffinityPods[podNodeName], addedPod)
}
}
}
// If addedPod is in the same namespace as the meta.pod, update the list
// of matching pods if applicable.
if meta.serviceAffinityInUse && addedPod.Namespace == meta.pod.Namespace {
@ -200,9 +272,162 @@ func (meta *predicateMetadata) ShallowCopy() algorithm.PredicateMetadata {
for k, v := range meta.matchingAntiAffinityTerms {
newPredMeta.matchingAntiAffinityTerms[k] = append([]matchingPodAntiAffinityTerm(nil), v...)
}
newPredMeta.nodeNameToMatchingAffinityPods = make(map[string][]*v1.Pod)
for k, v := range meta.nodeNameToMatchingAffinityPods {
newPredMeta.nodeNameToMatchingAffinityPods[k] = append([]*v1.Pod(nil), v...)
}
newPredMeta.nodeNameToMatchingAntiAffinityPods = make(map[string][]*v1.Pod)
for k, v := range meta.nodeNameToMatchingAntiAffinityPods {
newPredMeta.nodeNameToMatchingAntiAffinityPods[k] = append([]*v1.Pod(nil), v...)
}
newPredMeta.serviceAffinityMatchingPodServices = append([]*v1.Service(nil),
meta.serviceAffinityMatchingPodServices...)
newPredMeta.serviceAffinityMatchingPodList = append([]*v1.Pod(nil),
meta.serviceAffinityMatchingPodList...)
return (algorithm.PredicateMetadata)(newPredMeta)
}
type affinityTermProperties struct {
namespaces sets.String
selector labels.Selector
}
// getAffinityTermProperties receives a Pod and affinity terms and returns the namespaces and
// selectors of the terms.
func getAffinityTermProperties(pod *v1.Pod, terms []v1.PodAffinityTerm) (properties []*affinityTermProperties, err error) {
if terms == nil {
return properties, nil
}
for _, term := range terms {
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
return nil, err
}
properties = append(properties, &affinityTermProperties{namespaces: namespaces, selector: selector})
}
return properties, nil
}
// podMatchesAffinityTermProperties returns true IFF the given pod matches all the given properties.
func podMatchesAffinityTermProperties(pod *v1.Pod, properties []*affinityTermProperties) bool {
if len(properties) == 0 {
return false
}
for _, property := range properties {
if !priorityutil.PodMatchesTermsNamespaceAndSelector(pod, property.namespaces, property.selector) {
return false
}
}
return true
}
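To make the relationship between these two helpers concrete, here is a small in-package sketch (the labels, namespaces and the function name are illustrative, not part of the change): it derives the properties of a single affinity term and matches a candidate pod against them.

// Sketch: derive term properties once, then reuse them to test candidate pods.
func exampleTermPropertiesMatch() (bool, error) {
	self := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default"}}
	terms := []v1.PodAffinityTerm{
		{
			LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "db"}},
			TopologyKey:   "kubernetes.io/hostname",
		},
	}
	props, err := getAffinityTermProperties(self, terms)
	if err != nil {
		return false, err
	}
	candidate := &v1.Pod{ObjectMeta: metav1.ObjectMeta{
		Namespace: "default",
		Labels:    map[string]string{"app": "db"},
	}}
	// Expected to be true here: the term has no explicit namespaces, so it defaults
	// to the "self" pod's namespace, and the candidate's labels match the selector.
	return podMatchesAffinityTermProperties(candidate, props), nil
}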
// getPodsMatchingAffinity finds existing Pods that match affinity terms of the given "pod".
// It ignores topology. It returns a set of Pods that are checked later by the affinity
// predicate. With this set of pods available, the affinity predicate does not
// need to check all the pods in the cluster.
func getPodsMatchingAffinity(pod *v1.Pod, nodeInfoMap map[string]*schedulercache.NodeInfo) (affinityPods map[string][]*v1.Pod, antiAffinityPods map[string][]*v1.Pod, err error) {
allNodeNames := make([]string, 0, len(nodeInfoMap))
affinity := pod.Spec.Affinity
if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
return nil, nil, nil
}
for name := range nodeInfoMap {
allNodeNames = append(allNodeNames, name)
}
var lock sync.Mutex
var firstError error
affinityPods = make(map[string][]*v1.Pod)
antiAffinityPods = make(map[string][]*v1.Pod)
appendResult := func(nodeName string, affPods, antiAffPods []*v1.Pod) {
lock.Lock()
defer lock.Unlock()
if len(affPods) > 0 {
affinityPods[nodeName] = affPods
}
if len(antiAffPods) > 0 {
antiAffinityPods[nodeName] = antiAffPods
}
}
catchError := func(err error) {
lock.Lock()
defer lock.Unlock()
if firstError == nil {
firstError = err
}
}
affinityProperties, err := getAffinityTermProperties(pod, GetPodAffinityTerms(affinity.PodAffinity))
if err != nil {
return nil, nil, err
}
antiAffinityProperties, err := getAffinityTermProperties(pod, GetPodAntiAffinityTerms(affinity.PodAntiAffinity))
if err != nil {
return nil, nil, err
}
processNode := func(i int) {
nodeInfo := nodeInfoMap[allNodeNames[i]]
node := nodeInfo.Node()
if node == nil {
catchError(fmt.Errorf("nodeInfo.Node is nil"))
return
}
affPods := make([]*v1.Pod, 0, len(nodeInfo.Pods()))
antiAffPods := make([]*v1.Pod, 0, len(nodeInfo.Pods()))
for _, existingPod := range nodeInfo.Pods() {
// Check affinity properties.
if podMatchesAffinityTermProperties(existingPod, affinityProperties) {
affPods = append(affPods, existingPod)
}
// Check anti-affinity properties.
if podMatchesAffinityTermProperties(existingPod, antiAffinityProperties) {
antiAffPods = append(antiAffPods, existingPod)
}
}
if len(antiAffPods) > 0 || len(affPods) > 0 {
appendResult(node.Name, affPods, antiAffPods)
}
}
workqueue.Parallelize(16, len(allNodeNames), processNode)
return affinityPods, antiAffinityPods, firstError
}
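A short sketch of how this precomputation is driven (the wrapper function and its arguments are illustrative; the node-info construction uses the scheduler cache types already imported here):

// Sketch: build a small NodeInfo map and precompute per-node matches for a pending pod.
func examplePrecomputeMatches(pending *v1.Pod, nodes []*v1.Node, podsByNode map[string][]*v1.Pod) (map[string][]*v1.Pod, map[string][]*v1.Pod, error) {
	nodeInfoMap := make(map[string]*schedulercache.NodeInfo)
	for _, n := range nodes {
		info := schedulercache.NewNodeInfo(podsByNode[n.Name]...)
		info.SetNode(n)
		nodeInfoMap[n.Name] = info
	}
	// These two maps are what GetMetadata stores as nodeNameToMatchingAffinityPods
	// and nodeNameToMatchingAntiAffinityPods for the affinity predicate to consume.
	return getPodsMatchingAffinity(pending, nodeInfoMap)
}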
// targetPodMatchesAffinityOfPod returns true if "targetPod" matches all the affinity rules of
// "pod". Similar to getPodsMatchingAffinity, this function does not check topology.
// So, whether the targetPod actually matches or not needs further checks for a specific
// node.
func targetPodMatchesAffinityOfPod(pod, targetPod *v1.Pod) bool {
affinity := pod.Spec.Affinity
if affinity == nil || affinity.PodAffinity == nil {
return false
}
affinityProperties, err := getAffinityTermProperties(pod, GetPodAffinityTerms(affinity.PodAffinity))
if err != nil {
glog.Errorf("error in getting affinity properties of Pod %v", pod.Name)
return false
}
return podMatchesAffinityTermProperties(targetPod, affinityProperties)
}
// targetPodMatchesAntiAffinityOfPod returns true if "targetPod" matches all the anti-affinity
// rules of "pod". Similar to getPodsMatchingAffinity, this function does not check topology.
// So, whether the targetPod actually matches or not needs further checks for a specific
// node.
func targetPodMatchesAntiAffinityOfPod(pod, targetPod *v1.Pod) bool {
affinity := pod.Spec.Affinity
if affinity == nil || affinity.PodAntiAffinity == nil {
return false
}
properties, err := getAffinityTermProperties(pod, GetPodAntiAffinityTerms(affinity.PodAntiAffinity))
if err != nil {
glog.Errorf("error in getting anti-affinity properties of Pod %v", pod.Name)
return false
}
return podMatchesAffinityTermProperties(targetPod, properties)
}

View File

@ -24,7 +24,7 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
@ -88,6 +88,13 @@ func (s sortableServices) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
var _ = sort.Interface(&sortableServices{})
func sortNodePodMap(np map[string][]*v1.Pod) {
for _, pl := range np {
sortablePods := sortablePods(pl)
sort.Sort(sortablePods)
}
}
// predicateMetadataEquivalent returns true if the two metadata are equivalent.
// Note: this function does not compare podRequest.
func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
@ -111,6 +118,16 @@ func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
if !reflect.DeepEqual(meta1.matchingAntiAffinityTerms, meta2.matchingAntiAffinityTerms) {
return fmt.Errorf("matchingAntiAffinityTerms are not euqal")
}
sortNodePodMap(meta1.nodeNameToMatchingAffinityPods)
sortNodePodMap(meta2.nodeNameToMatchingAffinityPods)
if !reflect.DeepEqual(meta1.nodeNameToMatchingAffinityPods, meta2.nodeNameToMatchingAffinityPods) {
return fmt.Errorf("nodeNameToMatchingAffinityPods are not euqal")
}
sortNodePodMap(meta1.nodeNameToMatchingAntiAffinityPods)
sortNodePodMap(meta2.nodeNameToMatchingAntiAffinityPods)
if !reflect.DeepEqual(meta1.nodeNameToMatchingAntiAffinityPods, meta2.nodeNameToMatchingAntiAffinityPods) {
return fmt.Errorf("nodeNameToMatchingAntiAffinityPods are not euqal")
}
if meta1.serviceAffinityInUse {
sortablePods1 := sortablePods(meta1.serviceAffinityMatchingPodList)
sort.Sort(sortablePods1)
@ -189,6 +206,34 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
},
},
}
affinityComplex := &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar", "buzz"},
},
},
},
TopologyKey: "region",
},
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"bar", "security", "test"},
},
},
},
TopologyKey: "zone",
},
},
}
tests := []struct {
description string
@ -312,6 +357,41 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
description: "metadata matching pod affinity and anti-affinity are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
PodAffinity: affinityComplex,
},
},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{
NodeName: "nodeA",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityComplex,
},
},
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: selector1}}},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
}
for _, test := range tests {
@ -360,6 +440,7 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
// on the idea that shallow-copy should produce an object that is deep-equal to the original
// object.
func TestPredicateMetadata_ShallowCopy(t *testing.T) {
selector1 := map[string]string{"foo": "bar"}
source := predicateMetadata{
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
@ -392,6 +473,45 @@ func TestPredicateMetadata_ShallowCopy(t *testing.T) {
},
},
},
nodeNameToMatchingAffinityPods: map[string][]*v1.Pod{
"nodeA": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
},
"nodeC": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
},
},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeC"},
},
},
},
nodeNameToMatchingAntiAffinityPods: map[string][]*v1.Pod{
"nodeN": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeN"},
},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p3"},
Spec: v1.PodSpec{
NodeName: "nodeM",
},
},
},
"nodeM": {
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p6", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeM"},
},
},
},
serviceAffinityInUse: true,
serviceAffinityMatchingPodList: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "pod1"}},

View File

@ -23,10 +23,13 @@ import (
"strconv"
"sync"
"github.com/golang/glog"
"k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/rand"
"k8s.io/apimachinery/pkg/util/sets"
@ -40,12 +43,10 @@ import (
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/kubernetes/pkg/scheduler/volumebinder"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
"github.com/golang/glog"
)
const (
@ -69,6 +70,8 @@ const (
NoDiskConflictPred = "NoDiskConflict"
// PodToleratesNodeTaintsPred defines the name of predicate PodToleratesNodeTaints.
PodToleratesNodeTaintsPred = "PodToleratesNodeTaints"
// CheckNodeUnschedulablePred defines the name of predicate CheckNodeUnschedulablePredicate.
CheckNodeUnschedulablePred = "CheckNodeUnschedulable"
// PodToleratesNodeNoExecuteTaintsPred defines the name of predicate PodToleratesNodeNoExecuteTaints.
PodToleratesNodeNoExecuteTaintsPred = "PodToleratesNodeNoExecuteTaints"
// CheckNodeLabelPresencePred defines the name of predicate CheckNodeLabelPresence.
@ -87,6 +90,8 @@ const (
CheckNodeMemoryPressurePred = "CheckNodeMemoryPressure"
// CheckNodeDiskPressurePred defines the name of predicate CheckNodeDiskPressure.
CheckNodeDiskPressurePred = "CheckNodeDiskPressure"
// CheckNodePIDPressurePred defines the name of predicate CheckNodePIDPressure.
CheckNodePIDPressurePred = "CheckNodePIDPressure"
// DefaultMaxEBSVolumes is the limit for volumes attached to an instance.
// Amazon recommends no more than 40; the system root volume uses at least one.
@ -125,13 +130,13 @@ const (
// The order is based on the restrictiveness & complexity of predicates.
// Design doc: https://github.com/kubernetes/community/blob/master/contributors/design-proposals/scheduling/predicates-ordering.md
var (
predicatesOrdering = []string{CheckNodeConditionPred,
predicatesOrdering = []string{CheckNodeConditionPred, CheckNodeUnschedulablePred,
GeneralPred, HostNamePred, PodFitsHostPortsPred,
MatchNodeSelectorPred, PodFitsResourcesPred, NoDiskConflictPred,
PodToleratesNodeTaintsPred, PodToleratesNodeNoExecuteTaintsPred, CheckNodeLabelPresencePred,
CheckServiceAffinityPred, MaxEBSVolumeCountPred, MaxGCEPDVolumeCountPred,
MaxAzureDiskVolumeCountPred, CheckVolumeBindingPred, NoVolumeZoneConflictPred,
CheckNodeMemoryPressurePred, CheckNodeDiskPressurePred, MatchInterPodAffinityPred}
CheckNodeMemoryPressurePred, CheckNodePIDPressurePred, CheckNodeDiskPressurePred, MatchInterPodAffinityPred}
)
// NodeInfo interface represents anything that can get node object from node ID.
@ -284,10 +289,11 @@ func NoDiskConflict(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *sch
// MaxPDVolumeCountChecker contains information to check the max number of volumes for a predicate.
type MaxPDVolumeCountChecker struct {
filter VolumeFilter
maxVolumes int
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo
filter VolumeFilter
volumeLimitKey v1.ResourceName
maxVolumes int
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo
// The string below is generated randomly during the struct's initialization.
// It is used to prefix volumeID generated inside the predicate() method to
@ -308,21 +314,25 @@ type VolumeFilter struct {
// The predicate looks for both volumes used directly, as well as PVC volumes that are backed by relevant volume
// types, counts the number of unique volumes, and rejects the new pod if it would place the total count over
// the maximum.
func NewMaxPDVolumeCountPredicate(filterName string, pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate {
func NewMaxPDVolumeCountPredicate(
filterName string, pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate {
var filter VolumeFilter
var maxVolumes int
var volumeLimitKey v1.ResourceName
switch filterName {
case EBSVolumeFilterType:
filter = EBSVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.EBSVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxEBSVolumes)
case GCEPDVolumeFilterType:
filter = GCEPDVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.GCEVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxGCEPDVolumes)
case AzureDiskVolumeFilterType:
filter = AzureDiskVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.AzureVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxAzureDiskVolumes)
default:
glog.Fatalf("Wrong filterName, Only Support %v %v %v ", EBSVolumeFilterType,
@ -332,6 +342,7 @@ func NewMaxPDVolumeCountPredicate(filterName string, pvInfo PersistentVolumeInfo
}
c := &MaxPDVolumeCountChecker{
filter: filter,
volumeLimitKey: volumeLimitKey,
maxVolumes: maxVolumes,
pvInfo: pvInfo,
pvcInfo: pvcInfo,
@ -357,7 +368,6 @@ func getMaxVols(defaultVal int) int {
}
func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace string, filteredVolumes map[string]bool) error {
for i := range volumes {
vol := &volumes[i]
if id, ok := c.filter.FilterVolume(vol); ok {
@ -444,12 +454,25 @@ func (c *MaxPDVolumeCountChecker) predicate(pod *v1.Pod, meta algorithm.Predicat
}
numNewVolumes := len(newVolumes)
maxAttachLimit := c.maxVolumes
if numExistingVolumes+numNewVolumes > c.maxVolumes {
if utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
volumeLimits := nodeInfo.VolumeLimits()
if maxAttachLimitFromAllocatable, ok := volumeLimits[c.volumeLimitKey]; ok {
maxAttachLimit = int(maxAttachLimitFromAllocatable)
}
}
if numExistingVolumes+numNewVolumes > maxAttachLimit {
// violates MaxEBSVolumeCount or MaxGCEPDVolumeCount
return false, []algorithm.PredicateFailureReason{ErrMaxVolumeCountExceeded}, nil
}
if nodeInfo != nil && nodeInfo.TransientInfo != nil && utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) {
nodeInfo.TransientInfo.TransientLock.Lock()
defer nodeInfo.TransientInfo.TransientLock.Unlock()
nodeInfo.TransientInfo.TransNodeInfo.AllocatableVolumesCount = maxAttachLimit - numExistingVolumes
nodeInfo.TransientInfo.TransNodeInfo.RequestedVolumes = numNewVolumes
}
return true, nil, nil
}
@ -582,9 +605,9 @@ func (c *VolumeZoneChecker) predicate(pod *v1.Pod, meta algorithm.PredicateMetad
pvName := pvc.Spec.VolumeName
if pvName == "" {
if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
scName := pvc.Spec.StorageClassName
if scName != nil && len(*scName) > 0 {
class, _ := c.classInfo.GetStorageClassInfo(*scName)
scName := v1helper.GetPersistentVolumeClaimClass(pvc)
if len(scName) > 0 {
class, _ := c.classInfo.GetStorageClassInfo(scName)
if class != nil {
if class.VolumeBindingMode == nil {
return false, nil, fmt.Errorf("VolumeBindingMode not set for StorageClass %q", scName)
@ -662,33 +685,7 @@ func GetResourceRequest(pod *v1.Pod) *schedulercache.Resource {
// take max_resource(sum_pod, any_init_container)
for _, container := range pod.Spec.InitContainers {
for rName, rQuantity := range container.Resources.Requests {
switch rName {
case v1.ResourceMemory:
if mem := rQuantity.Value(); mem > result.Memory {
result.Memory = mem
}
case v1.ResourceEphemeralStorage:
if ephemeralStorage := rQuantity.Value(); ephemeralStorage > result.EphemeralStorage {
result.EphemeralStorage = ephemeralStorage
}
case v1.ResourceCPU:
if cpu := rQuantity.MilliValue(); cpu > result.MilliCPU {
result.MilliCPU = cpu
}
case v1.ResourceNvidiaGPU:
if gpu := rQuantity.Value(); gpu > result.NvidiaGPU {
result.NvidiaGPU = gpu
}
default:
if v1helper.IsScalarResourceName(rName) {
value := rQuantity.Value()
if value > result.ScalarResources[rName] {
result.SetScalar(rName, value)
}
}
}
}
result.SetMaxResource(container.Resources.Requests)
}
return result
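The element-wise max above reflects how init containers run: they execute one at a time, so the pod's effective request for each resource is the larger of the summed app-container requests and the largest single init-container request. A small illustrative sketch of that arithmetic using the cache's Resource helpers (the k8s.io/apimachinery/pkg/api/resource import and the numbers are assumptions for the example):

// Sketch: app containers request 200m CPU / 300Mi memory in total; one init
// container requests 500m CPU / 100Mi memory. The effective request is the
// per-resource max: 500m CPU and 300Mi memory.
func exampleEffectiveRequest() *schedulercache.Resource {
	result := &schedulercache.Resource{}
	// Sum of all regular containers (as accumulated earlier in GetResourceRequest).
	result.Add(v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("200m"),
		v1.ResourceMemory: resource.MustParse("300Mi"),
	})
	// Element-wise max against an init container's requests.
	result.SetMaxResource(v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("500m"),
		v1.ResourceMemory: resource.MustParse("100Mi"),
	})
	// result.MilliCPU is now 500; result.Memory stays at 300Mi (in bytes).
	return result
}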
@ -728,7 +725,6 @@ func PodFitsResources(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *s
}
if podRequest.MilliCPU == 0 &&
podRequest.Memory == 0 &&
podRequest.NvidiaGPU == 0 &&
podRequest.EphemeralStorage == 0 &&
len(podRequest.ScalarResources) == 0 {
return len(predicateFails) == 0, predicateFails, nil
@ -741,10 +737,6 @@ func PodFitsResources(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *s
if allocatable.Memory < podRequest.Memory+nodeInfo.RequestedResource().Memory {
predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceMemory, podRequest.Memory, nodeInfo.RequestedResource().Memory, allocatable.Memory))
}
if allocatable.NvidiaGPU < podRequest.NvidiaGPU+nodeInfo.RequestedResource().NvidiaGPU {
predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceNvidiaGPU, podRequest.NvidiaGPU, nodeInfo.RequestedResource().NvidiaGPU, allocatable.NvidiaGPU))
}
if allocatable.EphemeralStorage < podRequest.EphemeralStorage+nodeInfo.RequestedResource().EphemeralStorage {
predicateFails = append(predicateFails, NewInsufficientResourceError(v1.ResourceEphemeralStorage, podRequest.EphemeralStorage, nodeInfo.RequestedResource().EphemeralStorage, allocatable.EphemeralStorage))
}
@ -776,21 +768,16 @@ func PodFitsResources(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *s
// nodeMatchesNodeSelectorTerms checks if a node's labels satisfy a list of node selector terms,
// terms are ORed, and an empty list of terms will match nothing.
func nodeMatchesNodeSelectorTerms(node *v1.Node, nodeSelectorTerms []v1.NodeSelectorTerm) bool {
for _, req := range nodeSelectorTerms {
nodeSelector, err := v1helper.NodeSelectorRequirementsAsSelector(req.MatchExpressions)
if err != nil {
glog.V(10).Infof("Failed to parse MatchExpressions: %+v, regarding as not match.", req.MatchExpressions)
return false
}
if nodeSelector.Matches(labels.Set(node.Labels)) {
return true
}
nodeFields := map[string]string{}
for k, f := range algorithm.NodeFieldSelectorKeys {
nodeFields[k] = f(node)
}
return false
return v1helper.MatchNodeSelectorTerms(nodeSelectorTerms, labels.Set(node.Labels), fields.Set(nodeFields))
}
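With node fields passed alongside labels, selector terms that use MatchFields are now honored here as well. A brief sketch (the node name is illustrative; metadata.name is assumed to be among the keys exposed by algorithm.NodeFieldSelectorKeys):

// Sketch: a term selecting a node by the metadata.name field rather than by a label.
func exampleFieldSelectorTerm(node *v1.Node) bool {
	terms := []v1.NodeSelectorTerm{
		{
			MatchFields: []v1.NodeSelectorRequirement{
				{
					Key:      "metadata.name",
					Operator: v1.NodeSelectorOpIn,
					Values:   []string{"node-1"},
				},
			},
		},
	}
	// True only if node.Name == "node-1"; label-based MatchExpressions terms
	// continue to work unchanged.
	return nodeMatchesNodeSelectorTerms(node, terms)
}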
// The pod can only schedule onto nodes that satisfy requirements in both NodeAffinity and nodeSelector.
func podMatchesNodeLabels(pod *v1.Pod, node *v1.Node) bool {
// podMatchesNodeSelectorAndAffinityTerms checks whether the pod is schedulable onto nodes according to
// the requirements in both NodeAffinity and nodeSelector.
func podMatchesNodeSelectorAndAffinityTerms(pod *v1.Pod, node *v1.Node) bool {
// Check if node.Labels match pod.Spec.NodeSelector.
if len(pod.Spec.NodeSelector) > 0 {
selector := labels.SelectorFromSet(pod.Spec.NodeSelector)
@ -841,7 +828,7 @@ func PodMatchNodeSelector(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInf
if node == nil {
return false, nil, fmt.Errorf("node not found")
}
if podMatchesNodeLabels(pod, node) {
if podMatchesNodeSelectorAndAffinityTerms(pod, node) {
return true, nil, nil
}
return false, []algorithm.PredicateFailureReason{ErrNodeSelectorNotMatch}, nil
@ -1153,7 +1140,7 @@ func (c *PodAffinityChecker) InterPodAffinityMatches(pod *v1.Pod, meta algorithm
if affinity == nil || (affinity.PodAffinity == nil && affinity.PodAntiAffinity == nil) {
return true, nil, nil
}
if failedPredicates, error := c.satisfiesPodsAffinityAntiAffinity(pod, nodeInfo, affinity); failedPredicates != nil {
if failedPredicates, error := c.satisfiesPodsAffinityAntiAffinity(pod, meta, nodeInfo, affinity); failedPredicates != nil {
failedPredicates := append([]algorithm.PredicateFailureReason{ErrPodAffinityNotMatch}, failedPredicates...)
return false, failedPredicates, error
}
@ -1167,39 +1154,36 @@ func (c *PodAffinityChecker) InterPodAffinityMatches(pod *v1.Pod, meta algorithm
return true, nil, nil
}
// anyPodMatchesPodAffinityTerm checks if any of given pods can match the specific podAffinityTerm.
// First return value indicates whether a matching pod exists on a node that matches the topology key,
// while the second return value indicates whether a matching pod exists anywhere.
// TODO: Do we really need any pod matching, or all pods matching? I think the latter.
func (c *PodAffinityChecker) anyPodMatchesPodAffinityTerm(pod *v1.Pod, pods []*v1.Pod, nodeInfo *schedulercache.NodeInfo, term *v1.PodAffinityTerm) (bool, bool, error) {
if len(term.TopologyKey) == 0 {
return false, false, fmt.Errorf("empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity")
// podMatchesPodAffinityTerms checks if the "targetPod" matches the given "terms"
// of the "pod" on the given "nodeInfo".Node(). It returns three values: 1) whether
// targetPod matches all the terms and their topologies, 2) whether targetPod
// matches all the terms label selector and namespaces (AKA term properties),
// 3) any error.
func (c *PodAffinityChecker) podMatchesPodAffinityTerms(pod *v1.Pod, targetPod *v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, bool, error) {
if len(terms) == 0 {
return false, false, fmt.Errorf("terms array is empty")
}
matchingPodExists := false
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
props, err := getAffinityTermProperties(pod, terms)
if err != nil {
return false, false, err
}
// Special case: When the topological domain is node, we can limit our
// search to pods on that node without searching the entire cluster.
if term.TopologyKey == kubeletapis.LabelHostname {
pods = nodeInfo.Pods()
if !podMatchesAffinityTermProperties(targetPod, props) {
return false, false, nil
}
for _, existingPod := range pods {
match := priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector)
if match {
matchingPodExists = true
existingPodNode, err := c.info.GetNodeInfo(existingPod.Spec.NodeName)
if err != nil {
return false, matchingPodExists, err
}
if priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), existingPodNode, term.TopologyKey) {
return true, matchingPodExists, nil
}
// Namespace and selector of the terms have matched. Now we check topology of the terms.
targetPodNode, err := c.info.GetNodeInfo(targetPod.Spec.NodeName)
if err != nil {
return false, false, err
}
for _, term := range terms {
if len(term.TopologyKey) == 0 {
return false, false, fmt.Errorf("empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity")
}
if !priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), targetPodNode, term.TopologyKey) {
return false, true, nil
}
}
return false, matchingPodExists, nil
return true, true, nil
}
// GetPodAffinityTerms gets pod affinity terms by a pod affinity object.
@ -1383,57 +1367,127 @@ func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta
return nil, nil
}
// anyPodsMatchingTopologyTerms checks whether any of the nodes given via
// "targetPods" matches topology of all the "terms" for the give "pod" and "nodeInfo".
func (c *PodAffinityChecker) anyPodsMatchingTopologyTerms(pod *v1.Pod, targetPods map[string][]*v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, error) {
for nodeName, targetPods := range targetPods {
targetPodNodeInfo, err := c.info.GetNodeInfo(nodeName)
if err != nil {
return false, err
}
if len(targetPods) > 0 {
allTermsMatched := true
for _, term := range terms {
if !priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), targetPodNodeInfo, term.TopologyKey) {
allTermsMatched = false
break
}
}
if allTermsMatched {
// We have 1 or more pods on the target node that have already matched namespace and selector,
// and all of the terms' topologies matched the target node. So there is at least 1 matching pod on the node.
return true, nil
}
}
}
return false, nil
}
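// Editorial sketch (not part of this diff): the per-node walk above, with node lookups
// replaced by a plain map of node name to node labels. For every node that already hosts
// at least one selector-matching pod, it checks whether that node agrees with the
// candidate node on every topology key. All names below are simplified stand-ins.
package main

import "fmt"

func anyNodeMatchesAllTopologyKeys(candidateNodeLabels map[string]string,
	matchingPodCountByNode map[string]int,
	nodeLabelsByName map[string]map[string]string,
	topologyKeys []string) bool {
	for nodeName, count := range matchingPodCountByNode {
		if count == 0 {
			continue // no matching pods on this node
		}
		targetLabels := nodeLabelsByName[nodeName]
		allKeysMatch := true
		for _, key := range topologyKeys {
			cv, cok := candidateNodeLabels[key]
			tv, tok := targetLabels[key]
			if !cok || !tok || cv != tv {
				allKeysMatch = false
				break
			}
		}
		if allKeysMatch {
			return true // a matching pod lives in the same topology domain for all terms
		}
	}
	return false
}

func main() {
	fmt.Println(anyNodeMatchesAllTopologyKeys(
		map[string]string{"region": "r1", "zone": "z1"}, // candidate node
		map[string]int{"nodeA": 1},                      // nodeA hosts one matching pod
		map[string]map[string]string{"nodeA": {"region": "r1", "zone": "z2"}},
		[]string{"region", "zone"},
	)) // false: region agrees but zone differs
}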
// Checks if scheduling the pod onto this node would break any of this pod's affinity/anti-affinity rules.
func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, nodeInfo *schedulercache.NodeInfo, affinity *v1.Affinity) (algorithm.PredicateFailureReason, error) {
func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo,
affinity *v1.Affinity) (algorithm.PredicateFailureReason, error) {
node := nodeInfo.Node()
if node == nil {
return ErrPodAffinityRulesNotMatch, fmt.Errorf("Node is nil")
}
filteredPods, err := c.podLister.FilteredList(nodeInfo.Filter, labels.Everything())
if err != nil {
return ErrPodAffinityRulesNotMatch, err
}
// Check all affinity terms.
for _, term := range GetPodAffinityTerms(affinity.PodAffinity) {
termMatches, matchingPodExists, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term)
if err != nil {
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v, err: %v", podName(pod), node.Name, term, err)
glog.Error(errMessage)
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
}
if !termMatches {
// If the requirement matches a pod's own labels and namespace, and there are
// no other such pods, then disregard the requirement. This is necessary to
// not block forever because the first pod of the collection can't be scheduled.
if matchingPodExists {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v",
podName(pod), node.Name, term)
return ErrPodAffinityRulesNotMatch, nil
}
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if predicateMeta, ok := meta.(*predicateMetadata); ok {
// Check all affinity terms.
matchingPods := predicateMeta.nodeNameToMatchingAffinityPods
if affinityTerms := GetPodAffinityTerms(affinity.PodAffinity); len(affinityTerms) > 0 {
matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, affinityTerms)
if err != nil {
errMessage := fmt.Sprintf("Cannot parse selector on term %v for pod %v. Details %v", term, podName(pod), err)
glog.Error(errMessage)
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinity, err: %v", podName(pod), node.Name, err)
glog.Errorf(errMessage)
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
}
match := priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector)
if !match {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v",
podName(pod), node.Name, term)
return ErrPodAffinityRulesNotMatch, nil
if !matchExists {
// This pod may be the first pod in a series of pods that have affinity to themselves. In
// order not to leave such pods in a pending state forever, if no other pod in the cluster
// matches this pod's namespace and selector and the pod matches its own terms, we allow
// the pod to pass the affinity check.
if !(len(matchingPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
podName(pod), node.Name)
return ErrPodAffinityRulesNotMatch, nil
}
}
}
}
// Check all anti-affinity terms.
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
termMatches, _, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term)
if err != nil || termMatches {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm %v, err: %v",
podName(pod), node.Name, term, err)
return ErrPodAntiAffinityRulesNotMatch, nil
// Check all anti-affinity terms.
matchingPods = predicateMeta.nodeNameToMatchingAntiAffinityPods
if antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity); len(antiAffinityTerms) > 0 {
matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, antiAffinityTerms)
if err != nil || matchExists {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinity, err: %v",
podName(pod), node.Name, err)
return ErrPodAntiAffinityRulesNotMatch, nil
}
}
} else { // We don't have precomputed metadata. We have to follow a slow path to check affinity rules.
filteredPods, err := c.podLister.FilteredList(nodeInfo.Filter, labels.Everything())
if err != nil {
return ErrPodAffinityRulesNotMatch, err
}
affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
matchFound, termsSelectorMatchFound := false, false
for _, targetPod := range filteredPods {
// Check all affinity terms.
if !matchFound && len(affinityTerms) > 0 {
affTermsMatch, termsSelectorMatch, err := c.podMatchesPodAffinityTerms(pod, targetPod, nodeInfo, affinityTerms)
if err != nil {
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinity, err: %v", podName(pod), node.Name, err)
glog.Error(errMessage)
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
}
if termsSelectorMatch {
termsSelectorMatchFound = true
}
if affTermsMatch {
matchFound = true
}
}
// Check all anti-affinity terms.
if len(antiAffinityTerms) > 0 {
antiAffTermsMatch, _, err := c.podMatchesPodAffinityTerms(pod, targetPod, nodeInfo, antiAffinityTerms)
if err != nil || antiAffTermsMatch {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm, err: %v",
podName(pod), node.Name, err)
return ErrPodAntiAffinityRulesNotMatch, nil
}
}
}
if !matchFound && len(affinityTerms) > 0 {
// We have not been able to find any matches for the pod's affinity rules.
// This pod may be the first pod in a series of pods that have affinity to themselves. In
// order not to leave such pods in a pending state forever, if no other pod in the cluster
// matches this pod's namespace and selector and the pod matches its own terms, we allow
// the pod to pass the affinity check.
if termsSelectorMatchFound {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
podName(pod), node.Name)
return ErrPodAffinityRulesNotMatch, nil
}
// Check if pod matches its own affinity properties (namespace and label selector).
if !targetPodMatchesAffinityOfPod(pod, pod) {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
podName(pod), node.Name)
return ErrPodAffinityRulesNotMatch, nil
}
}
}
@ -1446,8 +1500,8 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, node
return nil, nil
}
// PodToleratesNodeTaints checks whether a pod's tolerations can tolerate the node's taints
func PodToleratesNodeTaints(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
// CheckNodeUnschedulablePredicate checks if a pod can be scheduled on a node with Unschedulable spec.
func CheckNodeUnschedulablePredicate(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
if nodeInfo == nil || nodeInfo.Node() == nil {
return false, []algorithm.PredicateFailureReason{ErrNodeUnknownCondition}, nil
}
@ -1456,6 +1510,15 @@ func PodToleratesNodeTaints(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeI
return false, []algorithm.PredicateFailureReason{ErrNodeUnschedulable}, nil
}
return true, nil, nil
}
// PodToleratesNodeTaints checks whether a pod's tolerations can tolerate the node's taints
func PodToleratesNodeTaints(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
if nodeInfo == nil || nodeInfo.Node() == nil {
return false, []algorithm.PredicateFailureReason{ErrNodeUnknownCondition}, nil
}
return podToleratesNodeTaints(pod, nodeInfo, func(t *v1.Taint) bool {
// PodToleratesNodeTaints is only interested in NoSchedule and NoExecute taints.
return t.Effect == v1.TaintEffectNoSchedule || t.Effect == v1.TaintEffectNoExecute
@ -1518,6 +1581,16 @@ func CheckNodeDiskPressurePredicate(pod *v1.Pod, meta algorithm.PredicateMetadat
return true, nil, nil
}
// CheckNodePIDPressurePredicate checks if a pod can be scheduled on a node
// reporting pid pressure condition.
func CheckNodePIDPressurePredicate(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
// check if node is under pid pressure
if nodeInfo.PIDPressureCondition() == v1.ConditionTrue {
return false, []algorithm.PredicateFailureReason{ErrNodeUnderPIDPressure}, nil
}
return true, nil, nil
}
// CheckNodeConditionPredicate checks if a pod can be scheduled on a node reporting out of disk,
// network unavailable, or not ready condition. Only node conditions are accounted for in this predicate.
func CheckNodeConditionPredicate(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
@ -1582,7 +1655,7 @@ func (c *VolumeBindingChecker) predicate(pod *v1.Pod, meta algorithm.PredicateMe
return false, nil, fmt.Errorf("node not found")
}
unboundSatisfied, boundSatisfied, err := c.binder.Binder.FindPodVolumes(pod, node.Name)
unboundSatisfied, boundSatisfied, err := c.binder.Binder.FindPodVolumes(pod, node)
if err != nil {
return false, nil, err
}

View File

@ -32,23 +32,24 @@ import (
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
var (
extendedResourceA = v1.ResourceName("example.com/aaa")
extendedResourceB = v1.ResourceName("example.com/bbb")
hugePageResourceA = v1helper.HugePageResourceName(resource.MustParse("2Mi"))
extendedResourceA = v1.ResourceName("example.com/aaa")
extendedResourceB = v1.ResourceName("example.com/bbb")
kubernetesIOResourceA = v1.ResourceName("kubernetes.io/something")
kubernetesIOResourceB = v1.ResourceName("subdomain.kubernetes.io/something")
hugePageResourceA = v1helper.HugePageResourceName(resource.MustParse("2Mi"))
)
func makeResources(milliCPU, memory, nvidiaGPUs, pods, extendedA, storage, hugePageA int64) v1.NodeResources {
func makeResources(milliCPU, memory, pods, extendedA, storage, hugePageA int64) v1.NodeResources {
return v1.NodeResources{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
extendedResourceA: *resource.NewQuantity(extendedA, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(storage, resource.BinarySI),
hugePageResourceA: *resource.NewQuantity(hugePageA, resource.BinarySI),
@ -56,12 +57,11 @@ func makeResources(milliCPU, memory, nvidiaGPUs, pods, extendedA, storage, hugeP
}
}
func makeAllocatableResources(milliCPU, memory, nvidiaGPUs, pods, extendedA, storage, hugePageA int64) v1.ResourceList {
func makeAllocatableResources(milliCPU, memory, pods, extendedA, storage, hugePageA int64) v1.ResourceList {
return v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(memory, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(pods, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(nvidiaGPUs, resource.DecimalSI),
extendedResourceA: *resource.NewQuantity(extendedA, resource.DecimalSI),
v1.ResourceEphemeralStorage: *resource.NewQuantity(storage, resource.BinarySI),
hugePageResourceA: *resource.NewQuantity(hugePageA, resource.BinarySI),
@ -297,6 +297,24 @@ func TestPodFitsResources(t *testing.T) {
test: "extended resource allocatable enforced for unknown resource for init container",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(extendedResourceB, 1, 0, 0)},
},
{
pod: newResourcePod(
schedulercache.Resource{MilliCPU: 1, Memory: 1, ScalarResources: map[v1.ResourceName]int64{kubernetesIOResourceA: 10}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0})),
fits: false,
test: "kubernetes.io resource capacity enforced",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(kubernetesIOResourceA, 10, 0, 0)},
},
{
pod: newResourceInitPod(newResourcePod(schedulercache.Resource{}),
schedulercache.Resource{MilliCPU: 1, Memory: 1, ScalarResources: map[v1.ResourceName]int64{kubernetesIOResourceB: 10}}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 0, Memory: 0})),
fits: false,
test: "kubernetes.io resource capacity enforced for init container",
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(kubernetesIOResourceB, 10, 0, 0)},
},
{
pod: newResourcePod(
schedulercache.Resource{MilliCPU: 1, Memory: 1, ScalarResources: map[v1.ResourceName]int64{hugePageResourceA: 10}}),
@ -337,7 +355,7 @@ func TestPodFitsResources(t *testing.T) {
}
for _, test := range enoughPodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20, 5)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 5, 20, 5)}}
test.nodeInfo.SetNode(&node)
RegisterPredicateMetadataProducerWithExtendedResourceOptions(test.ignoredExtendedResources)
meta := PredicateMetadata(test.pod, nil)
@ -394,7 +412,7 @@ func TestPodFitsResources(t *testing.T) {
},
}
for _, test := range notEnoughPodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 0, 1, 0, 0, 0)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: v1.ResourceList{}, Allocatable: makeAllocatableResources(10, 20, 1, 0, 0, 0)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@ -452,7 +470,7 @@ func TestPodFitsResources(t *testing.T) {
}
for _, test := range storagePodsTests {
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 5, 20, 5)}}
node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 5, 20, 5)}}
test.nodeInfo.SetNode(&node)
fits, reasons, err := PodFitsResources(test.pod, PredicateMetadata(test.pod, nil), test.nodeInfo)
if err != nil {
@ -893,10 +911,11 @@ func TestISCSIDiskConflicts(t *testing.T) {
// TODO: Add test case for RequiredDuringSchedulingRequiredDuringExecution after it's implemented.
func TestPodFitsSelector(t *testing.T) {
tests := []struct {
pod *v1.Pod
labels map[string]string
fits bool
test string
pod *v1.Pod
labels map[string]string
nodeName string
fits bool
test string
}{
{
pod: &v1.Pod{},
@ -1341,11 +1360,234 @@ func TestPodFitsSelector(t *testing.T) {
test: "Pod with an Affinity matches node's labels but the PodSpec.NodeSelector(the old thing that we are deprecating) " +
"is not satisfied, won't schedule onto the node",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpNotIn,
Values: []string{"invalid value: ___@#$%^"},
},
},
},
},
},
},
},
},
},
labels: map[string]string{
"foo": "bar",
},
fits: false,
test: "Pod with an invalid value in Affinity term won't be scheduled onto the node",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchFields: []v1.NodeSelectorRequirement{
{
Key: algorithm.NodeFieldSelectorKeyNodeName,
Operator: v1.NodeSelectorOpIn,
Values: []string{"node_1"},
},
},
},
},
},
},
},
},
},
nodeName: "node_1",
fits: true,
test: "Pod with matchFields using In operator that matches the existing node",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchFields: []v1.NodeSelectorRequirement{
{
Key: algorithm.NodeFieldSelectorKeyNodeName,
Operator: v1.NodeSelectorOpIn,
Values: []string{"node_1"},
},
},
},
},
},
},
},
},
},
nodeName: "node_2",
fits: false,
test: "Pod with matchFields using In operator that does not match the existing node",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchFields: []v1.NodeSelectorRequirement{
{
Key: algorithm.NodeFieldSelectorKeyNodeName,
Operator: v1.NodeSelectorOpIn,
Values: []string{"node_1"},
},
},
},
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"bar"},
},
},
},
},
},
},
},
},
},
nodeName: "node_2",
labels: map[string]string{"foo": "bar"},
fits: true,
test: "Pod with two terms: matchFields does not match, but matchExpressions matches",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchFields: []v1.NodeSelectorRequirement{
{
Key: algorithm.NodeFieldSelectorKeyNodeName,
Operator: v1.NodeSelectorOpIn,
Values: []string{"node_1"},
},
},
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"bar"},
},
},
},
},
},
},
},
},
},
nodeName: "node_2",
labels: map[string]string{"foo": "bar"},
fits: false,
test: "Pod with one term: matchFields does not match, but matchExpressions matches",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchFields: []v1.NodeSelectorRequirement{
{
Key: algorithm.NodeFieldSelectorKeyNodeName,
Operator: v1.NodeSelectorOpIn,
Values: []string{"node_1"},
},
},
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"bar"},
},
},
},
},
},
},
},
},
},
nodeName: "node_1",
labels: map[string]string{"foo": "bar"},
fits: true,
test: "Pod with one term: both matchFields and matchExpressions match",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchFields: []v1.NodeSelectorRequirement{
{
Key: algorithm.NodeFieldSelectorKeyNodeName,
Operator: v1.NodeSelectorOpIn,
Values: []string{"node_1"},
},
},
},
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "foo",
Operator: v1.NodeSelectorOpIn,
Values: []string{"not-match-to-bar"},
},
},
},
},
},
},
},
},
},
nodeName: "node_2",
labels: map[string]string{"foo": "bar"},
fits: false,
test: "Pod with two terms: both matchFields and matchExpressions do not match",
},
}
expectedFailureReasons := []algorithm.PredicateFailureReason{ErrNodeSelectorNotMatch}
for _, test := range tests {
node := v1.Node{ObjectMeta: metav1.ObjectMeta{Labels: test.labels}}
node := v1.Node{ObjectMeta: metav1.ObjectMeta{
Name: test.nodeName,
Labels: test.labels,
}}
nodeInfo := schedulercache.NewNodeInfo()
nodeInfo.SetNode(&node)
@ -1591,425 +1833,6 @@ func TestServiceAffinity(t *testing.T) {
}
}
func TestEBSVolumeCountConflicts(t *testing.T) {
oneVolPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "ovp"},
},
},
},
},
}
ebsPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol",
},
},
},
},
},
}
splitPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someNonEBSVol",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "someEBSVol",
},
},
},
},
},
}
twoVolPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "tvp1"},
},
},
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "tvp2"},
},
},
},
},
}
splitVolsPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{},
},
},
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "svp"},
},
},
},
},
}
nonApplicablePod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{},
},
},
},
},
}
deletedPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "deletedPVC",
},
},
},
},
},
}
twoDeletedPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "deletedPVC",
},
},
},
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "anotherDeletedPVC",
},
},
},
},
},
}
deletedPVPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "pvcWithDeletedPV",
},
},
},
},
},
}
// deletedPVPod2 is a different pod than deletedPVPod but using the same PVC
deletedPVPod2 := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "pvcWithDeletedPV",
},
},
},
},
},
}
// anotherDeletedPVPod is a different pod than deletedPVPod and uses another PVC
anotherDeletedPVPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "anotherPVCWithDeletedPV",
},
},
},
},
},
}
emptyPod := &v1.Pod{
Spec: v1.PodSpec{},
}
unboundPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "unboundPVC",
},
},
},
},
},
}
// Different pod than unboundPVCPod, but using the same unbound PVC
unboundPVCPod2 := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "unboundPVC",
},
},
},
},
},
}
// pod with unbound PVC that's different to unboundPVC
anotherUnboundPVCPod := &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
ClaimName: "anotherUnboundPVC",
},
},
},
},
},
}
tests := []struct {
newPod *v1.Pod
existingPods []*v1.Pod
maxVols int
fits bool
test string
}{
{
newPod: oneVolPod,
existingPods: []*v1.Pod{twoVolPod, oneVolPod},
maxVols: 4,
fits: true,
test: "fits when node capacity >= new pod's EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod},
maxVols: 2,
fits: false,
test: "doesn't fit when node capacity < new pod's EBS volumes",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{twoVolPod},
maxVols: 3,
fits: true,
test: "new pod's count ignores non-EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
maxVols: 3,
fits: true,
test: "existing pods' counts ignore non-EBS volumes",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{splitVolsPod, nonApplicablePod, emptyPod},
maxVols: 3,
fits: true,
test: "new pod's count considers PVCs backed by EBS volumes",
},
{
newPod: splitPVCPod,
existingPods: []*v1.Pod{splitVolsPod, oneVolPod},
maxVols: 3,
fits: true,
test: "new pod's count ignores PVCs not backed by EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, ebsPVCPod},
maxVols: 3,
fits: false,
test: "existing pods' counts considers PVCs backed by EBS volumes",
},
{
newPod: twoVolPod,
existingPods: []*v1.Pod{oneVolPod, twoVolPod, ebsPVCPod},
maxVols: 4,
fits: true,
test: "already-mounted EBS volumes are always ok to allow",
},
{
newPod: splitVolsPod,
existingPods: []*v1.Pod{oneVolPod, oneVolPod, ebsPVCPod},
maxVols: 3,
fits: true,
test: "the same EBS volumes are not counted multiple times",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
maxVols: 2,
fits: false,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVCPod},
maxVols: 3,
fits: true,
test: "pod with missing PVC is counted towards the PV limit",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{oneVolPod, twoDeletedPVCPod},
maxVols: 3,
fits: false,
test: "pod with missing two PVCs is counted towards the PV limit twice",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
maxVols: 2,
fits: false,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
maxVols: 3,
fits: true,
test: "pod with missing PV is counted towards the PV limit",
},
{
newPod: deletedPVPod2,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
maxVols: 2,
fits: true,
test: "two pods missing the same PV are counted towards the PV limit only once",
},
{
newPod: anotherDeletedPVPod,
existingPods: []*v1.Pod{oneVolPod, deletedPVPod},
maxVols: 2,
fits: false,
test: "two pods missing different PVs are counted towards the PV limit twice",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
maxVols: 2,
fits: false,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: ebsPVCPod,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
maxVols: 3,
fits: true,
test: "pod with unbound PVC is counted towards the PV limit",
},
{
newPod: unboundPVCPod2,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
maxVols: 2,
fits: true,
test: "the same unbound PVC in multiple pods is counted towards the PV limit only once",
},
{
newPod: anotherUnboundPVCPod,
existingPods: []*v1.Pod{oneVolPod, unboundPVCPod},
maxVols: 2,
fits: false,
test: "two different unbound PVCs are counted towards the PV limit as two volumes",
},
}
pvInfo := FakePersistentVolumeInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "someEBSVol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "ebsVol"},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNonEBSVol"},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeSource: v1.PersistentVolumeSource{},
},
},
}
pvcInfo := FakePersistentVolumeClaimInfo{
{
ObjectMeta: metav1.ObjectMeta{Name: "someEBSVol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someEBSVol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "someNonEBSVol"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "someNonEBSVol"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pvcWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "pvcWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherPVCWithDeletedPV"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: "anotherPVCWithDeletedPV"},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "unboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "anotherUnboundPVC"},
Spec: v1.PersistentVolumeClaimSpec{VolumeName: ""},
},
}
expectedFailureReasons := []algorithm.PredicateFailureReason{ErrMaxVolumeCountExceeded}
for _, test := range tests {
os.Setenv(KubeMaxPDVols, strconv.Itoa(test.maxVols))
pred := NewMaxPDVolumeCountPredicate(EBSVolumeFilterType, pvInfo, pvcInfo)
fits, reasons, err := pred(test.newPod, PredicateMetadata(test.newPod, nil), schedulercache.NewNodeInfo(test.existingPods...))
if err != nil {
t.Errorf("%s: unexpected error: %v", test.test, err)
}
if !fits && !reflect.DeepEqual(reasons, expectedFailureReasons) {
t.Errorf("%s: unexpected failure reasons: %v, want: %v", test.test, reasons, expectedFailureReasons)
}
if fits != test.fits {
t.Errorf("%s: expected %v, got %v", test.test, test.fits, fits)
}
}
}
func newPodWithPort(hostPorts ...int) *v1.Pod {
networkPorts := []v1.ContainerPort{}
for _, port := range hostPorts {
@ -2042,7 +1865,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 0, 0, 0)},
},
fits: true,
wErr: nil,
@ -2054,7 +1877,7 @@ func TestRunGeneralPredicates(t *testing.T) {
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 19})),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@ -2064,34 +1887,6 @@ func TestRunGeneralPredicates(t *testing.T) {
},
test: "not enough cpu and memory resource",
},
{
pod: &v1.Pod{},
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 9, Memory: 19})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: true,
wErr: nil,
test: "no resources/port/host requested always fits on GPU machine",
},
{
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 1})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: false,
wErr: nil,
reasons: []algorithm.PredicateFailureReason{NewInsufficientResourceError(v1.ResourceNvidiaGPU, 1, 1, 1)},
test: "not enough GPU resource",
},
{
pod: newResourcePod(schedulercache.Resource{MilliCPU: 3, Memory: 1, NvidiaGPU: 1}),
nodeInfo: schedulercache.NewNodeInfo(
newResourcePod(schedulercache.Resource{MilliCPU: 5, Memory: 10, NvidiaGPU: 0})),
node: &v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 1, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 1, 32, 0, 0, 0)}},
fits: true,
wErr: nil,
test: "enough GPU resource",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
@ -2101,7 +1896,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@ -2113,7 +1908,7 @@ func TestRunGeneralPredicates(t *testing.T) {
nodeInfo: schedulercache.NewNodeInfo(newPodWithPort(123)),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: "machine1"},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 0, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 0, 32, 0, 0, 0)},
Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 0, 0, 0).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 0, 0, 0)},
},
fits: false,
wErr: nil,
@ -2803,7 +2598,7 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
},
},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelA}},
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: podLabelA}},
},
nodes: []v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
@ -2875,7 +2670,8 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"foo": "bar",
"foo": "bar",
"service": "securityscan",
},
},
Spec: v1.PodSpec{
@ -2894,12 +2690,24 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
},
TopologyKey: "zone",
},
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan"},
},
},
},
TopologyKey: "zone",
},
},
},
},
},
},
pods: []*v1.Pod{},
pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: map[string]string{"foo": "bar"}}}},
nodes: []v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"zone": "az1", "hostname": "h1"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"zone": "az2", "hostname": "h2"}}},
@ -2949,6 +2757,55 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
},
test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA and nodeB.",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"abc"},
},
},
},
TopologyKey: "region",
},
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan"},
},
},
},
TopologyKey: "zone",
},
},
},
},
},
},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "abc", "service": "securityscan"}}},
},
nodes: []v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}},
},
nodesExpectAffinityFailureReasons: [][]algorithm.PredicateFailureReason{{ErrPodAffinityNotMatch, ErrPodAntiAffinityRulesNotMatch}},
fits: map[string]bool{
"nodeA": false,
"nodeB": true,
},
test: "This test ensures that anti-affinity matches a pod when all terms of the anti-affinity rule matches a pod.",
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
@ -2986,7 +2843,7 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
"nodeB": false,
"nodeC": true,
},
test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA and nodeB but can be schedulerd onto nodeC",
test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA and nodeB but can be scheduled onto nodeC",
},
{
pod: &v1.Pod{
@ -3134,7 +2991,7 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
"nodeB": false,
"nodeC": true,
},
test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA, nodeB, but can be schedulerd onto nodeC (NodeC has an existing pod that match the inter pod affinity rule but in different namespace)",
test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA, nodeB, but can be scheduled onto nodeC (NodeC has an existing pod that match the inter pod affinity rule but in different namespace)",
},
}
@ -3142,7 +2999,8 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
for indexTest, test := range tests {
nodeListInfo := FakeNodeListInfo(test.nodes)
for indexNode, node := range test.nodes {
nodeInfoMap := make(map[string]*schedulercache.NodeInfo)
for i, node := range test.nodes {
var podsOnNode []*v1.Pod
for _, pod := range test.pods {
if pod.Spec.NodeName == node.Name {
@ -3150,21 +3008,23 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
}
}
nodeInfo := schedulercache.NewNodeInfo(podsOnNode...)
nodeInfo.SetNode(&test.nodes[i])
nodeInfoMap[node.Name] = nodeInfo
}
for indexNode, node := range test.nodes {
testFit := PodAffinityChecker{
info: nodeListInfo,
podLister: schedulertesting.FakePodLister(test.pods),
}
nodeInfo := schedulercache.NewNodeInfo(podsOnNode...)
nodeInfo.SetNode(&node)
nodeInfoMap := map[string]*schedulercache.NodeInfo{node.Name: nodeInfo}
var meta algorithm.PredicateMetadata
if !test.nometa {
meta = PredicateMetadata(test.pod, nodeInfoMap)
}
fits, reasons, _ := testFit.InterPodAffinityMatches(test.pod, meta, nodeInfo)
fits, reasons, _ := testFit.InterPodAffinityMatches(test.pod, meta, nodeInfoMap[node.Name])
if !fits && !reflect.DeepEqual(reasons, test.nodesExpectAffinityFailureReasons[indexNode]) {
t.Errorf("index: %d test: %s unexpected failure reasons: %v expect: %v", indexTest, test.test, reasons, test.nodesExpectAffinityFailureReasons[indexNode])
}
@ -3423,7 +3283,7 @@ func TestPodSchedulesOnNodeWithMemoryPressureCondition(t *testing.T) {
ImagePullPolicy: "Always",
// at least one requirement -> burstable pod
Resources: v1.ResourceRequirements{
Requests: makeAllocatableResources(100, 100, 100, 100, 0, 0, 0),
Requests: makeAllocatableResources(100, 100, 100, 0, 0, 0),
},
},
},

View File

@ -17,12 +17,8 @@ limitations under the License.
package predicates
import (
"github.com/golang/glog"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
@ -70,69 +66,6 @@ func CreateSelectorFromLabels(aL map[string]string) labels.Selector {
return labels.Set(aL).AsSelector()
}
// EquivalencePodGenerator is a generator of equivalence class for pod with consideration of PVC info.
type EquivalencePodGenerator struct {
pvcInfo PersistentVolumeClaimInfo
}
// NewEquivalencePodGenerator returns a getEquivalencePod method with consideration of PVC info.
func NewEquivalencePodGenerator(pvcInfo PersistentVolumeClaimInfo) algorithm.GetEquivalencePodFunc {
g := &EquivalencePodGenerator{
pvcInfo: pvcInfo,
}
return g.getEquivalencePod
}
// GetEquivalencePod returns a EquivalencePod which contains a group of pod attributes which can be reused.
func (e *EquivalencePodGenerator) getEquivalencePod(pod *v1.Pod) interface{} {
// For now we only consider pods:
// 1. OwnerReferences is Controller
// 2. with same OwnerReferences
// 3. with same PVC claim
// to be equivalent
for _, ref := range pod.OwnerReferences {
if ref.Controller != nil && *ref.Controller {
pvcSet, err := e.getPVCSet(pod)
if err == nil {
// A pod can only belongs to one controller, so let's return.
return &EquivalencePod{
ControllerRef: ref,
PVCSet: pvcSet,
}
}
// If error encountered, log warning and return nil (i.e. no equivalent pod found)
glog.Warningf("[EquivalencePodGenerator] for pod: %v failed due to: %v", pod.GetName(), err)
return nil
}
}
return nil
}
// getPVCSet returns a set of PVC UIDs of given pod.
func (e *EquivalencePodGenerator) getPVCSet(pod *v1.Pod) (sets.String, error) {
result := sets.NewString()
for _, volume := range pod.Spec.Volumes {
if volume.PersistentVolumeClaim == nil {
continue
}
pvcName := volume.PersistentVolumeClaim.ClaimName
pvc, err := e.pvcInfo.GetPersistentVolumeClaimInfo(pod.GetNamespace(), pvcName)
if err != nil {
return nil, err
}
result.Insert(string(pvc.UID))
}
return result, nil
}
// EquivalencePod is a group of pod attributes which can be reused as equivalence to schedule other pods.
type EquivalencePod struct {
ControllerRef metav1.OwnerReference
PVCSet sets.String
}
// portsConflict check whether existingPorts and wantPorts conflict with each other
// return true if we have a conflict
func portsConflict(existingPorts schedutil.HostPortInfo, wantPorts []*v1.ContainerPort) bool {

View File

@ -19,6 +19,7 @@ go_library(
"node_label.go",
"node_prefer_avoid_pods.go",
"reduce.go",
"requested_to_capacity_ratio.go",
"resource_allocation.go",
"resource_limits.go",
"selector_spreading.go",
@ -28,18 +29,21 @@ go_library(
importpath = "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities",
deps = [
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/util/node:go_default_library",
"//pkg/util/parsers:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/labels:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/util/workqueue:go_default_library",
],
)
@ -56,22 +60,27 @@ go_test(
"node_affinity_test.go",
"node_label_test.go",
"node_prefer_avoid_pods_test.go",
"requested_to_capacity_ratio_test.go",
"resource_limits_test.go",
"selector_spreading_test.go",
"taint_toleration_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/features:go_default_library",
"//pkg/kubelet/apis:go_default_library",
"//pkg/scheduler/algorithm/priorities/util:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/scheduler/schedulercache:go_default_library",
"//pkg/scheduler/cache:go_default_library",
"//pkg/scheduler/testing:go_default_library",
"//pkg/util/parsers:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/k8s.io/api/apps/v1beta1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)

View File

@ -19,8 +19,10 @@ package priorities
import (
"math"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
var (
@ -36,17 +38,31 @@ var (
BalancedResourceAllocationMap = balancedResourcePriority.PriorityMap
)
func balancedResourceScorer(requested, allocable *schedulercache.Resource) int64 {
func balancedResourceScorer(requested, allocable *schedulercache.Resource, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
cpuFraction := fractionOfCapacity(requested.MilliCPU, allocable.MilliCPU)
memoryFraction := fractionOfCapacity(requested.Memory, allocable.Memory)
// This is to find a node which has the most balanced CPU, memory and volume usage.
if includeVolumes && utilfeature.DefaultFeatureGate.Enabled(features.BalanceAttachedNodeVolumes) && allocatableVolumes > 0 {
volumeFraction := float64(requestedVolumes) / float64(allocatableVolumes)
if cpuFraction >= 1 || memoryFraction >= 1 || volumeFraction >= 1 {
// if requested >= capacity, the corresponding host should never be preferred.
return 0
}
// Compute variance for all the three fractions.
mean := (cpuFraction + memoryFraction + volumeFraction) / float64(3)
variance := float64((((cpuFraction - mean) * (cpuFraction - mean)) + ((memoryFraction - mean) * (memoryFraction - mean)) + ((volumeFraction - mean) * (volumeFraction - mean))) / float64(3))
// Since the variance is computed over positive fractions, it is itself a positive fraction. 1-variance
// makes the score higher for the node with the least variance, and multiplying it by 10 provides the
// scaling factor needed.
return int64((1 - variance) * float64(schedulerapi.MaxPriority))
}
if cpuFraction >= 1 || memoryFraction >= 1 {
// if requested >= capacity, the corresponding host should never be preferred.
return 0
}
// The lower and upper boundaries of the difference between cpuFraction and memoryFraction are -1 and 1
// respectively. Multilying the absolute value of the difference by 10 scales the value to
// respectively. Multiplying the absolute value of the difference by 10 scales the value to
// 0-10 with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
// 10 leads to the score which also scales from 0 to 10, with 10 representing well balanced.
diff := math.Abs(cpuFraction - memoryFraction)
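// Editorial sketch (not part of this diff): a small worked example of the arithmetic the
// comments above describe, for one hypothetical node. The default branch follows the
// comment's "multiply the absolute difference by 10 and subtract it from 10" rule, and
// the volume-aware branch uses the variance of the three fractions.
package main

import (
	"fmt"
	"math"
)

const maxPriority = 10 // stands in for schedulerapi.MaxPriority

func main() {
	cpuFraction, memoryFraction, volumeFraction := 0.5, 0.7, 0.6

	// Volume-aware branch: score = (1 - variance(cpu, mem, volume)) * 10.
	mean := (cpuFraction + memoryFraction + volumeFraction) / 3
	variance := ((cpuFraction-mean)*(cpuFraction-mean) +
		(memoryFraction-mean)*(memoryFraction-mean) +
		(volumeFraction-mean)*(volumeFraction-mean)) / 3
	fmt.Println(int64((1 - variance) * maxPriority)) // 9

	// Default branch: 10 - 10*|cpu - mem|, i.e. (1 - diff) * 10.
	diff := math.Abs(cpuFraction - memoryFraction)
	fmt.Println(int64((1 - diff) * maxPriority)) // 8
}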

View File

@ -17,17 +17,118 @@ limitations under the License.
package priorities
import (
"fmt"
"reflect"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// getExistingVolumeCountForNode computes the remaining volume capacity of a node: maxVolumes minus the volumes already used by the given pods, floored at zero.
func getExistingVolumeCountForNode(pods []*v1.Pod, maxVolumes int) int {
volumeCount := 0
for _, pod := range pods {
volumeCount += len(pod.Spec.Volumes)
}
if maxVolumes-volumeCount > 0 {
return maxVolumes - volumeCount
}
return 0
}
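// Quick worked example of the helper above (editorial note): with maxVolumes = 5 and pods
// carrying a total of 3 volumes, it returns 5 - 3 = 2 free slots; once the pods' volumes
// reach or exceed maxVolumes it returns 0. The test below feeds this value in as the
// node's allocatable volume count.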
func TestBalancedResourceAllocation(t *testing.T) {
// Enable volumesOnNodeForBalancing to do balanced resource allocation
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.BalanceAttachedNodeVolumes))
podwithVol1 := v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("3000"),
},
},
},
},
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "ovp"},
},
},
},
NodeName: "machine4",
}
podwithVol2 := v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
},
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "ovp1"},
},
},
},
NodeName: "machine4",
}
podwithVol3 := v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0m"),
v1.ResourceMemory: resource.MustParse("0"),
},
},
},
},
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "ovp1"},
},
},
},
NodeName: "machine4",
}
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
@ -89,12 +190,33 @@ func TestBalancedResourceAllocation(t *testing.T) {
},
},
}
cpuAndMemory3 := v1.PodSpec{
NodeName: "machine3",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
/*
@ -111,7 +233,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled, nothing requested",
name: "nothing scheduled, nothing requested",
},
{
/*
@ -128,7 +250,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled, resources requested, differently sized machines",
name: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
@ -145,7 +267,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "no resources requested, pods scheduled",
name: "no resources requested, pods scheduled",
pods: []*v1.Pod{
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -168,7 +290,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 6}},
test: "no resources requested, pods scheduled with resources",
name: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -191,7 +313,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 9}},
test: "resources requested, pods scheduled with resources",
name: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -212,7 +334,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 6}},
test: "resources requested, pods scheduled with resources, differently sized machines",
name: "resources requested, pods scheduled with resources, differently sized machines",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -233,7 +355,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "requested resources exceed node capacity",
name: "requested resources exceed node capacity",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -243,22 +365,57 @@ func TestBalancedResourceAllocation(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "zero node resources, pods scheduled with resources",
name: "zero node resources, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Machine4 will be chosen here because it already has an existing volume, making the variance
of volume count, CPU usage, and memory usage smaller.
*/
pod: &v1.Pod{
Spec: v1.PodSpec{
Volumes: []v1.Volume{
{
VolumeSource: v1.VolumeSource{
AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{VolumeID: "ovp2"},
},
},
},
},
},
nodes: []*v1.Node{makeNode("machine3", 3500, 40000), makeNode("machine4", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine3", Score: 8}, {Host: "machine4", Score: 9}},
name: "Include volume count on a node for balanced resource allocation",
pods: []*v1.Pod{
{Spec: cpuAndMemory3},
{Spec: podwithVol1},
{Spec: podwithVol2},
{Spec: podwithVol3},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(BalancedResourceAllocationMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
if len(test.pod.Spec.Volumes) > 0 {
maxVolumes := 5
for _, info := range nodeNameToInfo {
info.TransientInfo.TransNodeInfo.AllocatableVolumesCount = getExistingVolumeCountForNode(info.Pods(), maxVolumes)
info.TransientInfo.TransNodeInfo.RequestedVolumes = len(test.pod.Spec.Volumes)
}
}
list, err := priorityFunction(BalancedResourceAllocationMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}

View File

@ -18,10 +18,12 @@ package priorities
import (
"fmt"
"strings"
"k8s.io/api/core/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/util/parsers"
)
// This is a reasonable size range for container images; the 90th percentile of images on Docker Hub falls within this range.
@ -42,7 +44,7 @@ func ImageLocalityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *scheduler
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
sumSize := totalImageSize(node, pod.Spec.Containers)
sumSize := totalImageSize(nodeInfo, pod.Spec.Containers)
return schedulerapi.HostPriority{
Host: node.Name,
@ -69,20 +71,27 @@ func calculateScoreFromSize(sumSize int64) int {
}
// totalImageSize returns the total size of the given containers' images that are already present on the node.
func totalImageSize(node *v1.Node, containers []v1.Container) int64 {
imageSizes := make(map[string]int64)
for _, image := range node.Status.Images {
for _, name := range image.Names {
imageSizes[name] = image.SizeBytes
}
}
func totalImageSize(nodeInfo *schedulercache.NodeInfo, containers []v1.Container) int64 {
var total int64
imageSizes := nodeInfo.ImageSizes()
for _, container := range containers {
if size, ok := imageSizes[container.Image]; ok {
if size, ok := imageSizes[normalizedImageName(container.Image)]; ok {
total += size
}
}
return total
}
// normalizedImageName returns the CRI compliant name for a given image.
// TODO: cover the corner cases of missed matches, e.g.,
// 1. Using Docker as runtime and docker.io/library/test:tag in pod spec, but only test:tag will be present in node status
// 2. Using the implicit registry, i.e., test:tag or library/test:tag in pod spec but only docker.io/library/test:tag
// in node status; note that if users consistently use one registry format, this should not happen.
func normalizedImageName(name string) string {
if strings.LastIndex(name, ":") <= strings.LastIndex(name, "/") {
name = name + ":" + parsers.DefaultImageTag
}
return name
}
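To make the normalization rule above concrete, here is a small editor's sketch (not part of the diff); the same cases are exercised by TestNormalizedImageName later in this change, and the digest value is a placeholder:
// Sketch only: expected behaviour of normalizedImageName for common reference forms.
var _ = []struct{ in, out string }{
	{"root", "root:latest"},                          // no tag -> default tag appended
	{"root:tag", "root:tag"},                         // explicit tag kept as-is
	{"gcr.io:5000/root", "gcr.io:5000/root:latest"},  // last ":" is the registry port (before the last "/"), so still untagged
	{"root@sha256:<digest>", "root@sha256:<digest>"}, // digest references pass through unchanged
}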

View File

@ -17,14 +17,17 @@ limitations under the License.
package priorities
import (
"crypto/sha256"
"reflect"
"sort"
"testing"
"encoding/hex"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/kubernetes/pkg/util/parsers"
)
func TestImageLocalityPriority(t *testing.T) {
@ -65,7 +68,7 @@ func TestImageLocalityPriority(t *testing.T) {
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/40",
"gcr.io/40:" + parsers.DefaultImageTag,
"gcr.io/40:v1",
"gcr.io/40:v1",
},
@ -73,14 +76,14 @@ func TestImageLocalityPriority(t *testing.T) {
},
{
Names: []string{
"gcr.io/140",
"gcr.io/140:" + parsers.DefaultImageTag,
"gcr.io/140:v1",
},
SizeBytes: int64(140 * mb),
},
{
Names: []string{
"gcr.io/2000",
"gcr.io/2000:" + parsers.DefaultImageTag,
},
SizeBytes: int64(2000 * mb),
},
@ -91,13 +94,13 @@ func TestImageLocalityPriority(t *testing.T) {
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/250",
"gcr.io/250:" + parsers.DefaultImageTag,
},
SizeBytes: int64(250 * mb),
},
{
Names: []string{
"gcr.io/10",
"gcr.io/10:" + parsers.DefaultImageTag,
"gcr.io/10:v1",
},
SizeBytes: int64(10 * mb),
@ -110,28 +113,28 @@ func TestImageLocalityPriority(t *testing.T) {
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
// Pod: gcr.io/40 gcr.io/250
// Node1
// Image: gcr.io/40 40MB
// Image: gcr.io/40:latest 40MB
// Score: (40M-23M)/97.7M + 1 = 1
// Node2
// Image: gcr.io/250 250MB
// Image: gcr.io/250:latest 250MB
// Score: (250M-23M)/97.7M + 1 = 3
pod: &v1.Pod{Spec: test40250},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 3}},
test: "two images spread on two nodes, prefer the larger image one",
name: "two images spread on two nodes, prefer the larger image one",
},
{
// Pod: gcr.io/40 gcr.io/140
// Node1
// Image: gcr.io/40 40MB, gcr.io/140 140MB
// Image: gcr.io/40:latest 40MB, gcr.io/140:latest 140MB
// Score: (40M+140M-23M)/97.7M + 1 = 2
// Node2
@ -140,37 +143,56 @@ func TestImageLocalityPriority(t *testing.T) {
pod: &v1.Pod{Spec: test40140},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: 0}},
test: "two images on one node, prefer this node",
name: "two images on one node, prefer this node",
},
{
// Pod: gcr.io/2000 gcr.io/10
// Node1
// Image: gcr.io/2000 2000MB
// Image: gcr.io/2000:latest 2000MB
// Score: 2000 > max score = 10
// Node2
// Image: gcr.io/10 10MB
// Image: gcr.io/10:latest 10MB
// Score: 10 < min score = 0
pod: &v1.Pod{Spec: testMinMax},
nodes: []*v1.Node{makeImageNode("machine1", node401402000), makeImageNode("machine2", node25010)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "if exceed limit, use limit",
name: "if exceed limit, use limit",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(ImageLocalityPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(ImageLocalityPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
sort.Sort(test.expectedList)
sort.Sort(list)
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}
func TestNormalizedImageName(t *testing.T) {
for _, testCase := range []struct {
Input string
Output string
}{
{Input: "root", Output: "root:latest"},
{Input: "root:tag", Output: "root:tag"},
{Input: "gcr.io:5000/root", Output: "gcr.io:5000/root:latest"},
{Input: "root@" + getImageFakeDigest("root"), Output: "root@" + getImageFakeDigest("root")},
} {
image := normalizedImageName(testCase.Input)
if image != testCase.Output {
t.Errorf("expected image reference: %q, got %q", testCase.Output, image)
}
}
}
@ -181,3 +203,8 @@ func makeImageNode(node string, status v1.NodeStatus) *v1.Node {
Status: status,
}
}
func getImageFakeDigest(fakeContent string) string {
hash := sha256.Sum256([]byte(fakeContent))
return "sha256:" + hex.EncodeToString(hash[:])
}

View File

@ -27,7 +27,7 @@ import (
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"github.com/golang/glog"
)

View File

@ -24,7 +24,7 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
@ -267,7 +267,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
@ -277,7 +277,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "all machines are same priority as Affinity is nil",
name: "all machines are same priority as Affinity is nil",
},
// the node (machine1) that has the label {"region": "China"} (matching the topology key) and has existing pods that match the labelSelector gets a high score
// the node (machine3) that does not have the label {"region": "whatever the value is"} (mismatching the topology key) but has existing pods that match the labelSelector gets a low score
@ -295,7 +295,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "Affinity: pod that matches topology key & pods in nodes will get a high score compared to others " +
name: "Affinity: pod that matches topology key & pods in nodes will get a high score compared to others " +
"which don't match either pods in nodes or in topology key",
},
// the node1 (machine1) that has the label {"region": "China"} (matching the topology key) and has existing pods that match the labelSelector gets a high score
@ -313,7 +313,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "All the nodes that have the same topology key & label value, with one of them having an existing pod that matches the affinity rules, have the same score",
name: "All the nodes that have the same topology key & label value, with one of them having an existing pod that matches the affinity rules, have the same score",
},
// there are 2 regions, say regionChina (machine1, machine3, machine4) and regionIndia (machine2, machine5); both regions have nodes that match the preference.
// But there are more nodes (actually more existing pods) in regionChina that match the preference than in regionIndia.
@ -337,7 +337,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine5", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 5}, {Host: "machine3", Score: schedulerapi.MaxPriority}, {Host: "machine4", Score: schedulerapi.MaxPriority}, {Host: "machine5", Score: 5}},
test: "Affinity: nodes in one region have more matching pods compared to the other region, so the region which has more matches will get a high score",
name: "Affinity: nodes in one region have more matching pods compared to the other region, so the region which has more matches will get a high score",
},
// Test with the different operators and values for pod affinity scheduling preference, including some match failures.
{
@ -353,7 +353,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Affinity: different Label operators and values for pod affinity scheduling preference, including some match failures ",
name: "Affinity: different Label operators and values for pod affinity scheduling preference, including some match failures ",
},
// Test the symmetry cases for affinity. The difference between affinity and symmetry is that it is not the pod that wants to run together with some existing pods,
// but the existing pods that have the inter-pod affinity preference, while the pod to schedule satisfies that preference.
@ -369,7 +369,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Affinity symmetry: considered only the preferredDuringSchedulingIgnoredDuringExecution in pod affinity symmetry",
name: "Affinity symmetry: considered only the preferredDuringSchedulingIgnoredDuringExecution in pod affinity symmetry",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
@ -383,7 +383,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Affinity symmetry: considered RequiredDuringSchedulingIgnoredDuringExecution in pod affinity symmetry",
name: "Affinity symmetry: considered RequiredDuringSchedulingIgnoredDuringExecution in pod affinity symmetry",
},
// The pod to schedule prefers to stay away from some existing pods at the node level, using pod anti-affinity.
@ -403,7 +403,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChina}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity: pod that does not match existing pods in node will get high score ",
name: "Anti Affinity: pod that does not match existing pods in node will get high score ",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
@ -416,7 +416,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChina}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity: pod that does not match topology key & matches the pods in nodes will get higher score compared to others ",
name: "Anti Affinity: pod that does not match topology key & matches the pods in nodes will get higher score compared to others ",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: "", Affinity: awayFromS1InAz}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
@ -430,7 +430,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity: one node has more matching pods compared to the other node, so the node which has more non-matching pods will get a high score",
name: "Anti Affinity: one node has more matching pods compared to the other node, so the node which has more non-matching pods will get a high score",
},
// Test the symmetry cases for anti affinity
{
@ -444,7 +444,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelAzAz2}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "Anti Affinity symmetry: the existing pods in node which has anti affinity match will get high score",
name: "Anti Affinity symmetry: the existing pods in node which has anti affinity match will get high score",
},
// Test both affinity and anti-affinity
{
@ -458,7 +458,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelAzAz1}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "Affinity and Anti Affinity: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity",
name: "Affinity and Anti Affinity: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity",
},
// Combined cases considering both affinity and anti-affinity: the pod to schedule and the existing pods have the same labels (they are in the same RC/service),
// the pod prefers to run together with its sibling pods in the same region, but wants to stay away from them at the node level,
@ -483,7 +483,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine5", Labels: labelRgIndia}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 4}, {Host: "machine3", Score: schedulerapi.MaxPriority}, {Host: "machine4", Score: schedulerapi.MaxPriority}, {Host: "machine5", Score: 4}},
test: "Affinity and Anti Affinity: considering both affinity and anti-affinity, the pod to schedule and existing pods have the same labels",
name: "Affinity and Anti Affinity: considering both affinity and anti-affinity, the pod to schedule and existing pods have the same labels",
},
// Consider Affinity, Anti Affinity and symmetry together.
// for Affinity, the weights are: 8, 0, 0, 0
@ -505,24 +505,26 @@ func TestInterPodAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine4", Labels: labelAzAz2}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: schedulerapi.MaxPriority}, {Host: "machine4", Score: 0}},
test: "Affinity and Anti Affinity and symmetry: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity & symmetry",
name: "Affinity and Anti Affinity and symmetry: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity & symmetry",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
interPodAffinity := InterPodAffinity{
info: FakeNodeListInfo(test.nodes),
nodeLister: schedulertesting.FakeNodeLister(test.nodes),
podLister: schedulertesting.FakePodLister(test.pods),
hardPodAffinityWeight: v1.DefaultHardPodAffinitySymmetricWeight,
}
list, err := interPodAffinity.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: \nexpected \n\t%#v, \ngot \n\t%#v\n", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
interPodAffinity := InterPodAffinity{
info: FakeNodeListInfo(test.nodes),
nodeLister: schedulertesting.FakeNodeLister(test.nodes),
podLister: schedulertesting.FakePodLister(test.pods),
hardPodAffinityWeight: v1.DefaultHardPodAffinitySymmetricWeight,
}
list, err := interPodAffinity.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected \n\t%#v, \ngot \n\t%#v\n", test.expectedList, list)
}
})
}
}
@ -563,7 +565,7 @@ func TestHardPodAffinitySymmetricWeight(t *testing.T) {
nodes []*v1.Node
hardPodAffinityWeight int32
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelServiceS1}},
@ -578,7 +580,7 @@ func TestHardPodAffinitySymmetricWeight(t *testing.T) {
},
hardPodAffinityWeight: v1.DefaultHardPodAffinitySymmetricWeight,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: 0}},
test: "Hard Pod Affinity symmetry: hard pod affinity symmetry is weighted 1 by default, so nodes that match the hard pod affinity symmetry rules get a high score",
name: "Hard Pod Affinity symmetry: hard pod affinity symmetry is weighted 1 by default, so nodes that match the hard pod affinity symmetry rules get a high score",
},
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelServiceS1}},
@ -593,23 +595,25 @@ func TestHardPodAffinitySymmetricWeight(t *testing.T) {
},
hardPodAffinityWeight: 0,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "Hard Pod Affinity symmetry: hard pod affinity symmetry is disabled (weight 0), so nodes that match the hard pod affinity symmetry rules get the same score as those that do not match",
name: "Hard Pod Affinity symmetry: hard pod affinity symmetry is disabled (weight 0), so nodes that match the hard pod affinity symmetry rules get the same score as those that do not match",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
ipa := InterPodAffinity{
info: FakeNodeListInfo(test.nodes),
nodeLister: schedulertesting.FakeNodeLister(test.nodes),
podLister: schedulertesting.FakePodLister(test.pods),
hardPodAffinityWeight: test.hardPodAffinityWeight,
}
list, err := ipa.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: \nexpected \n\t%#v, \ngot \n\t%#v\n", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
ipa := InterPodAffinity{
info: FakeNodeListInfo(test.nodes),
nodeLister: schedulertesting.FakeNodeLister(test.nodes),
podLister: schedulertesting.FakePodLister(test.pods),
hardPodAffinityWeight: test.hardPodAffinityWeight,
}
list, err := ipa.CalculateInterPodAffinityPriority(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected \n\t%#v, \ngot \n\t%#v\n", test.expectedList, list)
}
})
}
}

View File

@ -18,7 +18,7 @@ package priorities
import (
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
var (
@ -33,7 +33,7 @@ var (
LeastRequestedPriorityMap = leastResourcePriority.PriorityMap
)
func leastResourceScorer(requested, allocable *schedulercache.Resource) int64 {
func leastResourceScorer(requested, allocable *schedulercache.Resource, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
return (leastRequestedScore(requested.MilliCPU, allocable.MilliCPU) +
leastRequestedScore(requested.Memory, allocable.Memory)) / 2
}

View File

@ -24,7 +24,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestLeastRequested(t *testing.T) {
@ -94,7 +94,7 @@ func TestLeastRequested(t *testing.T) {
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
/*
@ -111,7 +111,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled, nothing requested",
name: "nothing scheduled, nothing requested",
},
{
/*
@ -128,7 +128,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 5}},
test: "nothing scheduled, resources requested, differently sized machines",
name: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
@ -145,7 +145,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "no resources requested, pods scheduled",
name: "no resources requested, pods scheduled",
pods: []*v1.Pod{
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -168,7 +168,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 5}},
test: "no resources requested, pods scheduled with resources",
name: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -191,7 +191,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 4}},
test: "resources requested, pods scheduled with resources",
name: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -212,7 +212,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 6}},
test: "resources requested, pods scheduled with resources, differently sized machines",
name: "resources requested, pods scheduled with resources, differently sized machines",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -233,7 +233,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 2}},
test: "requested resources exceed node capacity",
name: "requested resources exceed node capacity",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -243,7 +243,7 @@ func TestLeastRequested(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "zero node resources, pods scheduled with resources",
name: "zero node resources, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -252,13 +252,15 @@ func TestLeastRequested(t *testing.T) {
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(LeastRequestedPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(LeastRequestedPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}

View File

@ -22,7 +22,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// PriorityMetadataFactory is a factory to produce PriorityMetadata.

View File

@ -26,7 +26,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
@ -117,13 +117,13 @@ func TestPriorityMetadata(t *testing.T) {
}
tests := []struct {
pod *v1.Pod
test string
name string
expected interface{}
}{
{
pod: nil,
expected: nil,
test: "pod is nil , priorityMetadata is nil",
name: "pod is nil , priorityMetadata is nil",
},
{
pod: podWithTolerationsAndAffinity,
@ -132,7 +132,7 @@ func TestPriorityMetadata(t *testing.T) {
podTolerations: tolerations,
affinity: podAffinity,
},
test: "Produce a priorityMetadata with default requests",
name: "Produce a priorityMetadata with default requests",
},
{
pod: podWithTolerationsAndRequests,
@ -141,7 +141,7 @@ func TestPriorityMetadata(t *testing.T) {
podTolerations: tolerations,
affinity: nil,
},
test: "Produce a priorityMetadata with specified requests",
name: "Produce a priorityMetadata with specified requests",
},
{
pod: podWithAffinityAndRequests,
@ -150,7 +150,7 @@ func TestPriorityMetadata(t *testing.T) {
podTolerations: nil,
affinity: podAffinity,
},
test: "Produce a priorityMetadata with specified requests",
name: "Produce a priorityMetadata with specified requests",
},
}
metadataProducer := NewPriorityMetadataFactory(
@ -159,9 +159,11 @@ func TestPriorityMetadata(t *testing.T) {
schedulertesting.FakeReplicaSetLister([]*extensions.ReplicaSet{}),
schedulertesting.FakeStatefulSetLister([]*apps.StatefulSet{}))
for _, test := range tests {
ptData := metadataProducer(test.pod, nil)
if !reflect.DeepEqual(test.expected, ptData) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expected, ptData)
}
t.Run(test.name, func(t *testing.T) {
ptData := metadataProducer(test.pod, nil)
if !reflect.DeepEqual(test.expected, ptData) {
t.Errorf("expected %#v, got %#v", test.expected, ptData)
}
})
}
}

View File

@ -18,7 +18,7 @@ package priorities
import (
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
var (
@ -31,7 +31,7 @@ var (
MostRequestedPriorityMap = mostResourcePriority.PriorityMap
)
func mostResourceScorer(requested, allocable *schedulercache.Resource) int64 {
func mostResourceScorer(requested, allocable *schedulercache.Resource, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
return (mostRequestedScore(requested.MilliCPU, allocable.MilliCPU) +
mostRequestedScore(requested.Memory, allocable.Memory)) / 2
}

View File

@ -24,7 +24,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestMostRequested(t *testing.T) {
@ -109,7 +109,7 @@ func TestMostRequested(t *testing.T) {
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
/*
@ -126,7 +126,7 @@ func TestMostRequested(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "nothing scheduled, nothing requested",
name: "nothing scheduled, nothing requested",
},
{
/*
@ -143,7 +143,7 @@ func TestMostRequested(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 5}},
test: "nothing scheduled, resources requested, differently sized machines",
name: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
@ -160,7 +160,7 @@ func TestMostRequested(t *testing.T) {
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 4}},
test: "no resources requested, pods scheduled with resources",
name: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -183,7 +183,7 @@ func TestMostRequested(t *testing.T) {
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 5}},
test: "resources requested, pods scheduled with resources",
name: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
@ -204,18 +204,20 @@ func TestMostRequested(t *testing.T) {
pod: &v1.Pod{Spec: bigCPUAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 10000, 8000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 2}},
test: "resources requested with more than the node, pods scheduled with resources",
name: "resources requested with more than the node, pods scheduled with resources",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(MostRequestedPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(MostRequestedPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}

View File

@ -23,7 +23,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// CalculateNodeAffinityPriorityMap prioritizes nodes according to node affinity scheduling preferences
@ -37,12 +37,11 @@ func CalculateNodeAffinityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *s
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
var affinity *v1.Affinity
// default is the podspec.
affinity := pod.Spec.Affinity
if priorityMeta, ok := meta.(*priorityMetadata); ok {
// We were able to parse metadata, use affinity from there.
affinity = priorityMeta.affinity
} else {
// We couldn't parse metadata - fallback to the podspec.
affinity = pod.Spec.Affinity
}
var count int32

View File

@ -23,7 +23,7 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestNodeAffinityPriority(t *testing.T) {
@ -105,7 +105,7 @@ func TestNodeAffinityPriority(t *testing.T) {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: &v1.Pod{
@ -119,7 +119,7 @@ func TestNodeAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "all machines are same priority as NodeAffinity is nil",
name: "all machines are same priority as NodeAffinity is nil",
},
{
pod: &v1.Pod{
@ -133,7 +133,7 @@ func TestNodeAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "no machine matches the preferred scheduling requirements in NodeAffinity of pod so all machines' priority is zero",
name: "no machine matches the preferred scheduling requirements in NodeAffinity of pod so all machines' priority is zero",
},
{
pod: &v1.Pod{
@ -147,7 +147,7 @@ func TestNodeAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "only machine1 matches the preferred scheduling requirements of pod",
name: "only machine1 matches the preferred scheduling requirements of pod",
},
{
pod: &v1.Pod{
@ -161,19 +161,21 @@ func TestNodeAffinityPriority(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine5", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 3}},
test: "all machines match the preferred scheduling requirements of pod but with different priorities ",
name: "all machines match the preferred scheduling requirements of pod but with different priorities ",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
nap := priorityFunction(CalculateNodeAffinityPriorityMap, CalculateNodeAffinityPriorityReduce, nil)
list, err := nap(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: \nexpected %#v, \ngot %#v", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
nap := priorityFunction(CalculateNodeAffinityPriorityMap, CalculateNodeAffinityPriorityReduce, nil)
list, err := nap(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, \ngot %#v", test.expectedList, list)
}
})
}
}

View File

@ -23,7 +23,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// NodeLabelPrioritizer contains information to calculate node label priority.

View File

@ -24,7 +24,7 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestNewNodeLabelPriority(t *testing.T) {
@ -36,7 +36,7 @@ func TestNewNodeLabelPriority(t *testing.T) {
label string
presence bool
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
nodes: []*v1.Node{
@ -47,7 +47,7 @@ func TestNewNodeLabelPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "baz",
presence: true,
test: "no match found, presence true",
name: "no match found, presence true",
},
{
nodes: []*v1.Node{
@ -58,7 +58,7 @@ func TestNewNodeLabelPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
label: "baz",
presence: false,
test: "no match found, presence false",
name: "no match found, presence false",
},
{
nodes: []*v1.Node{
@ -69,7 +69,7 @@ func TestNewNodeLabelPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "foo",
presence: true,
test: "one match found, presence true",
name: "one match found, presence true",
},
{
nodes: []*v1.Node{
@ -80,7 +80,7 @@ func TestNewNodeLabelPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
label: "foo",
presence: false,
test: "one match found, presence false",
name: "one match found, presence false",
},
{
nodes: []*v1.Node{
@ -91,7 +91,7 @@ func TestNewNodeLabelPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
label: "bar",
presence: true,
test: "two matches found, presence true",
name: "two matches found, presence true",
},
{
nodes: []*v1.Node{
@ -102,25 +102,27 @@ func TestNewNodeLabelPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "bar",
presence: false,
test: "two matches found, presence false",
name: "two matches found, presence false",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
labelPrioritizer := &NodeLabelPrioritizer{
label: test.label,
presence: test.presence,
}
list, err := priorityFunction(labelPrioritizer.CalculateNodeLabelPriorityMap, nil, nil)(nil, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
labelPrioritizer := &NodeLabelPrioritizer{
label: test.label,
presence: test.presence,
}
list, err := priorityFunction(labelPrioritizer.CalculateNodeLabelPriorityMap, nil, nil)(nil, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}

View File

@ -24,7 +24,7 @@ import (
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// CalculateNodePreferAvoidPodsPriorityMap prioritizes nodes according to the node annotation

View File

@ -24,7 +24,7 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestNodePreferAvoidPriority(t *testing.T) {
@ -84,7 +84,7 @@ func TestNodePreferAvoidPriority(t *testing.T) {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: &v1.Pod{
@ -97,7 +97,7 @@ func TestNodePreferAvoidPriority(t *testing.T) {
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "pod managed by ReplicationController should avoid a node, this node gets the lowest priority score",
name: "pod managed by ReplicationController should avoid a node, this node gets the lowest priority score",
},
{
pod: &v1.Pod{
@ -110,7 +110,7 @@ func TestNodePreferAvoidPriority(t *testing.T) {
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "ownership by random controller should be ignored",
name: "ownership by random controller should be ignored",
},
{
pod: &v1.Pod{
@ -123,7 +123,7 @@ func TestNodePreferAvoidPriority(t *testing.T) {
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "owner without Controller field set should be ignored",
name: "owner without Controller field set should be ignored",
},
{
pod: &v1.Pod{
@ -136,21 +136,23 @@ func TestNodePreferAvoidPriority(t *testing.T) {
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: schedulerapi.MaxPriority}},
test: "pod managed by ReplicaSet should avoid a node, this node gets the lowest priority score",
name: "pod managed by ReplicaSet should avoid a node, this node gets the lowest priority score",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}

View File

@ -20,7 +20,7 @@ import (
"k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// NormalizeReduce generates a PriorityReduceFunction that can normalize the result

View File

@ -0,0 +1,141 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// FunctionShape represents shape of scoring function.
// For safety use NewFunctionShape which performs precondition checks for struct creation.
type FunctionShape []FunctionShapePoint
// FunctionShapePoint represents single point in scoring function shape.
type FunctionShapePoint struct {
// Utilization is function argument.
Utilization int64
// Score is function value.
Score int64
}
var (
// give priority to least utilized nodes by default
defaultFunctionShape, _ = NewFunctionShape([]FunctionShapePoint{{0, 10}, {100, 0}})
)
const (
minUtilization = 0
maxUtilization = 100
minScore = 0
maxScore = schedulerapi.MaxPriority
)
// NewFunctionShape creates instance of FunctionShape in a safe way performing all
// necessary sanity checks.
func NewFunctionShape(points []FunctionShapePoint) (FunctionShape, error) {
n := len(points)
if n == 0 {
return nil, fmt.Errorf("at least one point must be specified")
}
for i := 1; i < n; i++ {
if points[i-1].Utilization >= points[i].Utilization {
return nil, fmt.Errorf("utilization values must be sorted. Utilization[%d]==%d >= Utilization[%d]==%d", i-1, points[i-1].Utilization, i, points[i].Utilization)
}
}
for i, point := range points {
if point.Utilization < minUtilization {
return nil, fmt.Errorf("utilization values must not be less than %d. Utilization[%d]==%d", minUtilization, i, point.Utilization)
}
if point.Utilization > maxUtilization {
return nil, fmt.Errorf("utilization values must not be greater than %d. Utilization[%d]==%d", maxUtilization, i, point.Utilization)
}
if point.Score < minScore {
return nil, fmt.Errorf("score values must not be less than %d. Score[%d]==%d", minScore, i, point.Score)
}
if point.Score > maxScore {
return nil, fmt.Errorf("score values must not be greater than %d. Score[%d]==%d", maxScore, i, point.Score)
}
}
// Make a defensive copy so we do not rely on the passed slice remaining unchanged afterwards.
pointsCopy := make(FunctionShape, n)
copy(pointsCopy, points)
return pointsCopy, nil
}
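As a usage illustration (editor's sketch, not part of the diff), a custom shape that prefers packed nodes instead of the default least-utilized behaviour could be built and passed to the constructor defined below; the function name is hypothetical.
// Hypothetical example: a bin-packing style shape built through NewFunctionShape.
func exampleBinPackingPriority() *ResourceAllocationPriority {
	shape, err := NewFunctionShape([]FunctionShapePoint{
		{Utilization: 0, Score: 0},    // an empty node gets the lowest score
		{Utilization: 100, Score: 10}, // a fully utilized node gets the highest score (MaxPriority)
	})
	if err != nil {
		// NewFunctionShape rejects unsorted utilization values and out-of-range scores.
		panic(err)
	}
	return RequestedToCapacityRatioResourceAllocationPriority(shape)
}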
// RequestedToCapacityRatioResourceAllocationPriorityDefault creates a requestedToCapacity based
// ResourceAllocationPriority using default resource scoring function shape.
// The default function assigns the maximum score to a resource when all of its capacity is available
// and a score of 0 when the requested amount is equal to the capacity.
func RequestedToCapacityRatioResourceAllocationPriorityDefault() *ResourceAllocationPriority {
return RequestedToCapacityRatioResourceAllocationPriority(defaultFunctionShape)
}
// RequestedToCapacityRatioResourceAllocationPriority creates a requestedToCapacity based
// ResourceAllocationPriority using provided resource scoring function shape.
func RequestedToCapacityRatioResourceAllocationPriority(scoringFunctionShape FunctionShape) *ResourceAllocationPriority {
return &ResourceAllocationPriority{"RequestedToCapacityRatioResourceAllocationPriority", buildRequestedToCapacityRatioScorerFunction(scoringFunctionShape)}
}
func buildRequestedToCapacityRatioScorerFunction(scoringFunctionShape FunctionShape) func(*schedulercache.Resource, *schedulercache.Resource, bool, int, int) int64 {
rawScoringFunction := buildBrokenLinearFunction(scoringFunctionShape)
resourceScoringFunction := func(requested, capacity int64) int64 {
if capacity == 0 || requested > capacity {
return rawScoringFunction(maxUtilization)
}
return rawScoringFunction(maxUtilization - (capacity-requested)*maxUtilization/capacity)
}
return func(requested, allocable *schedulercache.Resource, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64 {
cpuScore := resourceScoringFunction(requested.MilliCPU, allocable.MilliCPU)
memoryScore := resourceScoringFunction(requested.Memory, allocable.Memory)
return (cpuScore + memoryScore) / 2
}
}
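Editor's note: a worked example of the default shape, using the numbers from the "differently sized machines" case in TestRequestedToCapacityRatio further down (node1: capacity 4000 milli-CPU / 10000 memory, request 3000 / 5000):
// cpu:    utilization = 100 - (4000-3000)*100/4000  = 75 -> raw score = 10 + (0-10)*75/100 = 3 (integer division)
// memory: utilization = 100 - (10000-5000)*100/10000 = 50 -> raw score = 10 + (0-10)*50/100 = 5
// node score = (3 + 5) / 2 = 4, matching the expected priority for node1 in that test case.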
// Creates a function which is built using linear segments. Segments are defined via shape array.
// Shape[i].Utilization slice represents points on "utilization" axis where different segments meet.
// Shape[i].Score represents function values at meeting points.
//
// function f(p) is defined as:
// shape[0].Score for p <= shape[0].Utilization
// shape[n-1].Score for p > shape[n-1].Utilization
// and linear interpolation between the two surrounding points for shape[i-1].Utilization < p <= shape[i].Utilization
func buildBrokenLinearFunction(shape FunctionShape) func(int64) int64 {
n := len(shape)
return func(p int64) int64 {
for i := 0; i < n; i++ {
if p <= shape[i].Utilization {
if i == 0 {
return shape[0].Score
}
return shape[i-1].Score + (shape[i].Score-shape[i-1].Score)*(p-shape[i-1].Utilization)/(shape[i].Utilization-shape[i-1].Utilization)
}
}
return shape[n-1].Score
}
}
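Editor's note: a quick sanity check of the interpolation, using the first shape from TestBrokenLinearFunction below:
// shape = {{10, 1}, {90, 9}}:
//   f(0)   = 1  // p is left of the first point, so the first score is returned
//   f(20)  = 1 + (9-1)*(20-10)/(90-10) = 2
//   f(89)  = 1 + (9-1)*(89-10)/(90-10) = 8  // 8*79/80 truncates to 7 under integer division
//   f(100) = 9  // p is right of the last point, so the last score is returned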

View File

@ -0,0 +1,241 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
"github.com/stretchr/testify/assert"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestCreatingFunctionShapeErrorsIfEmptyPoints(t *testing.T) {
var err error
_, err = NewFunctionShape([]FunctionShapePoint{})
assert.Equal(t, "at least one point must be specified", err.Error())
}
func TestCreatingFunctionShapeErrorsIfXIsNotSorted(t *testing.T) {
var err error
_, err = NewFunctionShape([]FunctionShapePoint{{10, 1}, {15, 2}, {20, 3}, {19, 4}, {25, 5}})
assert.Equal(t, "utilization values must be sorted. Utilization[2]==20 >= Utilization[3]==19", err.Error())
_, err = NewFunctionShape([]FunctionShapePoint{{10, 1}, {20, 2}, {20, 3}, {22, 4}, {25, 5}})
assert.Equal(t, "utilization values must be sorted. Utilization[1]==20 >= Utilization[2]==20", err.Error())
}
func TestCreatingFunctionPointNotInAllowedRange(t *testing.T) {
var err error
_, err = NewFunctionShape([]FunctionShapePoint{{-1, 0}, {100, 10}})
assert.Equal(t, "utilization values must not be less than 0. Utilization[0]==-1", err.Error())
_, err = NewFunctionShape([]FunctionShapePoint{{0, 0}, {101, 10}})
assert.Equal(t, "utilization values must not be greater than 100. Utilization[1]==101", err.Error())
_, err = NewFunctionShape([]FunctionShapePoint{{0, -1}, {100, 10}})
assert.Equal(t, "score values must not be less than 0. Score[0]==-1", err.Error())
_, err = NewFunctionShape([]FunctionShapePoint{{0, 0}, {100, 11}})
assert.Equal(t, "score values must not be greater than 10. Score[1]==11", err.Error())
}
func TestBrokenLinearFunction(t *testing.T) {
type Assertion struct {
p int64
expected int64
}
type Test struct {
points []FunctionShapePoint
assertions []Assertion
}
tests := []Test{
{
points: []FunctionShapePoint{{10, 1}, {90, 9}},
assertions: []Assertion{
{p: -10, expected: 1},
{p: 0, expected: 1},
{p: 9, expected: 1},
{p: 10, expected: 1},
{p: 15, expected: 1},
{p: 19, expected: 1},
{p: 20, expected: 2},
{p: 89, expected: 8},
{p: 90, expected: 9},
{p: 99, expected: 9},
{p: 100, expected: 9},
{p: 110, expected: 9},
},
},
{
points: []FunctionShapePoint{{0, 2}, {40, 10}, {100, 0}},
assertions: []Assertion{
{p: -10, expected: 2},
{p: 0, expected: 2},
{p: 20, expected: 6},
{p: 30, expected: 8},
{p: 40, expected: 10},
{p: 70, expected: 5},
{p: 100, expected: 0},
{p: 110, expected: 0},
},
},
{
points: []FunctionShapePoint{{0, 2}, {40, 2}, {100, 2}},
assertions: []Assertion{
{p: -10, expected: 2},
{p: 0, expected: 2},
{p: 20, expected: 2},
{p: 30, expected: 2},
{p: 40, expected: 2},
{p: 70, expected: 2},
{p: 100, expected: 2},
{p: 110, expected: 2},
},
},
}
for _, test := range tests {
functionShape, err := NewFunctionShape(test.points)
assert.Nil(t, err)
function := buildBrokenLinearFunction(functionShape)
for _, assertion := range test.assertions {
assert.InDelta(t, assertion.expected, function(assertion.p), 0.1, "points=%v, p=%f", test.points, assertion.p)
}
}
}
func TestRequestedToCapacityRatio(t *testing.T) {
type resources struct {
cpu int64
mem int64
}
type nodeResources struct {
capacity resources
used resources
}
type test struct {
test string
requested resources
nodes map[string]nodeResources
expectedPriorities schedulerapi.HostPriorityList
}
tests := []test{
{
test: "nothing scheduled, nothing requested (default - least requested nodes have priority)",
requested: resources{0, 0},
nodes: map[string]nodeResources{
"node1": {
capacity: resources{4000, 10000},
used: resources{0, 0},
},
"node2": {
capacity: resources{4000, 10000},
used: resources{0, 0},
},
},
expectedPriorities: []schedulerapi.HostPriority{{Host: "node1", Score: 10}, {Host: "node2", Score: 10}},
},
{
test: "nothing scheduled, resources requested, differently sized machines (default - least requested nodes have priority)",
requested: resources{3000, 5000},
nodes: map[string]nodeResources{
"node1": {
capacity: resources{4000, 10000},
used: resources{0, 0},
},
"node2": {
capacity: resources{6000, 10000},
used: resources{0, 0},
},
},
expectedPriorities: []schedulerapi.HostPriority{{Host: "node1", Score: 4}, {Host: "node2", Score: 5}},
},
{
test: "no resources requested, pods scheduled with resources (default - least requested nodes have priority)",
requested: resources{0, 0},
nodes: map[string]nodeResources{
"node1": {
capacity: resources{4000, 10000},
used: resources{3000, 5000},
},
"node2": {
capacity: resources{6000, 10000},
used: resources{3000, 5000},
},
},
expectedPriorities: []schedulerapi.HostPriority{{Host: "node1", Score: 4}, {Host: "node2", Score: 5}},
},
}
buildResourcesPod := func(node string, requestedResources resources) *v1.Pod {
return &v1.Pod{Spec: v1.PodSpec{
NodeName: node,
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(requestedResources.cpu, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(requestedResources.mem, resource.DecimalSI),
},
},
},
},
},
}
}
for _, test := range tests {
nodeNames := make([]string, 0)
for nodeName := range test.nodes {
nodeNames = append(nodeNames, nodeName)
}
sort.Strings(nodeNames)
nodes := make([]*v1.Node, 0)
for _, nodeName := range nodeNames {
node := test.nodes[nodeName]
nodes = append(nodes, makeNode(nodeName, node.capacity.cpu, node.capacity.mem))
}
scheduledPods := make([]*v1.Pod, 0)
for name, node := range test.nodes {
scheduledPods = append(scheduledPods,
buildResourcesPod(name, node.used))
}
newPod := buildResourcesPod("", test.requested)
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(scheduledPods, nodes)
list, err := priorityFunction(RequestedToCapacityRatioResourceAllocationPriorityDefault().PriorityMap, nil, nil)(newPod, nodeNameToInfo, nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedPriorities, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedPriorities, list)
}
}
}
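
With the default shape, each resource scores 10 at 0% utilization and falls linearly to 0 at 100%, and the node score is the average of the CPU and memory scores. A small self-contained sketch, assuming that default {0, 10}, {100, 0} shape and integer arithmetic (the helper names here are hypothetical, not the vendored functions), reproduces the expected priorities for the "differently sized machines" case above:

package main

import "fmt"

// resourceScore maps utilization of a single resource to 0..10 using the
// default least-requested shape {0,10},{100,0}, with integer arithmetic.
func resourceScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	utilization := 100 - (capacity-requested)*100/capacity
	return 10 - 10*utilization/100
}

// nodeScore averages the CPU and memory scores, as the priority above does.
func nodeScore(reqCPU, reqMem, capCPU, capMem int64) int64 {
	return (resourceScore(reqCPU, capCPU) + resourceScore(reqMem, capMem)) / 2
}

func main() {
	// "nothing scheduled, resources requested, differently sized machines"
	fmt.Println(nodeScore(3000, 5000, 4000, 10000)) // node1: (3+5)/2 = 4
	fmt.Println(nodeScore(3000, 5000, 6000, 10000)) // node2: (5+5)/2 = 5
}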

View File

@ -23,13 +23,13 @@ import (
"k8s.io/api/core/v1"
priorityutil "k8s.io/kubernetes/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// ResourceAllocationPriority contains information to calculate resource allocation priority.
type ResourceAllocationPriority struct {
Name string
scorer func(requested, allocable *schedulercache.Resource) int64
scorer func(requested, allocable *schedulercache.Resource, includeVolumes bool, requestedVolumes int, allocatableVolumes int) int64
}
// PriorityMap prioritizes nodes according to the resource allocations on the node.
@ -54,8 +54,13 @@ func (r *ResourceAllocationPriority) PriorityMap(
requested.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
requested.Memory += nodeInfo.NonZeroRequest().Memory
score := r.scorer(&requested, &allocatable)
var score int64
// Check if the pod has volumes; if so, the volume counts can be factored into the scorer for balanced resource allocation.
if len(pod.Spec.Volumes) > 0 && nodeInfo.TransientInfo != nil {
score = r.scorer(&requested, &allocatable, true, nodeInfo.TransientInfo.TransNodeInfo.RequestedVolumes, nodeInfo.TransientInfo.TransNodeInfo.AllocatableVolumesCount)
} else {
score = r.scorer(&requested, &allocatable, false, 0, 0)
}
if glog.V(10) {
glog.Infof(

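The new scorer signature above lets an implementation fold attachable-volume usage into the score alongside CPU and memory. Below is a rough sketch of one possible scorer with that signature; the `resource` type and the least-requested-style formula are assumptions for illustration, not the vendored balanced-resource-allocation logic:

package main

import "fmt"

// resource is a toy stand-in for schedulercache.Resource with just the
// fields this sketch needs.
type resource struct {
	MilliCPU int64
	Memory   int64
}

// scorer has the extended signature introduced above: it may optionally take
// requested/allocatable volume counts into account.
func scorer(requested, allocatable *resource, includeVolumes bool, requestedVolumes, allocatableVolumes int) int64 {
	// score each dimension as "fraction still free" on a 0..10 scale
	frac := func(req, alloc int64) int64 {
		if alloc == 0 {
			return 0
		}
		return 10 * (alloc - req) / alloc
	}
	score := frac(requested.MilliCPU, allocatable.MilliCPU) + frac(requested.Memory, allocatable.Memory)
	n := int64(2)
	if includeVolumes && allocatableVolumes > 0 {
		score += frac(int64(requestedVolumes), int64(allocatableVolumes))
		n++
	}
	return score / n
}

func main() {
	req := &resource{MilliCPU: 1000, Memory: 2 << 30}
	alloc := &resource{MilliCPU: 4000, Memory: 8 << 30}
	fmt.Println(scorer(req, alloc, true, 2, 10)) // averages CPU, memory and volume headroom
}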
View File

@ -20,9 +20,8 @@ import (
"fmt"
"k8s.io/api/core/v1"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"github.com/golang/glog"
)
@ -93,35 +92,7 @@ func getResourceLimits(pod *v1.Pod) *schedulercache.Resource {
// take max_resource(sum_pod, any_init_container)
for _, container := range pod.Spec.InitContainers {
for rName, rQuantity := range container.Resources.Limits {
switch rName {
case v1.ResourceMemory:
if mem := rQuantity.Value(); mem > result.Memory {
result.Memory = mem
}
case v1.ResourceCPU:
if cpu := rQuantity.MilliValue(); cpu > result.MilliCPU {
result.MilliCPU = cpu
}
// keeping these resources even though score computation in this and other priority functions
// is based only on cpu and memory.
case v1.ResourceEphemeralStorage:
if ephemeralStorage := rQuantity.Value(); ephemeralStorage > result.EphemeralStorage {
result.EphemeralStorage = ephemeralStorage
}
case v1.ResourceNvidiaGPU:
if gpu := rQuantity.Value(); gpu > result.NvidiaGPU {
result.NvidiaGPU = gpu
}
default:
if v1helper.IsScalarResourceName(rName) {
value := rQuantity.Value()
if value > result.ScalarResources[rName] {
result.SetScalar(rName, value)
}
}
}
}
result.SetMaxResource(container.Resources.Limits)
}
return result

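The removed switch is collapsed into a single SetMaxResource call, which takes an element-wise maximum of the known resource fields over a resource list. A toy sketch of that idea, with a hypothetical `toyResource` type rather than the real schedulercache.Resource:

package main

import "fmt"

// toyResource keeps only the fields needed to illustrate the element-wise max.
type toyResource struct {
	MilliCPU int64
	Memory   int64
}

// setMax raises each field of r to the corresponding value in limits if the
// latter is larger, mirroring what a SetMaxResource-style helper does.
func (r *toyResource) setMax(limits toyResource) {
	if limits.MilliCPU > r.MilliCPU {
		r.MilliCPU = limits.MilliCPU
	}
	if limits.Memory > r.Memory {
		r.Memory = limits.Memory
	}
}

func main() {
	// take max_resource(sum_of_containers, any_init_container), as the comment above says
	result := toyResource{MilliCPU: 500, Memory: 1 << 30}
	for _, initLimits := range []toyResource{{MilliCPU: 2000, Memory: 1 << 29}} {
		result.setMax(initLimits)
	}
	fmt.Printf("%+v\n", result) // {MilliCPU:2000 Memory:1073741824}
}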
View File

@ -24,7 +24,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
//metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func TestResourceLimistPriority(t *testing.T) {
@ -103,49 +103,50 @@ func TestResourceLimistPriority(t *testing.T) {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 0), makeNode("machine3", 0, 10000), makeNode("machine4", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}, {Host: "machine4", Score: 0}},
test: "pod does not specify its resource limits",
name: "pod does not specify its resource limits",
},
{
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 3000, 10000), makeNode("machine2", 2000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 0}},
test: "pod only specifies cpu limits",
name: "pod only specifies cpu limits",
},
{
pod: &v1.Pod{Spec: memOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 4000), makeNode("machine2", 5000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 1}},
test: "pod only specifies mem limits",
name: "pod only specifies mem limits",
},
{
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 4000), makeNode("machine2", 5000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 1}},
test: "pod specifies both cpu and mem limits",
name: "pod specifies both cpu and mem limits",
},
{
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}},
test: "node does not advertise its allocatables",
name: "node does not advertise its allocatables",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(ResourceLimitsPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(ResourceLimitsPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}
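
This diff (and the test diffs that follow) converts the table-driven tests to t.Run subtests, so each case is reported under its own name and failure messages no longer need a manual prefix. A minimal standalone illustration of the pattern, with a made-up `add` function:

package example

import "testing"

func add(a, b int) int { return a + b }

func TestAddTableDriven(t *testing.T) {
	tests := []struct {
		name     string
		a, b     int
		expected int
	}{
		{name: "zeroes", a: 0, b: 0, expected: 0},
		{name: "positives", a: 2, b: 3, expected: 5},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			if got := add(test.a, test.b); got != test.expected {
				// t.Run already reports the subtest name, so no prefix is needed here
				t.Errorf("expected %d, got %d", test.expected, got)
			}
		})
	}
}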

View File

@ -23,7 +23,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
utilnode "k8s.io/kubernetes/pkg/util/node"
"github.com/golang/glog"
@ -211,7 +211,9 @@ func filteredPod(namespace string, selector labels.Selector, nodeInfo *scheduler
return []*v1.Pod{}
}
for _, pod := range nodeInfo.Pods() {
if namespace == pod.Namespace && selector.Matches(labels.Set(pod.Labels)) {
// Ignore pods being deleted for spreading purposes
// Similar to how it is done for SelectorSpreadPriority
if namespace == pod.Namespace && pod.DeletionTimestamp == nil && selector.Matches(labels.Set(pod.Labels)) {
pods = append(pods, pod)
}
}
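
The added DeletionTimestamp check means a terminating pod no longer counts toward spreading, matching what SelectorSpreadPriority already does. A toy sketch of that filter, using hypothetical pod and selector types rather than the real API objects:

package main

import (
	"fmt"
	"time"
)

// toyPod keeps just enough of a pod for this sketch.
type toyPod struct {
	Name              string
	Labels            map[string]string
	DeletionTimestamp *time.Time
}

// countForSpreading counts pods that match the selector labels and are not
// being deleted, mirroring the DeletionTimestamp check added above.
func countForSpreading(pods []toyPod, selector map[string]string) int {
	matches := func(p toyPod) bool {
		for k, v := range selector {
			if p.Labels[k] != v {
				return false
			}
		}
		return true
	}
	count := 0
	for _, p := range pods {
		if p.DeletionTimestamp == nil && matches(p) {
			count++
		}
	}
	return count
}

func main() {
	now := time.Now()
	pods := []toyPod{
		{Name: "a", Labels: map[string]string{"app": "web"}},
		{Name: "b", Labels: map[string]string{"app": "web"}, DeletionTimestamp: &now},
	}
	fmt.Println(countForSpreading(pods, map[string]string{"app": "web"})) // 1: the terminating pod is ignored
}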

View File

@ -27,7 +27,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
schedulertesting "k8s.io/kubernetes/pkg/scheduler/testing"
)
@ -64,20 +64,20 @@ func TestSelectorSpreadPriority(t *testing.T) {
services []*v1.Service
sss []*apps.StatefulSet
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: new(v1.Pod),
nodes: []string{"machine1", "machine2"},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "nothing scheduled",
name: "nothing scheduled",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
pods: []*v1.Pod{{Spec: zone1Spec}},
nodes: []string{"machine1", "machine2"},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "no services",
name: "no services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -85,7 +85,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"key": "value"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
test: "different services",
name: "different services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -96,7 +96,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "two pods, one service pod",
name: "two pods, one service pod",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -110,7 +110,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "five pods, one service pod in no namespace",
name: "five pods, one service pod in no namespace",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
@ -123,7 +123,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}, ObjectMeta: metav1.ObjectMeta{Namespace: metav1.NamespaceDefault}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "four pods, one service pod in default namespace",
name: "four pods, one service pod in default namespace",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: "ns1"}},
@ -137,7 +137,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}, ObjectMeta: metav1.ObjectMeta{Namespace: "ns1"}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}},
test: "five pods, one service pod in specific namespace",
name: "five pods, one service pod in specific namespace",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -149,7 +149,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "three pods, two service pods on different machines",
name: "three pods, two service pods on different machines",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -162,7 +162,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: labels1}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 0}},
test: "four pods, three service pods",
name: "four pods, three service pods",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -174,7 +174,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches",
name: "service with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
@ -189,7 +189,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
// "baz=blah" matches both labels1 and labels2, and "foo=bar" matches only labels 1. This means that we assume that we want to
// do spreading between all pods. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches with service and replication controller",
name: "service with partial pod label matches with service and replication controller",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
@ -203,7 +203,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches with service and replica set",
name: "service with partial pod label matches with service and replica set",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
@ -216,7 +216,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "service with partial pod label matches with service and stateful set",
name: "service with partial pod label matches with service and stateful set",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar", "bar": "foo"}, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
@ -230,7 +230,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"bar": "foo"}}}},
// Taken together Service and Replication Controller should match all Pods, hence result should be equal to one above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "disjoined service and replication controller should be treated equally",
name: "disjoined service and replication controller should be treated equally",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar", "bar": "foo"}, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
@ -244,7 +244,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "disjoined service and replica set should be treated equally",
name: "disjoined service and replica set should be treated equally",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar", "bar": "foo"}, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
@ -257,7 +257,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: map[string]string{"bar": "foo"}}}},
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "disjoined service and stateful set should be treated equally",
name: "disjoined service and stateful set should be treated equally",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
@ -270,7 +270,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}},
// Both Nodes have one pod from the given RC, hence both get 0 score.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "Replication controller with partial pod label matches",
name: "Replication controller with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
@ -283,7 +283,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "Replica set with partial pod label matches",
name: "Replica set with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
@ -296,7 +296,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}},
// We use StatefulSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "StatefulSet with partial pod label matches",
name: "StatefulSet with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicationController", "name", "abc123")}},
@ -308,7 +308,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
nodes: []string{"machine1", "machine2"},
rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"baz": "blah"}}}},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "Another replication controller with partial pod label matches",
name: "Another replication controller with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("ReplicaSet", "name", "abc123")}},
@ -321,7 +321,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
rss: []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"baz": "blah"}}}}},
// We use ReplicaSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "Another replica set with partial pod label matches",
name: "Another replica set with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, OwnerReferences: controllerRef("StatefulSet", "name", "abc123")}},
@ -334,34 +334,36 @@ func TestSelectorSpreadPriority(t *testing.T) {
sss: []*apps.StatefulSet{{Spec: apps.StatefulSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"baz": "blah"}}}}},
// We use StatefulSet, instead of ReplicationController. The result should be exactly as above.
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 5}},
test: "Another stateful set with partial pod label matches",
name: "Another stateful set with partial pod label matches",
},
}
for i, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeNodeList(test.nodes))
selectorSpread := SelectorSpread{
serviceLister: schedulertesting.FakeServiceLister(test.services),
controllerLister: schedulertesting.FakeControllerLister(test.rcs),
replicaSetLister: schedulertesting.FakeReplicaSetLister(test.rss),
statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss),
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeNodeList(test.nodes))
selectorSpread := SelectorSpread{
serviceLister: schedulertesting.FakeServiceLister(test.services),
controllerLister: schedulertesting.FakeControllerLister(test.rcs),
replicaSetLister: schedulertesting.FakeReplicaSetLister(test.rss),
statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss),
}
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(test.rcs),
schedulertesting.FakeReplicaSetLister(test.rss),
schedulertesting.FakeStatefulSetLister(test.sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(test.rcs),
schedulertesting.FakeReplicaSetLister(test.rss),
schedulertesting.FakeStatefulSetLister(test.sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeNodeList(test.nodes))
if err != nil {
t.Errorf("unexpected error: %v index : %d\n", err, i)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeNodeList(test.nodes))
if err != nil {
t.Errorf("unexpected error: %v \n", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}
@ -407,13 +409,12 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []string
rcs []*v1.ReplicationController
rss []*extensions.ReplicaSet
services []*v1.Service
sss []*apps.StatefulSet
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: new(v1.Pod),
@ -425,7 +426,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "nothing scheduled",
name: "nothing scheduled",
},
{
pod: buildPod("", labels1, nil),
@ -438,7 +439,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "no services",
name: "no services",
},
{
pod: buildPod("", labels1, nil),
@ -452,7 +453,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "different services",
name: "different services",
},
{
pod: buildPod("", labels1, nil),
@ -469,7 +470,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "two pods, 0 matching",
name: "two pods, 0 matching",
},
{
pod: buildPod("", labels1, nil),
@ -486,7 +487,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: schedulerapi.MaxPriority},
{Host: nodeMachine3Zone3, Score: schedulerapi.MaxPriority},
},
test: "two pods, 1 matching (in z2)",
name: "two pods, 1 matching (in z2)",
},
{
pod: buildPod("", labels1, nil),
@ -506,7 +507,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: 3}, // Pod on node
{Host: nodeMachine3Zone3, Score: 6}, // Pod in zone
},
test: "five pods, 3 matching (z2=2, z3=1)",
name: "five pods, 3 matching (z2=2, z3=1)",
},
{
pod: buildPod("", labels1, nil),
@ -525,7 +526,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: 3}, // Pod in zone
{Host: nodeMachine3Zone3, Score: 3}, // Pod in zone
},
test: "four pods, 3 matching (z1=1, z2=1, z3=1)",
name: "four pods, 3 matching (z1=1, z2=1, z3=1)",
},
{
pod: buildPod("", labels1, nil),
@ -544,7 +545,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: 3}, // Pod in zone
{Host: nodeMachine3Zone3, Score: 3}, // Pod in zone
},
test: "four pods, 3 matching (z1=1, z2=1, z3=1)",
name: "four pods, 3 matching (z1=1, z2=1, z3=1)",
},
{
pod: buildPod("", labels1, controllerRef("ReplicationController", "name", "abc123")),
@ -569,36 +570,38 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
{Host: nodeMachine2Zone3, Score: 3}, // Pod in zone
{Host: nodeMachine3Zone3, Score: 3}, // Pod in zone
},
test: "Replication controller spreading (z1=0, z2=1, z3=2)",
name: "Replication controller spreading (z1=0, z2=1, z3=2)",
},
}
for i, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeLabeledNodeList(labeledNodes))
selectorSpread := SelectorSpread{
serviceLister: schedulertesting.FakeServiceLister(test.services),
controllerLister: schedulertesting.FakeControllerLister(test.rcs),
replicaSetLister: schedulertesting.FakeReplicaSetLister(test.rss),
statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss),
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeLabeledNodeList(labeledNodes))
selectorSpread := SelectorSpread{
serviceLister: schedulertesting.FakeServiceLister(test.services),
controllerLister: schedulertesting.FakeControllerLister(test.rcs),
replicaSetLister: schedulertesting.FakeReplicaSetLister(test.rss),
statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss),
}
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(test.rcs),
schedulertesting.FakeReplicaSetLister(test.rss),
schedulertesting.FakeStatefulSetLister(test.sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeLabeledNodeList(labeledNodes))
if err != nil {
t.Errorf("unexpected error: %v index : %d", err, i)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(test.rcs),
schedulertesting.FakeReplicaSetLister(test.rss),
schedulertesting.FakeStatefulSetLister(test.sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeLabeledNodeList(labeledNodes))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}
@ -640,7 +643,7 @@ func TestZoneSpreadPriority(t *testing.T) {
nodes map[string]map[string]string
services []*v1.Service
expectedList schedulerapi.HostPriorityList
test string
name string
}{
{
pod: new(v1.Pod),
@ -648,7 +651,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "nothing scheduled",
name: "nothing scheduled",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -657,7 +660,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "no services",
name: "no services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -667,7 +670,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "different services",
name: "different services",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -681,7 +684,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: schedulerapi.MaxPriority}, {Host: "machine12", Score: schedulerapi.MaxPriority},
{Host: "machine21", Score: 0}, {Host: "machine22", Score: 0},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "three pods, one service pod",
name: "three pods, one service pod",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -695,7 +698,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 5}, {Host: "machine12", Score: 5},
{Host: "machine21", Score: 5}, {Host: "machine22", Score: 5},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "three pods, two service pods on different machines",
name: "three pods, two service pods on different machines",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1, Namespace: metav1.NamespaceDefault}},
@ -710,7 +713,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 0}, {Host: "machine12", Score: 0},
{Host: "machine21", Score: schedulerapi.MaxPriority}, {Host: "machine22", Score: schedulerapi.MaxPriority},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "three service label match pods in different namespaces",
name: "three service label match pods in different namespaces",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -725,7 +728,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 6}, {Host: "machine12", Score: 6},
{Host: "machine21", Score: 3}, {Host: "machine22", Score: 3},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "four pods, three service pods",
name: "four pods, three service pods",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -739,7 +742,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 3}, {Host: "machine12", Score: 3},
{Host: "machine21", Score: 6}, {Host: "machine22", Score: 6},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "service with partial pod label matches",
name: "service with partial pod label matches",
},
{
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
@ -754,7 +757,7 @@ func TestZoneSpreadPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine11", Score: 7}, {Host: "machine12", Score: 7},
{Host: "machine21", Score: 5}, {Host: "machine22", Score: 5},
{Host: "machine01", Score: 0}, {Host: "machine02", Score: 0}},
test: "service pod on non-zoned node",
name: "service pod on non-zoned node",
},
}
// these local variables just make sure controllerLister/replicaSetLister/statefulSetLister are not nil
@ -763,28 +766,30 @@ func TestZoneSpreadPriority(t *testing.T) {
rcs := []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}}
rss := []*extensions.ReplicaSet{{Spec: extensions.ReplicaSetSpec{Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}}}}
for i, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeLabeledNodeList(test.nodes))
zoneSpread := ServiceAntiAffinity{podLister: schedulertesting.FakePodLister(test.pods), serviceLister: schedulertesting.FakeServiceLister(test.services), label: "zone"}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeLabeledNodeList(test.nodes))
zoneSpread := ServiceAntiAffinity{podLister: schedulertesting.FakePodLister(test.pods), serviceLister: schedulertesting.FakeServiceLister(test.services), label: "zone"}
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(rcs),
schedulertesting.FakeReplicaSetLister(rss),
schedulertesting.FakeStatefulSetLister(sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(zoneSpread.CalculateAntiAffinityPriorityMap, zoneSpread.CalculateAntiAffinityPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeLabeledNodeList(test.nodes))
if err != nil {
t.Errorf("unexpected error: %v index : %d", err, i)
}
mataDataProducer := NewPriorityMetadataFactory(
schedulertesting.FakeServiceLister(test.services),
schedulertesting.FakeControllerLister(rcs),
schedulertesting.FakeReplicaSetLister(rss),
schedulertesting.FakeStatefulSetLister(sss))
mataData := mataDataProducer(test.pod, nodeNameToInfo)
ttp := priorityFunction(zoneSpread.CalculateAntiAffinityPriorityMap, zoneSpread.CalculateAntiAffinityPriorityReduce, mataData)
list, err := ttp(test.pod, nodeNameToInfo, makeLabeledNodeList(test.nodes))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("test index %d (%s): expected %#v, got %#v", i, test.test, test.expectedList, list)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected %#v, got %#v", test.expectedList, list)
}
})
}
}

View File

@ -22,7 +22,7 @@ import (
"k8s.io/api/core/v1"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// CountIntolerableTaintsPreferNoSchedule gives the count of intolerable taints of a pod with effect PreferNoSchedule

View File

@ -23,7 +23,7 @@ import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func nodeWithTaints(nodeName string, taints []v1.Taint) *v1.Node {
@ -54,11 +54,11 @@ func TestTaintAndToleration(t *testing.T) {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
name string
}{
// basic test case
{
test: "node with taints tolerated by the pod gets a higher score than nodes with intolerable taints",
name: "node with taints tolerated by the pod gets a higher score than nodes with intolerable taints",
pod: podWithTolerations([]v1.Toleration{{
Key: "foo",
Operator: v1.TolerationOpEqual,
@ -84,7 +84,7 @@ func TestTaintAndToleration(t *testing.T) {
},
// the count of taints that are tolerated by pod, does not matter.
{
test: "nodes whose taints are all tolerated by the pod get the same score, no matter how many tolerable taints each has",
name: "nodes whose taints are all tolerated by the pod get the same score, no matter how many tolerable taints each has",
pod: podWithTolerations([]v1.Toleration{
{
Key: "cpu-type",
@ -127,7 +127,7 @@ func TestTaintAndToleration(t *testing.T) {
},
// the count of taints on a node that are not tolerated by pod, matters.
{
test: "the more intolerable taints a node has, the lower its score.",
name: "the more intolerable taints a node has, the lower its score.",
pod: podWithTolerations([]v1.Toleration{{
Key: "foo",
Operator: v1.TolerationOpEqual,
@ -163,7 +163,7 @@ func TestTaintAndToleration(t *testing.T) {
},
// taints-tolerations priority only takes care about the taints and tolerations that have effect PreferNoSchedule
{
test: "only taints and tolerations that have effect PreferNoSchedule are checked by taints-tolerations priority function",
name: "only taints and tolerations that have effect PreferNoSchedule are checked by taints-tolerations priority function",
pod: podWithTolerations([]v1.Toleration{
{
Key: "cpu-type",
@ -205,7 +205,7 @@ func TestTaintAndToleration(t *testing.T) {
},
},
{
test: "Default behaviour: no taints and tolerations, lands on node with no taints",
name: "Default behaviour: no taints and tolerations, lands on node with no taints",
//pod without tolerations
pod: podWithTolerations([]v1.Toleration{}),
nodes: []*v1.Node{
@ -226,16 +226,17 @@ func TestTaintAndToleration(t *testing.T) {
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
ttp := priorityFunction(ComputeTaintTolerationPriorityMap, ComputeTaintTolerationPriorityReduce, nil)
list, err := ttp(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("%s, unexpected error: %v", test.test, err)
}
t.Run(test.name, func(t *testing.T) {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
ttp := priorityFunction(ComputeTaintTolerationPriorityMap, ComputeTaintTolerationPriorityReduce, nil)
list, err := ttp(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s,\nexpected:\n\t%+v,\ngot:\n\t%+v", test.test, test.expectedList, list)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("expected:\n\t%+v,\ngot:\n\t%+v", test.expectedList, list)
}
})
}
}

View File

@ -22,7 +22,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
func makeNode(node string, milliCPU, memory int64) *v1.Node {

View File

@ -32,9 +32,8 @@ const DefaultMilliCPURequest int64 = 100 // 0.1 core
// DefaultMemoryRequest defines default memory request size.
const DefaultMemoryRequest int64 = 200 * 1024 * 1024 // 200 MB
// GetNonzeroRequests returns the default resource request if none is found or what is provided on the request
// TODO: Consider setting default as a fixed fraction of machine capacity (take "capacity v1.ResourceList"
// as an additional argument here) rather than using constants
// GetNonzeroRequests returns the default resource request if none is found or
// what is provided on the request.
func GetNonzeroRequests(requests *v1.ResourceList) (int64, int64) {
var outMilliCPU, outMemory int64
// Override if un-set, but not if explicitly set to zero

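The hunk above cuts off inside GetNonzeroRequests; the intent is to substitute the defaults only when a request is absent, while keeping an explicit zero. A minimal sketch of that logic under the defaults shown above, with a simplified map-based signature (the real function takes a *v1.ResourceList):

package main

import "fmt"

const (
	defaultMilliCPURequest int64 = 100               // 0.1 core
	defaultMemoryRequest   int64 = 200 * 1024 * 1024 // 200 MB
)

// nonzeroRequests substitutes the defaults only when a resource is absent from
// the request map; an explicit zero is kept as-is, matching the comment above.
func nonzeroRequests(requests map[string]int64) (milliCPU, memory int64) {
	if v, found := requests["cpu"]; found {
		milliCPU = v
	} else {
		milliCPU = defaultMilliCPURequest
	}
	if v, found := requests["memory"]; found {
		memory = v
	} else {
		memory = defaultMemoryRequest
	}
	return milliCPU, memory
}

func main() {
	fmt.Println(nonzeroRequests(map[string]int64{}))         // 100 209715200 (defaults)
	fmt.Println(nonzeroRequests(map[string]int64{"cpu": 0})) // 0 209715200 (explicit zero kept)
}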
View File

@ -19,7 +19,7 @@ package algorithm
import (
"k8s.io/api/core/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// SchedulerExtender is an interface for external processes to influence scheduling
@ -29,7 +29,9 @@ type SchedulerExtender interface {
// Filter based on extender-implemented predicate functions. The filtered list is
// expected to be a subset of the supplied list. failedNodesMap optionally contains
// the list of failed nodes and failure reasons.
Filter(pod *v1.Pod, nodes []*v1.Node, nodeNameToInfo map[string]*schedulercache.NodeInfo) (filteredNodes []*v1.Node, failedNodesMap schedulerapi.FailedNodesMap, err error)
Filter(pod *v1.Pod,
nodes []*v1.Node, nodeNameToInfo map[string]*schedulercache.NodeInfo,
) (filteredNodes []*v1.Node, failedNodesMap schedulerapi.FailedNodesMap, err error)
// Prioritize based on extender-implemented priority functions. The returned scores & weight
// are used to compute the weighted score for an extender. The weighted scores are added to
@ -45,6 +47,27 @@ type SchedulerExtender interface {
// IsInterested returns true if at least one extended resource requested by
// this pod is managed by this extender.
IsInterested(pod *v1.Pod) bool
// ProcessPreemption returns nodes with their victim pods processed by extender based on
// given:
// 1. Pod to schedule
// 2. Candidate nodes and victim pods (nodeToVictims) generated by previous scheduling process.
// 3. nodeNameToInfo to restore v1.Node from node name if extender cache is enabled.
// The possible changes made by extender may include:
// 1. A subset of the given candidate nodes after the extender's preemption phase.
// 2. A different set of victim pods for every given candidate node after the extender's preemption phase.
ProcessPreemption(
pod *v1.Pod,
nodeToVictims map[*v1.Node]*schedulerapi.Victims,
nodeNameToInfo map[string]*schedulercache.NodeInfo,
) (map[*v1.Node]*schedulerapi.Victims, error)
// SupportsPreemption returns whether the scheduler extender supports preemption.
SupportsPreemption() bool
// IsIgnorable returns true to indicate that scheduling should not fail when this extender
// is unavailable. This gives the scheduler the ability to fail fast and tolerate non-critical extenders.
IsIgnorable() bool
}
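ProcessPreemption, SupportsPreemption and IsIgnorable are the additions here; the IsIgnorable contract in particular lets a caller keep scheduling when a non-critical extender is unreachable. The following is a stripped-down, hypothetical sketch of that calling pattern, with a toy extender interface rather than the real scheduler code:

package main

import (
	"errors"
	"fmt"
)

// toyExtender is a pared-down stand-in for SchedulerExtender, keeping only
// what this sketch needs.
type toyExtender interface {
	Name() string
	Filter(nodes []string) ([]string, error)
	IsIgnorable() bool
}

type flakyExtender struct{ ignorable bool }

func (f flakyExtender) Name() string { return "flaky" }
func (f flakyExtender) Filter(nodes []string) ([]string, error) {
	return nil, errors.New("unavailable")
}
func (f flakyExtender) IsIgnorable() bool { return f.ignorable }

// filterWithExtenders applies each extender in turn, skipping ignorable ones
// that fail instead of aborting the whole scheduling attempt.
func filterWithExtenders(nodes []string, extenders []toyExtender) ([]string, error) {
	for _, ext := range extenders {
		filtered, err := ext.Filter(nodes)
		if err != nil {
			if ext.IsIgnorable() {
				fmt.Printf("skipping extender %q: %v\n", ext.Name(), err)
				continue
			}
			return nil, err
		}
		nodes = filtered
	}
	return nodes, nil
}

func main() {
	nodes := []string{"node1", "node2"}
	out, err := filterWithExtenders(nodes, []toyExtender{flakyExtender{ignorable: true}})
	fmt.Println(out, err) // [node1 node2] <nil>
}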
// ScheduleAlgorithm is an interface implemented by things that know how to schedule pods

View File

@ -22,9 +22,15 @@ import (
extensions "k8s.io/api/extensions/v1beta1"
"k8s.io/apimachinery/pkg/labels"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// NodeFieldSelectorKeys is a map whose keys are node field selector keys and whose
// values are functions that return the corresponding field value from a node.
var NodeFieldSelectorKeys = map[string]func(*v1.Node) string{
NodeFieldSelectorKeyNodeName: func(n *v1.Node) string { return n.Name },
}
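The map above resolves a node field selector key (currently only metadata.name) to a getter on the node. A hypothetical sketch of how such a map could be used to match a field selector against a node, using a toy node type instead of *v1.Node and the apimachinery fields package:

package main

import "fmt"

// toyNode stands in for *v1.Node for this sketch.
type toyNode struct{ Name string }

// getters mirrors the idea of NodeFieldSelectorKeys: selector key -> field getter.
var getters = map[string]func(*toyNode) string{
	"metadata.name": func(n *toyNode) string { return n.Name },
}

// matchesField reports whether the node's field identified by key equals value,
// returning false for keys the map does not know about.
func matchesField(n *toyNode, key, value string) bool {
	get, ok := getters[key]
	if !ok {
		return false
	}
	return get(n) == value
}

func main() {
	n := &toyNode{Name: "node-1"}
	fmt.Println(matchesField(n, "metadata.name", "node-1"))    // true
	fmt.Println(matchesField(n, "spec.unschedulable", "true")) // false: unknown key
}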
// FitPredicate is a function that indicates if a pod fits into an existing node.
// The failure information is given by the error.
type FitPredicate func(pod *v1.Pod, meta PredicateMetadata, nodeInfo *schedulercache.NodeInfo) (bool, []PredicateFailureReason, error)
@ -78,9 +84,6 @@ type PredicateFailureReason interface {
GetReason() string
}
// GetEquivalencePodFunc is a function that gets an EquivalencePod from a pod.
type GetEquivalencePodFunc func(pod *v1.Pod) interface{}
// NodeLister interface represents anything that can list nodes for a scheduler.
type NodeLister interface {
// We explicitly return []*v1.Node, instead of v1.NodeList, to avoid

View File

@ -21,7 +21,7 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/kubernetes/pkg/scheduler/schedulercache"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
// EmptyPriorityMetadataProducer should return a no-op PriorityMetadataProducer type.

View File

@ -16,6 +16,10 @@ limitations under the License.
package algorithm
import (
api "k8s.io/kubernetes/pkg/apis/core"
)
const (
// TaintNodeNotReady will be added when node is not ready
// and feature-gate for TaintBasedEvictions flag is enabled,
@ -36,6 +40,11 @@ const (
// It is deprecated since 1.9
DeprecatedTaintNodeUnreachable = "node.alpha.kubernetes.io/unreachable"
// TaintNodeUnschedulable will be added when node becomes unschedulable
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node becomes schedulable.
TaintNodeUnschedulable = "node.kubernetes.io/unschedulable"
// TaintNodeOutOfDisk will be added when node becomes out of disk
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
@ -66,4 +75,11 @@ const (
// from the cloud-controller-manager initializes this node, and then removes
// the taint
TaintExternalCloudProvider = "node.cloudprovider.kubernetes.io/uninitialized"
// TaintNodeShutdown will be added when a node is shut down in an external cloud provider
TaintNodeShutdown = "node.cloudprovider.kubernetes.io/shutdown"
// NodeFieldSelectorKeyNodeName ('metadata.name') is used as the node field selector key
// when selecting a node by name.
NodeFieldSelectorKeyNodeName = api.ObjectNameField
)
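
For illustration, this is roughly how one of the taints declared above would be constructed. Only v1.Taint and v1.TaintEffectNoSchedule are real API types here; the surrounding controller context is assumed, and the constant is repeated locally so the sketch stands alone:

package main

import (
	"fmt"

	"k8s.io/api/core/v1"
)

// TaintNodeUnschedulable mirrors the constant declared above; repeated here so
// the sketch is self-contained.
const TaintNodeUnschedulable = "node.kubernetes.io/unschedulable"

func main() {
	// the taint a node-lifecycle style controller might add when a node is cordoned,
	// assuming the TaintNodesByCondition feature gate is enabled
	taint := v1.Taint{
		Key:    TaintNodeUnschedulable,
		Effect: v1.TaintEffectNoSchedule,
	}
	fmt.Printf("%s=%s:%s\n", taint.Key, taint.Value, taint.Effect)
}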